# brain/scripts/import_journal.py
#
# 239 lines
# 7.4 KiB
# Python

"""
Import UpNote journal entries into Obsidian daily notes.
Source: UpNote notebook folder with .md.lnk shortcuts -> UUID.md in root.
Target: 05 Daily Notes/YYYY-MM-DD.md with frontmatter.
Usage:
python import_journal.py --dry-run # preview only
python import_journal.py --test # write to _import_test/ (staging)
python import_journal.py # full import (merges on conflict)
"""
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# --- Config -------------------------------------------------------------
# Root of the UpNote Markdown backup; the UUID-named note files are looked up
# directly in this folder.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Notebook folder holding the ``*.md.lnk`` shortcuts that name the journal notes.
JOURNAL_NB = UPNOTE_ROOT / "notebooks" / "Bereiche (A)" / "Privat" / "Persönlich" / "Journal"
# Obsidian vault and the folders the importer writes to.
VAULT = Path(r"D:\projects\chrka\brain")
DAILY = VAULT / "05 Daily Notes"
STAGING = DAILY / "_import_test"

# German month abbreviations (including spelling variants) -> month number.
MONTHS = {
    "Jan": 1, "Feb": 2, "Mär": 3, "Mrz": 3, "Apr": 4, "Mai": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Sept": 9, "Okt": 10, "Nov": 11, "Dez": 12,
}

# Title heading "Jo: <day>. <month> 📘"; group 1 = day, group 2 = month word.
# The month may carry a trailing dot ("Sept."), which is matched but kept out
# of the group so that the month lookup sees the bare abbreviation.
TITLE_RE = re.compile(r"^#{1,3}\s*Jo:\s*(\d{1,2})\.\s*([A-Za-zäö]+)\.?\s*📘", re.MULTILINE)
# Heading "<year> - <word>"; group 1 = four-digit year, group 2 = following word.
YEAR_RE = re.compile(r"^#{1,3}\s*(\d{4})\s*-\s*(\w+)", re.MULTILINE)
# A Markdown table: header row, separator row, one or more body rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# Horizontal rule written as "* * *" on its own line.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line consisting solely of a <br> / <br/> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)
# A bullet line with no text after the dash.
EMPTY_BULLET_RE = re.compile(r"^\s*-\s*$", re.MULTILINE)
# Wiki link to a journal day, "[[Jo: <day>. <month> 📘]]" (optional dot as above).
JO_LINK_RE = re.compile(r"\[\[Jo:\s*(\d{1,2})\.\s*([A-Za-zäö]+)\.?\s*📘\]\]")
# Wiki link to a journal month, "[[Jo: <month> 📘]]" (optional dot as above).
JO_MONTH_LINK_RE = re.compile(r"\[\[Jo:\s*([A-Za-zäö]+)\.?\s*📘\]\]")
# --- Parsing ------------------------------------------------------------
@dataclass
class Entry:
    """A single journal entry extracted from an UpNote note."""

    uuid: str  # identifier of the source note
    date: str  # ISO date, YYYY-MM-DD
    weekday: str  # weekday word, e.g. Freitag
    year: int
    month: int
    day: int
    body: str  # cleaned bullet content

    @property
    def month_str(self) -> str:
        """Return the entry's month formatted as ``YYYY-MM``."""
        return "{:04d}-{:02d}".format(self.year, self.month)
def parse_entry(src: Path) -> Entry | None:
    """Parse one UpNote journal note file into an ``Entry``.

    The note is expected to contain a ``Jo: <day>. <month> 📘`` title heading
    followed (after an optional meta table and horizontal rule) by a
    ``<year> - <weekday>`` heading; everything after that heading is the body.

    Args:
        src: Path of the UTF-8 encoded Markdown note.

    Returns:
        The parsed entry, or ``None`` when the note has no recognisable title
        or year heading, names an unknown month, describes a date that does
        not exist in the calendar, or has no meaningful content left after
        cleaning.
    """
    import datetime as _dt

    text = src.read_text(encoding="utf-8")

    # Title heading gives day and month abbreviation.
    m_title = TITLE_RE.search(text)
    if not m_title:
        return None
    day = int(m_title.group(1))
    mon_ab = m_title.group(2).rstrip(".")
    if mon_ab not in MONTHS:
        print(f" ! unknown month '{mon_ab}' in {src.name}", file=sys.stderr)
        return None
    month = MONTHS[mon_ab]

    # Everything after the title: drop the first meta table and the first
    # horizontal rule, then look for the year heading.
    rest = text[m_title.end():]
    rest = META_TABLE_RE.sub("", rest, count=1)
    rest = HR_RE.sub("", rest, count=1)
    m_year = YEAR_RE.search(rest)
    if not m_year:
        return None
    year = int(m_year.group(1))
    weekday = m_year.group(2)

    # Reject impossible dates (e.g. "31. Feb") here: downstream code builds a
    # datetime.date from these fields and would raise ValueError, aborting
    # the whole import because of a single bad note.
    try:
        _dt.date(year, month, day)
    except ValueError:
        print(
            f" ! invalid date {year:04d}-{month:02d}-{day:02d} in {src.name}",
            file=sys.stderr,
        )
        return None

    # Clean the body: remove <br> lines and empty bullets, then collapse runs
    # of three or more newlines down to a single blank line.
    content = rest[m_year.end():]
    content = BR_RE.sub("", content)
    content = EMPTY_BULLET_RE.sub("", content)
    content = re.sub(r"\n{3,}", "\n\n", content).strip()

    # Full German month names, accepted in month links besides abbreviations.
    full_months = {
        "Januar": 1, "Februar": 2, "März": 3, "April": 4, "Mai": 5,
        "Juni": 6, "Juli": 7, "August": 8, "September": 9,
        "Oktober": 10, "November": 11, "Dezember": 12,
    }

    # The Jo: links carry no year, so the entry's own year is used.
    def _day_link(m: re.Match) -> str:
        d = int(m.group(1))
        mn = MONTHS.get(m.group(2).rstrip("."))
        if not mn:
            return m.group(0)  # unknown month: leave the link untouched
        return f"[[{year:04d}-{mn:02d}-{d:02d}]]"

    def _month_link(m: re.Match) -> str:
        mab = m.group(1).rstrip(".")
        mn = full_months.get(mab) or MONTHS.get(mab)
        if not mn:
            return m.group(0)  # unknown month: leave the link untouched
        return f"[[{year:04d}-{mn:02d}]]"

    content = JO_LINK_RE.sub(_day_link, content)
    content = JO_MONTH_LINK_RE.sub(_month_link, content)

    # Skip notes whose remaining lines are all blank or made of dashes only
    # (this also covers completely empty content).
    has_text = any(
        ln.strip() and not re.match(r"^[\-\s]*$", ln)
        for ln in content.splitlines()
    )
    if not has_text:
        return None

    return Entry(
        uuid=src.stem,
        date=f"{year:04d}-{month:02d}-{day:02d}",
        weekday=weekday,
        year=year,
        month=month,
        day=day,
        body=content,
    )
# --- Rendering ----------------------------------------------------------
def prev_next_links(entry: Entry) -> tuple[str, str]:
    """Return the ISO dates of the day before and the day after *entry*.

    The neighbours are computed with calendar arithmetic, so month and year
    boundaries as well as leap days are handled.
    """
    import datetime as _dt

    one_day = _dt.timedelta(days=1)
    today = _dt.date(entry.year, entry.month, entry.day)
    return (today - one_day).isoformat(), (today + one_day).isoformat()
def render(entry: Entry) -> str:
    """Render *entry* as the full text of a new daily note.

    The note consists of YAML frontmatter (date and a ``daily`` tag), an H1
    with date and weekday, a navigation line linking the previous day, the
    month note and the next day, and the entry body under ``## Was war``.

    Args:
        entry: The parsed journal entry.

    Returns:
        The complete Markdown text of the note.
    """
    prev, nxt = prev_next_links(entry)
    nav = (
        f"[[{prev}|◀ {prev}]] · "
        f"[[{entry.month_str}|🗓 {entry.month_str}]] · "
        f"[[{nxt}|{nxt} ▶]]"
    )
    return (
        f"---\n"
        f"date: {entry.date}\n"
        f"tags:\n - daily\n"
        f"---\n\n"
        # Date and weekday are separated by a space; without it the heading
        # read e.g. "2024-03-05Freitag".
        f"# {entry.date} {entry.weekday}\n\n"
        f"{nav}\n\n"
        f"## Was war\n\n"
        f"{entry.body}\n"
    )
# --- Merge --------------------------------------------------------------
def merge_into(existing: str, entry: Entry) -> str:
    """Append the entry body to an existing note under '## Was war (UpNote-Import)'.

    The merge is idempotent: when the body is already present in *existing*
    (matched on whole lines), the note is returned unchanged, so running the
    import again does not duplicate content.

    Args:
        existing: Current text of the target note.
        entry: The parsed journal entry whose body should be merged in.

    Returns:
        The note text with the import section appended, or *existing* itself
        when it already contains the body.
    """
    body = entry.body
    # Pad both sides with newlines so the body only matches on line
    # boundaries ("- ok" must not match inside "- okay").
    if f"\n{body}\n" in f"\n{existing}\n":
        return existing
    block = f"\n## Was war (UpNote-Import)\n\n{body}\n"
    return existing.rstrip() + "\n" + block
# --- Main ---------------------------------------------------------------
def collect_uuids() -> list[str]:
    """List the note file names referenced by the journal notebook.

    Every ``*.md.lnk`` shortcut in ``JOURNAL_NB`` contributes its name without
    the trailing ``.lnk``, i.e. a ``UUID.md`` file name.
    """
    # Path.stem drops only the last suffix (".lnk"), leaving "UUID.md".
    return [shortcut.stem for shortcut in JOURNAL_NB.glob("*.md.lnk")]
def main() -> int:
    """Run the import from the command line.

    Options:
        --dry-run  only print what would be written.
        --test     write to the staging folder instead of the daily notes.
        --limit N  process at most N entries (0 = all), oldest first.

    Returns:
        Process exit code (always 0).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--test", action="store_true", help="write to staging folder")
    ap.add_argument("--limit", type=int, default=0, help="max entries (0=all)")
    args = ap.parse_args()

    out_dir = STAGING if args.test else DAILY
    if not args.dry_run:
        out_dir.mkdir(parents=True, exist_ok=True)

    # Parse every note referenced by the notebook shortcuts.
    entries: list[Entry] = []
    skipped = 0
    for md_name in collect_uuids():
        src = UPNOTE_ROOT / md_name
        if not src.exists():
            print(f" ! missing source: {md_name}", file=sys.stderr)
            continue
        e = parse_entry(src)
        if e is None:
            skipped += 1
            continue
        entries.append(e)

    # ISO date strings sort chronologically.
    entries.sort(key=lambda x: x.date)
    if args.limit:
        entries = entries[: args.limit]
    print(f"Parsed: {len(entries)} entries (skipped empty: {skipped})")
    if entries:
        print(f"Range: {entries[0].date} .. {entries[-1].date}")

    written = merged = 0
    # Files produced during this run. A second entry for the same date must be
    # merged into the file just written; in --test mode it would otherwise
    # overwrite the first entry, because pre-existing staging files are
    # deliberately replaced.
    produced: set[Path] = set()
    for e in entries:
        target = out_dir / f"{e.date}.md"
        rendered = render(e)
        if args.dry_run:
            print(f" would write {target.relative_to(VAULT)} ({len(e.body)} chars)")
            continue
        if target in produced or (target.exists() and not args.test):
            merged_content = merge_into(target.read_text(encoding="utf-8"), e)
            target.write_text(merged_content, encoding="utf-8")
            merged += 1
        else:
            target.write_text(rendered, encoding="utf-8")
            written += 1
        produced.add(target)

    if not args.dry_run:
        print(f"Written: {written}, merged: {merged}, out_dir: {out_dir.relative_to(VAULT)}")
    return 0
# Script entry point: run the importer and use its return value as exit code.
if __name__ == "__main__":
    raise SystemExit(main())