# brain/scripts/import_journal.py

"""
Import UpNote journal entries into Obsidian daily notes.

Source: UpNote notebook folder with .md.lnk shortcuts -> UUID.md in root.
Target: 05 Daily Notes/YYYY-MM-DD.md with frontmatter.

Usage:
  python import_journal.py --dry-run          # preview only
  python import_journal.py --test             # write to _import_test/ (staging)
  python import_journal.py                    # full import (merges on conflict)
"""

from __future__ import annotations

import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path

# --- Config -------------------------------------------------------------

# Root of the UpNote Markdown backup; the note files (UUID.md) are looked up
# directly in this folder.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Notebook folder that holds the "*.md.lnk" shortcuts of the journal notes.
JOURNAL_NB = UPNOTE_ROOT / "notebooks" / "Bereiche (A)" / "Privat" / "Persönlich" / "Journal"
# Obsidian vault and the folders the import writes to.
VAULT = Path(r"D:\projects\chrka\brain")
DAILY = VAULT / "05 Daily Notes"
STAGING = DAILY / "_import_test"

# German month abbreviations (without trailing dot) -> month number.
MONTHS = {
    "Jan": 1, "Feb": 2, "Mär": 3, "Mrz": 3, "Apr": 4, "Mai": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Sept": 9, "Okt": 10, "Nov": 11, "Dez": 12,
}

# NOTE: the month group in the three "Jo:" patterns accepts an optional
# trailing dot ("Sept."). The code that consumes the group strips that dot
# before looking the month up, so the patterns have to let it through.

# Title heading (1-3 '#'): "Jo: <day>. <month> 📘" -> (day, month token).
TITLE_RE = re.compile(r"^#{1,3}\s*Jo:\s*(\d{1,2})\.\s*([A-Za-zäö]+\.?)\s*📘", re.MULTILINE)
# Heading (1-3 '#'): "<4-digit year> - <word>" -> (year, word).
YEAR_RE = re.compile(r"^#{1,3}\s*(\d{4})\s*-\s*(\w+)", re.MULTILINE)
# Markdown pipe table: header row, separator row, one or more body rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# Horizontal rule written as "* * *" on a line of its own.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# Line that contains nothing but a <br> / <br/> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)
# Line that contains nothing but a bare "-" bullet marker.
EMPTY_BULLET_RE = re.compile(r"^\s*-\s*$", re.MULTILINE)
# Wiki link to a day note: "[[Jo: <day>. <month> 📘]]" -> (day, month token).
JO_LINK_RE = re.compile(r"\[\[Jo:\s*(\d{1,2})\.\s*([A-Za-zäö]+\.?)\s*📘\]\]")
# Wiki link to a month note: "[[Jo: <month> 📘]]" -> (month token,).
JO_MONTH_LINK_RE = re.compile(r"\[\[Jo:\s*([A-Za-zäö]+\.?)\s*📘\]\]")


# --- Parsing ------------------------------------------------------------

@dataclass
class Entry:
    """A single parsed journal note, ready to be rendered as a daily note."""

    uuid: str
    date: str  # ISO form, YYYY-MM-DD
    weekday: str  # weekday word, e.g. Freitag
    year: int
    month: int
    day: int
    body: str  # cleaned note content

    @property
    def month_str(self) -> str:
        """Return the month of this entry formatted as ``YYYY-MM``."""
        return "{:04d}-{:02d}".format(self.year, self.month)


def parse_entry(src: Path) -> Entry | None:
    """Parse one UpNote journal note into an :class:`Entry`.

    The note must contain a ``Jo: <day>. <month> 📘`` title heading and, after
    it, a ``<year> - <weekday>`` heading. Everything after the year heading is
    the body. Between the two headings the first Markdown table and the first
    ``* * *`` rule are dropped.

    Args:
        src: Path of the note file; it is read as UTF-8.

    Returns:
        The parsed entry, or ``None`` when the title or year heading is
        missing, the month abbreviation is unknown, day/month/year do not form
        a real calendar date, or no meaningful text is left after cleaning.
    """
    import datetime as _dt  # only needed here, for the calendar-date check

    text = src.read_text(encoding="utf-8")

    # Title heading carries the day and the month abbreviation.
    m_title = TITLE_RE.search(text)
    if not m_title:
        return None
    day = int(m_title.group(1))
    mon_ab = m_title.group(2).rstrip(".")
    month = MONTHS.get(mon_ab)
    if month is None:
        print(f"  ! unknown month '{mon_ab}' in {src.name}", file=sys.stderr)
        return None

    # Only look at what follows the title; drop the meta table and the rule.
    rest = text[m_title.end():]
    rest = META_TABLE_RE.sub("", rest, count=1)
    rest = HR_RE.sub("", rest, count=1)

    # The year heading supplies the year and the weekday word.
    m_year = YEAR_RE.search(rest)
    if not m_year:
        return None
    year = int(m_year.group(1))
    weekday = m_year.group(2)

    # Reject impossible dates (e.g. 31. Feb) here; otherwise the date
    # arithmetic done while rendering would raise and abort the whole import.
    try:
        _dt.date(year, month, day)
    except ValueError:
        print(
            f"  ! invalid date {year:04d}-{month:02d}-{day:02d} in {src.name}",
            file=sys.stderr,
        )
        return None

    content = rest[m_year.end():]

    # Remove <br> lines and empty bullets, then collapse 3+ newlines into 2.
    content = BR_RE.sub("", content)
    content = EMPTY_BULLET_RE.sub("", content)
    content = re.sub(r"\n{3,}", "\n\n", content).strip()

    # Full German month names, accepted in month links in addition to the
    # abbreviations in MONTHS. Built once per note, not once per match.
    full_months = {
        "Januar": 1, "Februar": 2, "März": 3, "April": 4, "Mai": 5,
        "Juni": 6, "Juli": 7, "August": 8, "September": 9,
        "Oktober": 10, "November": 11, "Dezember": 12,
    }

    # Jo: links carry no year, so they are rewritten with this entry's year.
    def _day_link(m: re.Match) -> str:
        d = int(m.group(1))
        mn = MONTHS.get(m.group(2).rstrip("."))
        if not mn:
            return m.group(0)  # unknown month: leave the link untouched
        return f"[[{year:04d}-{mn:02d}-{d:02d}]]"

    def _month_link(m: re.Match) -> str:
        mab = m.group(1).rstrip(".")
        mn = full_months.get(mab) or MONTHS.get(mab)
        if not mn:
            return m.group(0)  # unknown month: leave the link untouched
        return f"[[{year:04d}-{mn:02d}]]"

    content = JO_LINK_RE.sub(_day_link, content)
    content = JO_MONTH_LINK_RE.sub(_month_link, content)

    # Skip the note unless at least one line holds something other than
    # dashes and whitespace (this also covers completely empty content).
    if not any(not re.match(r"^[\-\s]*$", ln) for ln in content.splitlines()):
        return None

    return Entry(
        uuid=src.stem,
        date=f"{year:04d}-{month:02d}-{day:02d}",
        weekday=weekday,
        year=year, month=month, day=day,
        body=content,
    )


# --- Rendering ----------------------------------------------------------

def prev_next_links(entry: Entry) -> tuple[str, str]:
    """Return the ISO dates of the day before and the day after *entry*.

    Returns:
        ``(previous_day, next_day)``, both formatted as ``YYYY-MM-DD``.
    """
    from datetime import date, timedelta

    one_day = timedelta(days=1)
    current = date(entry.year, entry.month, entry.day)
    return (current - one_day).isoformat(), (current + one_day).isoformat()


def render(entry: Entry) -> str:
    """Build the full text of a new daily note for *entry*.

    The note consists of YAML front matter (date plus the ``daily`` tag), a
    title line, a navigation line linking the previous day, the month and the
    next day, and the entry body under a ``## Was war`` heading.
    """
    before, after = prev_next_links(entry)
    month = entry.month_str

    front_matter = f"---\ndate: {entry.date}\ntags:\n  - daily\n---\n\n"
    title = f"# {entry.date} — {entry.weekday}\n\n"
    navigation = f"[[{before}|◀ {before}]] · [[{month}|🗓 {month}]] · [[{after}|{after} ▶]]\n\n"
    section = f"## Was war\n\n{entry.body}\n"

    return "".join((front_matter, title, navigation, section))


# --- Merge --------------------------------------------------------------

def merge_into(existing: str, entry: Entry) -> str:
    """Append the entry body under a '## Was war (UpNote-Import)' section.

    The merge is idempotent: if *existing* already contains the body directly
    under a '## Was war' or '## Was war (UpNote-Import)' heading, it is
    returned unchanged. This covers both a note written by this import and a
    note that was merged before. Without the check every re-run of the import
    would append the same text again.

    Args:
        existing: Current text of the daily note.
        entry: Parsed journal entry; only its ``body`` is used.

    Returns:
        The note text with the import section appended, or *existing* itself
        when the body is already present.
    """
    # Normalise trailing whitespace so a body at the very end of the note is
    # still followed by the newline the membership test looks for.
    normalized = existing.rstrip() + "\n"
    for heading in ("## Was war", "## Was war (UpNote-Import)"):
        if f"{heading}\n\n{entry.body}\n" in normalized:
            return existing

    block = f"\n## Was war (UpNote-Import)\n\n{entry.body}\n"
    return normalized + block


# --- Main ---------------------------------------------------------------

def collect_uuids() -> list[str]:
    """Return the note file names referenced by the journal notebook.

    Each ``*.md.lnk`` shortcut found in ``JOURNAL_NB`` contributes its own
    name with the trailing ``.lnk`` removed, i.e. ``UUID.md``.
    """
    return [shortcut.stem for shortcut in JOURNAL_NB.glob("*.md.lnk")]


def main() -> int:
    """Run the import from the command line.

    Options:
        --dry-run  only print what would be written, touch nothing
        --test     write into the staging folder instead of the daily notes
        --limit N  import at most N entries, taken in date order (0 = all)

    Outside ``--test`` an already existing daily note is merged into instead
    of overwritten. In ``--test`` mode stale staging files are overwritten,
    but two notes of the same day within one run are merged, as in a real
    import.

    Returns:
        Process exit code; always 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--test", action="store_true", help="write to staging folder")
    ap.add_argument("--limit", type=int, default=0, help="max entries (0=all)")
    args = ap.parse_args()

    out_dir = STAGING if args.test else DAILY
    if not args.dry_run:
        out_dir.mkdir(parents=True, exist_ok=True)

    entries: list[Entry] = []
    skipped = 0
    for md_name in collect_uuids():
        src = UPNOTE_ROOT / md_name
        if not src.exists():
            print(f"  ! missing source: {md_name}", file=sys.stderr)
            continue
        # One unreadable or badly encoded note must not abort the whole run.
        try:
            e = parse_entry(src)
        except (OSError, UnicodeDecodeError) as exc:
            print(f"  ! unreadable source: {md_name} ({exc})", file=sys.stderr)
            continue
        if e is None:
            skipped += 1
            continue
        entries.append(e)

    entries.sort(key=lambda x: x.date)
    # Only a positive limit truncates; a negative value would otherwise
    # silently slice entries off the end of the list.
    if args.limit > 0:
        entries = entries[: args.limit]

    print(f"Parsed: {len(entries)} entries (skipped empty: {skipped})")
    if entries:
        print(f"Range: {entries[0].date} .. {entries[-1].date}")

    written = merged = 0
    touched: set[Path] = set()  # targets created or merged during this run
    for e in entries:
        target = out_dir / f"{e.date}.md"
        # Rendered before the dry-run check so that a rendering failure also
        # shows up in the preview.
        rendered = render(e)
        if args.dry_run:
            print(f"  would write {target.relative_to(VAULT)}  ({len(e.body)} chars)")
            continue
        # Staging files left over from an earlier --test run are overwritten,
        # but a second note for the same day in this run is merged.
        if target.exists() and (not args.test or target in touched):
            merged_content = merge_into(target.read_text(encoding="utf-8"), e)
            target.write_text(merged_content, encoding="utf-8")
            merged += 1
        else:
            target.write_text(rendered, encoding="utf-8")
            written += 1
        touched.add(target)

    if not args.dry_run:
        print(f"Written: {written}, merged: {merged}, out_dir: {out_dir.relative_to(VAULT)}")
    return 0


# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())