# brain/scripts/import_meetings.py

"""
Import UpNote M: meetings into 03 Bereiche/Meetings/.

Title format: 'M: YYYY-MM-DD[:] Titel 📅'
Target:       '03 Bereiche/Meetings/YYYY-MM-DD Titel.md'

Merge on conflict (append). Skip templates. Tag 'upnote-import'.
Use --trash to pull from UpNote trash.
"""

from __future__ import annotations

import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path

# Best effort: switch stdout to UTF-8 before anything is printed (presumably
# so that non-ASCII meeting titles can be printed on consoles whose default
# encoding is narrower). Any failure is deliberately ignored and the stream
# is left as it is.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    pass

# Directory of the UpNote backup's Markdown export that notes are read from.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Root of the target vault, and the folder inside it that meeting notes are
# written to.
VAULT = Path(r"D:\projects\chrka\brain")
MEETINGS = VAULT / "03 Bereiche" / "Meetings"

# Title heading: 1-3 '#', 'M:', a YYYY-MM-DD date (group 1), a separator of
# colons/whitespace, the title text (group 2) and an optional trailing 📅.
# NOTE(review): '[:\s]+' also matches newlines, so a heading that carries only
# a date would take its title from a following line — confirm this is intended.
TITLE_RE = re.compile(
    r"^#{1,3}\s*M:\s*(\d{4}-\d{2}-\d{2})[:\s]+(.+?)(?:\s*📅)?\s*$",
    re.MULTILINE,
)
# A Markdown table: header row, separator row, then one or more body rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# A line consisting of three asterisks, optionally separated by whitespace.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line containing nothing but a <br> / <br/> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)

# Decorated wikilinks; group 1 is the link text without prefix and emoji.
# [[Name 👤]]
PERSON_INLINE_RE = re.compile(r"\[\[([^\[\]|]+?)\s*👤\s*\]\]")
# [[LP: ... 🗺️]]
LP_INLINE_RE = re.compile(r"\[\[LP:\s*(.+?)\s*🗺️?\s*\]\]")
# [[M: ... 📅]]
M_INLINE_RE = re.compile(r"\[\[M:\s*(.+?)\s*📅?\s*\]\]")
# [[N: ... 📑]]
N_INLINE_RE = re.compile(r"\[\[N:\s*(.+?)\s*📑?\s*\]\]")

# UpNote section headers: heading lines (1-3 '#') containing the given German
# section name. None of these is referenced by the functions in this file.
SEC_TEILNEHMER = re.compile(r"^#{1,3}.*Teilnehmer.*$", re.MULTILINE)
SEC_THEMA = re.compile(r"^#{1,3}.*Thema.*$", re.MULTILINE)
SEC_NOTIZEN = re.compile(r"^#{1,3}.*Notizen.*$", re.MULTILINE)
SEC_DATUM = re.compile(r"^#{1,3}.*Datum.*$", re.MULTILINE)


@dataclass
class Meeting:
    """One parsed UpNote meeting note, as produced by parse()."""

    uuid: str   # stem of the source file name
    date: str   # date text captured from the title heading (YYYY-MM-DD digits)
    title: str  # title with markup removed and filesystem-hostile chars replaced
    body: str   # cleaned, post-processed


def clean(rest: str) -> str:
    """Normalize the note text that follows the title heading.

    Removes the first Markdown table (META_TABLE_RE) and the first
    ``* * *`` rule (HR_RE), deletes ``<br>``-only lines, rewrites the
    decorated person/LP/M/N wikilinks to plain ``[[text]]`` links, blanks
    quote lines that hold only ``>``, collapses runs of three or more
    newlines to two, and strips the result.
    """

    def plain_link(match: re.Match[str]) -> str:
        # Keep only the captured link text, without prefix or emoji.
        return f"[[{match.group(1).strip()}]]"

    # Only the first table and the first rule are removed.
    text = HR_RE.sub("", META_TABLE_RE.sub("", rest, count=1), count=1)
    text = BR_RE.sub("", text)

    # Same order as the patterns are declared: person, LP, M, N.
    for pattern in (PERSON_INLINE_RE, LP_INLINE_RE, M_INLINE_RE, N_INLINE_RE):
        text = pattern.sub(plain_link, text)

    # Empty '>' quotes are what remains of the Thema placeholder.
    text = re.sub(r"^>\s*$", "", text, flags=re.MULTILINE)
    return re.sub(r"\n{3,}", "\n\n", text).strip()


def has_content(body: str) -> bool:
    """Report whether *body* holds more than empty section scaffolding.

    Lines beginning with '#' and bullets consisting of a lone '-' are
    discarded, all whitespace is removed, and the note counts as having
    content only when more than 10 characters are left.
    """
    scaffolding = (
        r"^#{1,4}.*$",  # heading lines
        r"^\s*-\s*$",   # bullets with nothing after the dash
    )
    remainder = body
    for pattern in scaffolding:
        remainder = re.sub(pattern, "", remainder, flags=re.MULTILINE)
    significant = re.sub(r"\s+", "", remainder)
    return len(significant) > 10


def parse(src: Path) -> Meeting | None:
    """Build a Meeting from one UpNote markdown file.

    Returns None when no heading matches TITLE_RE, when the title carries a
    template marker ('@@' or '{{'), or when the cleaned body fails
    has_content(). The file stem becomes the meeting's uuid.
    """
    raw = src.read_text(encoding="utf-8")
    heading = TITLE_RE.search(raw)
    if heading is None:
        return None

    date, name = heading.group(1), heading.group(2).strip()
    # Drop leading/trailing asterisks left over from bold/italic markup.
    name = re.sub(r"^\*+|\*+$", "", name).strip()
    # Reduce markdown links to their label: [text](url) -> text
    name = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", name)

    # Template notes are never imported.
    if any(marker in name for marker in ("@@", "{{")):
        return None

    # Replace characters that are hostile to file names, then squeeze whitespace.
    name = re.sub(r"[\\/:*?\"<>|]", "-", name)
    name = re.sub(r"\s+", " ", name).strip()

    # Everything after the matched title heading is the note body.
    content = clean(raw[heading.end():])
    if has_content(content):
        return Meeting(uuid=src.stem, date=date, title=name, body=content)
    return None


def render(mtg: Meeting) -> str:
    """Return the complete markdown document for a newly created meeting note.

    The document consists of YAML front matter (tags 'meeting' and
    'upnote-import' plus the date), an H1 of the form '<date> — <title>',
    and the body, ending in a single newline.
    """
    front_matter = [
        "---",
        "tags:",
        "  - meeting",
        "  - upnote-import",
        f"date: {mtg.date}",
        "---",
    ]
    heading = f"# {mtg.date} — {mtg.title}"
    # The empty entries yield the blank separator lines and the final newline.
    return "\n".join([*front_matter, "", heading, "", mtg.body, ""])


def append_block(existing: str, mtg: Meeting) -> str:
    """Return *existing* with the meeting body appended under '## UpNote-Import'.

    Trailing whitespace of *existing* is trimmed before the block is added.
    If the (non-empty) body already occurs verbatim in *existing*, the text is
    returned unchanged so that importing the same note again does not stack
    duplicate blocks.
    """
    # Without this guard every re-run of the import (or an identical duplicate
    # note) would append the same body once more.
    if mtg.body and mtg.body in existing:
        return existing
    block = f"\n\n## UpNote-Import\n\n{mtg.body}\n"
    return existing.rstrip() + block


def collect(trash: bool, root: Path | None = None) -> list[Path]:
    """Return the markdown files whose text starts with an 'M:' heading.

    Args:
        trash: Look in the 'trash' subdirectory instead of the root itself.
        root: Directory to scan; defaults to UPNOTE_ROOT.

    Returns:
        The matching '*.md' paths in sorted order. Files that cannot be
        opened or read are skipped.
    """
    base = UPNOTE_ROOT if root is None else root
    folder = base / "trash" if trash else base
    found: list[Path] = []
    # Sorted so that notes colliding on the same target file are merged in a
    # reproducible order; glob itself yields paths in arbitrary order.
    for path in sorted(folder.glob("*.md")):
        try:
            # Only the first 200 characters are inspected, so avoid loading
            # the whole file.
            with path.open(encoding="utf-8", errors="ignore") as fh:
                head = fh.read(200)
        except OSError:
            continue
        if re.match(r"^#{1,3}\s*M:\s", head):
            found.append(path)
    return found


def main() -> int:
    """Command-line entry point: collect, parse and import the meeting notes.

    Flags:
        --dry-run  Print the parsed meetings and exit without touching the vault.
        --trash    Read the notes from the UpNote 'trash' folder.

    Returns:
        The process exit code (always 0).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--trash", action="store_true")
    args = ap.parse_args()

    meetings: list[Meeting] = []
    # Counts every file parse() rejected: no title match, template, or empty.
    skipped_empty = 0
    for src in collect(trash=args.trash):
        m = parse(src)
        if m is None:
            skipped_empty += 1
            continue
        meetings.append(m)

    meetings.sort(key=lambda x: (x.date, x.title.lower()))

    print(f"Parsed: {len(meetings)} meetings (skipped empty/template: {skipped_empty})")
    if args.dry_run:
        for m in meetings:
            print(f"  {m.date}  {m.title}")
        return 0

    # Create the target folder only for a real run; a dry run must not modify
    # the vault.
    MEETINGS.mkdir(parents=True, exist_ok=True)

    written = merged = 0
    for mtg in meetings:
        target = MEETINGS / f"{mtg.date} {mtg.title}.md"
        if target.exists():
            # Conflict: keep the existing note and append the imported body.
            target.write_text(append_block(target.read_text(encoding="utf-8"), mtg), encoding="utf-8")
            merged += 1
        else:
            target.write_text(render(mtg), encoding="utf-8")
            written += 1

    print(f"Written: {written}, merged: {merged}")
    return 0


if __name__ == "__main__":
    sys.exit(main())