# brain/scripts/import_persons_lps.py

"""
Import UpNote P: (persons) and LP: (landing pages) notes.

Persons -> 00 Kontext/Personen/  (skip if file already exists)
Landing pages -> 08 Landing Pages/  (new folder)

All imports get tag 'upnote-import' for triage.
Wikilinks like [[Name 👤]] are rewritten to [[Name]]; [[LP: Foo 🗺️]] -> [[Foo]].
"""

from __future__ import annotations

import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path

# Best effort: switch stdout to UTF-8 (presumably so that the emoji contained in
# note titles can be printed on consoles with a legacy code page — TODO confirm).
# Any failure, e.g. a stdout object without reconfigure(), is deliberately ignored.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    pass

# Folder that is scanned (non-recursively) for exported UpNote *.md notes.
# NOTE(review): machine- and account-specific absolute path — adjust before
# running this script anywhere else.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Root of the target vault; both output folders below live inside it.
VAULT = Path(r"D:\projects\chrka\brain")
# Destination folder for imported person notes.
PERSONEN = VAULT / "00 Kontext" / "Personen"
# Destination folder for imported landing-page notes.
LPS = VAULT / "08 Landing Pages"

# A Markdown table: header row, separator row, then one or more body rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# A horizontal rule written as "* * *" on a line of its own.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line that consists only of a <br> / <br/> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)

# Level 1-3 heading of a person note ("# P: Name 👤"); group 1 is the name
# without the optional trailing emoji.
TITLE_P_RE = re.compile(r"^#{1,3}\s*P:\s*(.+?)(?:\s*👤)?\s*$", re.MULTILINE)
# Level 1-3 heading of a landing page ("# LP: Foo 🗺️"); group 1 is the name.
TITLE_LP_RE = re.compile(r"^#{1,3}\s*LP:\s*(.+?)(?:\s*🗺️?)?\s*$", re.MULTILINE)

# find [[LP: Foo 🗺️]] anywhere; group 1 is "Foo".
# The target may not contain brackets or a newline. With a plain ".+?" a match
# could start at an emoji-less "[[LP: A]]" and run on to the "🗺️]]" of a later
# link, swallowing everything in between.
LP_INLINE_RE = re.compile(r"\[\[LP:\s*([^\[\]\n]+?)\s*🗺️?\s*\]\]")
# find [[Name 👤]] anywhere; group 1 is "Name".
PERSON_INLINE_RE = re.compile(r"\[\[([^\[\]|]+?)\s*👤\s*\]\]")


@dataclass
class Item:
    """One parsed UpNote note, ready to be rendered into the vault.

    Attributes:
        uuid: Stem of the source file the note was read from.
        title: Note title taken from the heading.
        body: Cleaned Markdown body.
        kind: Either 'person' or 'lp'.
        category: Hint taken from the first LP link on the page, if any.
    """

    uuid: str
    title: str
    body: str
    kind: str
    category: str | None


def clean_body(rest: str) -> str:
    """Return the note text that follows the title, cleaned up for the vault.

    Removes the first Markdown table and the first "* * *" rule (presumably the
    UpNote metadata header — TODO confirm), drops every line that holds only a
    <br> tag, rewrites person / LP wikilinks to plain ``[[Name]]`` links and
    collapses runs of three or more newlines into one blank line.

    Args:
        rest: Raw note text after the title heading.

    Returns:
        The cleaned text without leading or trailing whitespace.
    """

    def _bare_link(match: re.Match[str]) -> str:
        # Keep only the captured target, without emoji / "LP:" decoration.
        return f"[[{match.group(1).strip()}]]"

    # count=1: only the first table and the first rule are removed.
    without_table = META_TABLE_RE.sub("", rest, count=1)
    without_rule = HR_RE.sub("", without_table, count=1)
    text = BR_RE.sub("", without_rule)

    # Person links first, then LP links — same order as before.
    for link_re in (PERSON_INLINE_RE, LP_INLINE_RE):
        text = link_re.sub(_bare_link, text)

    collapsed = re.sub(r"\n{3,}", "\n\n", text)
    return collapsed.strip()


def detect_category(full_text: str) -> str | None:
    """Return the target of the first LP wikilink in *full_text*, or None.

    Must be called on the raw text, before the wikilinks are rewritten.
    """
    first_link = LP_INLINE_RE.search(full_text)
    return first_link.group(1).strip() if first_link else None


def parse_person(src: Path) -> Item | None:
    """Parse one UpNote person note.

    Args:
        src: Path of the exported Markdown file (read as UTF-8).

    Returns:
        The parsed person, or None when the file has no ``P:`` heading or the
        heading leaves no usable title.
    """
    text = src.read_text(encoding="utf-8")
    m = TITLE_P_RE.search(text)
    if not m:
        return None
    # rstrip() takes a character set: drop any trailing bookmark emoji / spaces.
    title = m.group(1).strip().rstrip("📑 ").strip()
    if not title:
        # A heading such as "# P: 📑" would otherwise be written to a file
        # named ".md".
        return None
    # Category comes from the raw text, before clean_body() rewrites the links.
    cat = detect_category(text)
    body = clean_body(text[m.end():])
    if len(body) < 3:
        # still import empty-ish persons (might be stub), but mark
        body = "_Stub-Import aus UpNote. Inhalte ergänzen._"
    return Item(uuid=src.stem, title=title, body=body, kind="person", category=cat)


def parse_lp(src: Path) -> Item | None:
    """Parse one UpNote landing-page note.

    Args:
        src: Path of the exported Markdown file (read as UTF-8).

    Returns:
        The parsed landing page, or None when the file has no ``LP:`` heading,
        the heading leaves no usable title, or the cleaned body is shorter
        than three characters.
    """
    text = src.read_text(encoding="utf-8")
    m = TITLE_LP_RE.search(text)
    if not m:
        return None
    # rstrip() takes a character set: drop any trailing bookmark emoji / spaces.
    title = m.group(1).strip().rstrip("📑 ").strip()
    if not title:
        # A heading such as "# LP: 📑" would otherwise be written to a file
        # named ".md".
        return None
    body = clean_body(text[m.end():])
    if len(body) < 3:
        # Unlike persons, (nearly) empty landing pages are not imported.
        return None
    return Item(uuid=src.stem, title=title, body=body, kind="lp", category=None)


# Values that may be written as a plain (unquoted) YAML scalar: they start with
# a letter, contain only word characters, spaces and a few harmless punctuation
# marks, and do not end in a space.
_YAML_PLAIN_RE = re.compile(r"[^\W\d_](?:[\w .()+/-]*[\w.)+])?")
# Plain scalars that YAML parsers may read as booleans / null instead of text.
_YAML_RESERVED = frozenset({"true", "false", "yes", "no", "on", "off", "null", "y", "n"})


def _yaml_scalar(value: str) -> str:
    """Return *value* as a YAML scalar, double-quoted when plain style is unsafe."""
    import json  # local import: only needed for the quoting fallback

    if _YAML_PLAIN_RE.fullmatch(value) and value.lower() not in _YAML_RESERVED:
        return value
    # A JSON string literal is also a valid YAML double-quoted scalar.
    return json.dumps(value, ensure_ascii=False)


def render(item: Item) -> str:
    """Render *item* as a Markdown note with YAML front matter.

    Every note gets the tag 'upnote-import' plus 'person' or 'landing-page'.
    Person notes with a category additionally get a ``kategorie:`` entry; its
    value is quoted when it contains characters (such as ':' or '#') that
    would break or change the meaning of an unquoted YAML scalar.

    Args:
        item: The parsed note.

    Returns:
        The complete file content, ending with a newline.
    """
    kind_tag = "person" if item.kind == "person" else "landing-page"
    fm = ["---", "tags:", f"  - {kind_tag}", "  - upnote-import"]
    # Only person notes carry a category line.
    if item.kind == "person" and item.category:
        fm.append(f"kategorie: {_yaml_scalar(item.category)}")
    fm.append("---")
    return "\n".join(fm) + f"\n\n# {item.title}\n\n{item.body}\n"


def existing_person_stems() -> set[str]:
    """Return the lower-cased file stems of all ``*.md`` notes in PERSONEN.

    The set is empty when the folder does not exist.
    """
    stems: set[str] = set()
    if PERSONEN.exists():
        for note in PERSONEN.glob("*.md"):
            stems.add(note.stem.lower())
    return stems


def _safe_filename(title: str) -> str:
    """Return *title* with characters that Windows forbids in file names replaced by '-'."""
    return re.sub(r"[\\/:*?\"<>|]", "-", title)


def _read_head(src: Path, limit: int = 200) -> str:
    """Return up to *limit* leading characters of *src* without loading the whole file."""
    with src.open(encoding="utf-8", errors="ignore") as fh:
        return fh.read(limit)


def main() -> int:
    """Scan the UpNote export and write person / landing-page notes into the vault.

    Notes whose text starts with a ``P:`` heading become person notes, unless
    they are templates or a note of that name already exists. Notes starting
    with an ``LP:`` heading become landing pages. With ``--dry-run`` only the
    summary and the list of notes that would be written are printed; nothing
    is created on disk.

    Returns:
        Process exit code: 0 on success, 1 when the UpNote export folder is
        missing.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    # glob() on a missing folder yields nothing, which would look like a
    # successful import of zero notes — report the real problem instead.
    if not UPNOTE_ROOT.is_dir():
        print(f"UpNote export folder not found: {UPNOTE_ROOT}", file=sys.stderr)
        return 1

    existing_p = existing_person_stems()

    p_new: list[Item] = []
    p_skip: list[str] = []
    lps: list[Item] = []

    # sorted() keeps the report (and the winner among duplicate titles) stable
    # from run to run.
    for src in sorted(UPNOTE_ROOT.glob("*.md")):
        # Only the first characters are needed to classify the note.
        head = _read_head(src)
        if re.match(r"^#{1,3}\s*P:\s", head):
            item = parse_person(src)
            if not item:
                continue
            if "@@" in item.title:
                continue  # skip UpNote templates
            # Compare the name the file would get on disk, not the raw title.
            if _safe_filename(item.title).lower() in existing_p:
                p_skip.append(item.title)
                continue
            p_new.append(item)
        elif re.match(r"^#{1,3}\s*LP:\s", head):
            item = parse_lp(src)
            if item:
                lps.append(item)

    print(f"Persons: {len(p_new)} new, {len(p_skip)} skipped (already in vault)")
    for t in p_skip:
        print(f"  skip: {t}")
    print(f"Landing pages: {len(lps)}")

    if args.dry_run:
        print("\n--- would write ---")
        for i in p_new:
            print(f"  [person]  {i.title}  (kategorie={i.category})")
        for i in lps:
            print(f"  [lp]      {i.title}")
        return 0

    # Create the target folders only now, so --dry-run leaves the vault untouched.
    PERSONEN.mkdir(parents=True, exist_ok=True)
    LPS.mkdir(parents=True, exist_ok=True)

    written = 0
    for i in p_new:
        # Same sanitising as for landing pages: a raw title containing e.g.
        # ':' or '/' is not a valid Windows file name.
        target = PERSONEN / f"{_safe_filename(i.title)}.md"
        if target.exists():
            continue  # belt + suspenders (also catches duplicate titles in one run)
        target.write_text(render(i), encoding="utf-8")
        written += 1
    for i in lps:
        target = LPS / f"{_safe_filename(i.title)}.md"
        # write_text() replaces an existing file of the same name.
        target.write_text(render(i), encoding="utf-8")
        written += 1
    print(f"\nWritten: {written}")
    return 0


# Script entry point: hand main()'s return value to the interpreter as exit code.
if __name__ == "__main__":
    raise SystemExit(main())