# brain/scripts/import_persons_lps.py
#
# 179 lines
# 5.4 KiB
# Python

"""
Import UpNote P: (persons) and LP: (landing pages) notes.
Persons -> 00 Kontext/Personen/ (skip if file already exists)
Landing pages -> 08 Landing Pages/ (new folder)
All imports get tag 'upnote-import' for triage.
Wikilinks like [[Name 👤]] are rewritten to [[Name]]; [[LP: Foo 🗺️]] -> [[Foo]].
"""
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# Best effort: switch stdout to UTF-8 before anything is printed.
# Presumably needed because note titles (printed in main) can contain emoji and
# umlauts that a legacy console code page cannot encode -- TODO confirm.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    # Deliberately ignored: if stdout cannot be reconfigured the script still
    # runs with whatever encoding the stream already has.
    pass
# Source folder that is scanned (non-recursively, "*.md") for notes to import.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Root of the target vault; the two import destinations live below it.
VAULT = Path(r"D:\projects\chrka\brain")
# Destination for person notes (one "<title>.md" per person).
PERSONEN = VAULT / "00 Kontext" / "Personen"
# Destination for landing-page notes.
LPS = VAULT / "08 Landing Pages"
# A Markdown pipe table: one header row, one separator row made of
# whitespace / '-' / ':' / '|', then one or more further rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# A line consisting only of three asterisks, optionally separated by whitespace.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line that contains nothing but a <br> / <br/> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)
# Level 1-3 heading "P: <name>"; group 1 is the name, a trailing 👤 is not captured.
TITLE_P_RE = re.compile(r"^#{1,3}\s*P:\s*(.+?)(?:\s*👤)?\s*$", re.MULTILINE)
# Level 1-3 heading "LP: <name>"; group 1 is the name, a trailing map emoji is not captured.
# NOTE(review): if the map emoji is stored with a variation selector, the `?`
# after it applies to that selector only -- confirm against real note files.
TITLE_LP_RE = re.compile(r"^#{1,3}\s*LP:\s*(.+?)(?:\s*🗺️?)?\s*$", re.MULTILINE)
# find [[LP: Foo 🗺️]] anywhere; group 1 is the link target without prefix/emoji
LP_INLINE_RE = re.compile(r"\[\[LP:\s*(.+?)\s*🗺️?\s*\]\]")
# Wikilink ending in 👤 (no '[', ']' or '|' inside); group 1 is the target name.
PERSON_INLINE_RE = re.compile(r"\[\[([^\[\]|]+?)\s*👤\s*\]\]")
@dataclass
class Item:
    """One parsed UpNote note that is a candidate for import into the vault."""

    uuid: str  # stem of the source file name (the parsers pass ``src.stem``)
    title: str  # heading text without the "P:" / "LP:" prefix and marker emoji
    body: str  # cleaned note text that follows the title heading
    kind: str  # 'person' | 'lp'
    category: str | None  # from first LP link on page; always None for kind 'lp'
def clean_body(rest: str) -> str:
    """Strip UpNote boilerplate from a note body and simplify its wikilinks.

    Steps, in order:
      1. remove the first Markdown pipe table,
      2. remove the first ``* * *`` rule line,
      3. remove every line that holds only a ``<br>`` tag,
      4. rewrite ``[[Name 👤]]`` and ``[[LP: Foo 🗺️]]`` links to ``[[Name]]`` /
         ``[[Foo]]``,
      5. collapse runs of three or more newlines to one blank line and trim
         surrounding whitespace.

    Args:
        rest: The note text that follows the title heading.

    Returns:
        The cleaned body text.
    """
    without_table = META_TABLE_RE.sub("", rest, count=1)
    without_rule = HR_RE.sub("", without_table, count=1)
    text = BR_RE.sub("", without_rule)

    def _bare_link(match: re.Match[str]) -> str:
        # Keep only the captured target name, without prefix or emoji.
        return f"[[{match.group(1).strip()}]]"

    # rewrite wikilinks (person links first, then landing-page links)
    for pattern in (PERSON_INLINE_RE, LP_INLINE_RE):
        text = pattern.sub(_bare_link, text)

    return re.sub(r"\n{3,}", "\n\n", text).strip()
def detect_category(full_text: str) -> str | None:
    """Return the target of the first ``[[LP: ...]]`` link in *full_text*.

    Must be called on the unmodified note text, i.e. before the links are
    rewritten. Returns ``None`` when the text contains no such link.
    """
    first_link = LP_INLINE_RE.search(full_text)
    return first_link.group(1).strip() if first_link else None
def parse_person(src: Path) -> Item | None:
    """Parse a ``P:`` note file into a person :class:`Item`.

    Args:
        src: Path of the Markdown file to read (UTF-8).

    Returns:
        The parsed item, or ``None`` when the file has no ``P:`` heading.
        A body shorter than three characters is replaced by a stub marker
        text so that near-empty persons are still imported.
    """
    text = src.read_text(encoding="utf-8")
    heading = TITLE_P_RE.search(text)
    if heading is None:
        return None

    name = heading.group(1).strip().rstrip("📑 ").strip()
    content = clean_body(text[heading.end():])
    return Item(
        uuid=src.stem,
        title=name,
        # still import empty-ish persons (might be stub), but mark them
        body=content if len(content) >= 3 else "_Stub-Import aus UpNote. Inhalte ergänzen._",
        kind="person",
        category=detect_category(text),
    )
def parse_lp(src: Path) -> Item | None:
    """Parse an ``LP:`` note file into a landing-page :class:`Item`.

    Args:
        src: Path of the Markdown file to read (UTF-8).

    Returns:
        The parsed item, or ``None`` when the file has no ``LP:`` heading or
        its cleaned body is shorter than three characters.
    """
    text = src.read_text(encoding="utf-8")
    heading = TITLE_LP_RE.search(text)
    if heading is None:
        return None

    content = clean_body(text[heading.end():])
    if len(content) < 3:
        return None

    page_title = heading.group(1).strip().rstrip("📑 ").strip()
    return Item(uuid=src.stem, title=page_title, body=content, kind="lp", category=None)
def render(item: Item) -> str:
    """Build the Markdown file content for *item*.

    The result is a YAML front-matter block followed by an H1 with the title
    and the body. Persons are tagged ``person`` and get a ``kategorie`` entry
    when a category is set; everything else is tagged ``landing-page``. Both
    kinds carry the ``upnote-import`` tag.
    """
    is_person = item.kind == "person"
    front = [
        "---",
        "tags:",
        " - person" if is_person else " - landing-page",
        " - upnote-import",
    ]
    if is_person and item.category:
        front.append(f"kategorie: {item.category}")
    front.append("---")

    header = "\n".join(front)
    return f"{header}\n\n# {item.title}\n\n{item.body}\n"
def existing_person_stems() -> set[str]:
    """Return the lower-cased file stems of all ``*.md`` files in PERSONEN.

    An empty set is returned when the folder does not exist.
    """
    if PERSONEN.exists():
        return set(note.stem.lower() for note in PERSONEN.glob("*.md"))
    return set()
def _safe_filename(title: str) -> str:
    """Replace characters that Windows forbids in file names with '-'."""
    return re.sub(r"[\\/:*?\"<>|]", "-", title)


def main() -> int:
    """Scan UPNOTE_ROOT for ``P:`` / ``LP:`` notes and import them into the vault.

    Command line:
        --dry-run   Only report what would be written; nothing is created on
                    disk (neither files nor the target folders).

    Persons whose file name already exists in PERSONEN (case-insensitive) are
    skipped and listed. Landing pages are always written to LPS.

    Returns:
        Process exit code, always 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    # existing_person_stems() copes with a missing folder, so no mkdir is
    # needed before scanning -- this keeps --dry-run free of side effects.
    existing_p = existing_person_stems()
    p_new: list[Item] = []
    p_skip: list[str] = []
    lps: list[Item] = []

    for src in UPNOTE_ROOT.glob("*.md"):
        # Only the start of the file decides which kind of note this is.
        head = src.read_text(encoding="utf-8", errors="ignore")[:200]
        if re.match(r"^#{1,3}\s*P:\s", head):
            item = parse_person(src)
            if not item:
                continue
            if "@@" in item.title:
                continue  # skip UpNote templates
            # Compare the name the file would actually get on disk, so titles
            # with forbidden characters are matched against existing files.
            if _safe_filename(item.title).lower() in existing_p:
                p_skip.append(item.title)
                continue
            p_new.append(item)
        elif re.match(r"^#{1,3}\s*LP:\s", head):
            item = parse_lp(src)
            if item:
                lps.append(item)

    print(f"Persons: {len(p_new)} new, {len(p_skip)} skipped (already in vault)")
    for t in p_skip:
        print(f" skip: {t}")
    print(f"Landing pages: {len(lps)}")

    if args.dry_run:
        print("\n--- would write ---")
        for i in p_new:
            print(f" [person] {i.title} (kategorie={i.category})")
        for i in lps:
            print(f" [lp] {i.title}")
        return 0

    # Create the target folders only when something is really written.
    PERSONEN.mkdir(parents=True, exist_ok=True)
    LPS.mkdir(parents=True, exist_ok=True)

    written = 0
    for i in p_new:
        # Same sanitising as for landing pages: a raw title containing e.g.
        # '/' or ':' is not a valid file name on Windows.
        target = PERSONEN / f"{_safe_filename(i.title)}.md"
        if target.exists():
            continue  # belt + suspenders
        target.write_text(render(i), encoding="utf-8")
        written += 1
    for i in lps:
        target = LPS / f"{_safe_filename(i.title)}.md"
        target.write_text(render(i), encoding="utf-8")
        written += 1

    print(f"\nWritten: {written}")
    return 0
# Script entry point: run the import and use main()'s return value as exit code.
if __name__ == "__main__":
    raise SystemExit(main())