brain/scripts/import_meetings.py

177 lines
5.3 KiB
Python

"""
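Import UpNote meeting notes (titles starting with 'M:') into 03 Bereiche/Meetings/.

Title format: 'M: YYYY-MM-DD[:] Title 📅'
Target: '03 Bereiche/Meetings/YYYY-MM-DD Title.md'
If the target file already exists, the imported body is appended to it.
Templates and notes without real content are skipped. Newly created files
are tagged 'meeting' and 'upnote-import'.
Use --trash to read from the UpNote trash folder and --dry-run to only list
the parsed meetings.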
"""
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# Best-effort: switch stdout to UTF-8 before anything is printed.
# NOTE(review): presumably needed so non-ASCII meeting titles print on
# consoles with a legacy code page - confirm.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    # Deliberately ignored: if stdout cannot be reconfigured, the script
    # simply keeps the stream's current encoding.
    pass
# Folder with the Markdown files of the UpNote backup ("General Space").
# Machine- and account-specific absolute path.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Root of the note vault that receives the imported meetings.
VAULT = Path(r"D:\projects\chrka\brain")
# Target folder for the generated 'YYYY-MM-DD Title.md' files.
MEETINGS = VAULT / "03 Bereiche" / "Meetings"
# Title heading of a meeting note: 1-3 '#', then 'M: YYYY-MM-DD', an optional
# colon, the title and an optional trailing calendar emoji.
# Group 1 is the date, group 2 the title without the emoji.
TITLE_RE = re.compile(
    r"^#{1,3}\s*M:\s*(\d{4}-\d{2}-\d{2})[:\s]+(.+?)(?:\s*📅)?\s*$",
    re.MULTILINE,
)
# A Markdown table: header row, separator row, then one or more further rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# A line consisting of '* * *' (horizontal rule).
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line consisting only of a '<br>' / '<br/>' tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)
# Typed wikilinks. Group 1 is always the bare link target.
# Person links have no prefix, so their emoji is the only marker and is required.
PERSON_INLINE_RE = re.compile(r"\[\[([^\[\]|]+?)\s*👤\s*\]\]")
# The map emoji is U+1F5FA, usually followed by the variation selector U+FE0F.
# A bare '?' after the literal emoji would only make the selector optional,
# so the whole sequence is wrapped in an optional group (written as escapes to
# keep the invisible selector explicit). This matches the 'M:' / 'N:' patterns,
# where the emoji is optional as well.
LP_INLINE_RE = re.compile(r"\[\[LP:\s*(.+?)\s*(?:\U0001F5FA\uFE0F?)?\s*\]\]")
M_INLINE_RE = re.compile(r"\[\[M:\s*(.+?)\s*📅?\s*\]\]")
N_INLINE_RE = re.compile(r"\[\[N:\s*(.+?)\s*📑?\s*\]\]")
# UpNote section headers: a level 1-3 heading line containing the given word.
SEC_TEILNEHMER = re.compile(r"^#{1,3}.*Teilnehmer.*$", re.MULTILINE)
SEC_THEMA = re.compile(r"^#{1,3}.*Thema.*$", re.MULTILINE)
SEC_NOTIZEN = re.compile(r"^#{1,3}.*Notizen.*$", re.MULTILINE)
SEC_DATUM = re.compile(r"^#{1,3}.*Datum.*$", re.MULTILINE)
@dataclass
class Meeting:
    """One meeting note parsed from an UpNote Markdown export file."""

    uuid: str  # stem of the source file name (set by parse())
    date: str  # date captured from the title heading, 'YYYY-MM-DD'
    title: str  # heading title, sanitized for use in a file name
    body: str  # cleaned, post-processed
def clean(rest: str) -> str:
    """Tidy the note text that follows the title heading.

    Removes the first Markdown table and the first ``* * *`` rule, drops
    ``<br>``-only lines, unwraps the typed wikilinks (person, ``LP:``, ``M:``,
    ``N:``) to plain ``[[target]]`` links, blanks empty ``>`` quote lines and
    collapses runs of three or more newlines.

    Args:
        rest: Raw note text after the title heading.

    Returns:
        The cleaned text, stripped of leading and trailing whitespace.
    """

    def unwrap(match: re.Match) -> str:
        # Keep only the link target captured in group 1.
        return f"[[{match.group(1).strip()}]]"

    # Only the first table and the first rule are treated as boilerplate.
    text = META_TABLE_RE.sub("", rest, count=1)
    text = HR_RE.sub("", text, count=1)
    text = BR_RE.sub("", text)

    # Same order as the patterns are declared: person, LP, M, N.
    for link_re in (PERSON_INLINE_RE, LP_INLINE_RE, M_INLINE_RE, N_INLINE_RE):
        text = link_re.sub(unwrap, text)

    # Empty '>' quotes are left over from the Thema placeholder.
    text = re.sub(r"^>\s*$", "", text, flags=re.MULTILINE)
    return re.sub(r"\n{3,}", "\n\n", text).strip()
def has_content(body: str) -> bool:
    """Tell whether a cleaned note body holds more than empty scaffolding.

    Heading lines and bare ``-`` bullets are discarded, all whitespace is
    removed, and the body counts as real content only if more than ten
    characters are left.
    """
    remainder = body
    # Drop heading lines first, then bullets that carry no text.
    for scaffold in (r"^#{1,4}.*$", r"^\s*-\s*$"):
        remainder = re.sub(scaffold, "", remainder, flags=re.MULTILINE)
    compact = re.sub(r"\s+", "", remainder)
    return len(compact) > 10
def parse(src: Path) -> Meeting | None:
    """Read one UpNote export file and turn it into a ``Meeting``.

    Args:
        src: Path of the Markdown file; it is read as UTF-8.

    Returns:
        The parsed meeting, or ``None`` when the file has no ``M:`` title
        heading, when the title contains a template marker (``@@`` or ``{{``),
        or when the cleaned body has no real content.
    """
    raw = src.read_text(encoding="utf-8")
    heading = TITLE_RE.search(raw)
    if heading is None:
        return None

    date, name = heading.group(1), heading.group(2).strip()
    # Strip Markdown bold/italic markers around the title.
    name = re.sub(r"^\*+|\*+$", "", name).strip()
    # Unwrap Markdown links: [text](url) -> text
    name = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", name)

    # Unfilled templates are not imported.
    if any(marker in name for marker in ("@@", "{{")):
        return None

    # Replace characters that are not allowed in file names, then squeeze
    # whitespace runs into single spaces.
    name = re.sub(r"[\\/:*?\"<>|]", "-", name)
    name = re.sub(r"\s+", " ", name).strip()

    # Everything after the title heading is the note body.
    body = clean(raw[heading.end():])
    if has_content(body):
        return Meeting(uuid=src.stem, date=date, title=name, body=body)
    return None
def render(mtg: Meeting) -> str:
    """Build the Markdown content of a newly imported meeting file.

    The result consists of YAML front matter (tags ``meeting`` and
    ``upnote-import`` plus the meeting date), a level-1 heading
    ``# <date> <title>`` and the cleaned body.

    Args:
        mtg: The meeting to render.

    Returns:
        The complete file content, ending with a newline.
    """
    front_matter = (
        "---\n"
        "tags:\n - meeting\n - upnote-import\n"
        f"date: {mtg.date}\n"
        "---\n\n"
    )
    # Date and title are separated by a space, matching the target file name
    # 'YYYY-MM-DD Title.md'; without it the heading reads '2024-01-01Title'.
    return f"{front_matter}# {mtg.date} {mtg.title}\n\n{mtg.body}\n"
def append_block(existing: str, mtg: Meeting) -> str:
    """Merge an imported meeting body into the content of an existing file.

    The body is appended under a ``## UpNote-Import`` heading. If the existing
    content already contains the body verbatim (for example because the import
    was run before), the content is returned unchanged, so repeated runs do
    not pile up duplicate blocks.

    Args:
        existing: Current content of the target file.
        mtg: The meeting whose body should be merged in.

    Returns:
        The new file content.
    """
    if mtg.body in existing:
        # Already imported (or identical text present): nothing to add.
        return existing
    return f"{existing.rstrip()}\n\n## UpNote-Import\n\n{mtg.body}\n"
def collect(trash: bool) -> list[Path]:
    """Return the UpNote export files that start with an ``M:`` heading.

    Args:
        trash: Scan the ``trash`` subfolder of ``UPNOTE_ROOT`` instead of
            ``UPNOTE_ROOT`` itself.

    Returns:
        The ``*.md`` files directly inside the scanned folder whose text
        begins with a level 1-3 heading starting with ``M:``. Files that
        cannot be opened or read are skipped.
    """
    folder = UPNOTE_ROOT / "trash" if trash else UPNOTE_ROOT
    head_re = re.compile(r"^#{1,3}\s*M:\s")
    found: list[Path] = []
    for path in folder.glob("*.md"):
        try:
            # Only the first 200 characters are inspected, so read just that
            # prefix instead of loading the whole note. Undecodable bytes are
            # ignored here; this is only a cheap pre-filter.
            with path.open(encoding="utf-8", errors="ignore") as fh:
                head = fh.read(200)
        except OSError:
            continue
        if head_re.match(head):
            found.append(path)
    return found
def main() -> int:
    """Command-line entry point: collect, parse and import the meeting notes.

    Options (read from ``sys.argv``):
        --dry-run  Only list the parsed meetings; nothing is created or written.
        --trash    Read from the ``trash`` subfolder of the UpNote export.

    New meetings are written to ``MEETINGS / "<date> <title>.md"``. If that
    file already exists, the imported body is merged into it via
    ``append_block``.

    Returns:
        Always ``0``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--trash", action="store_true")
    args = ap.parse_args()

    meetings: list[Meeting] = []
    skipped_empty = 0
    for src in collect(trash=args.trash):
        mtg = parse(src)
        if mtg is None:
            # parse() returns None for untitled, template and empty notes alike.
            skipped_empty += 1
            continue
        meetings.append(mtg)
    meetings.sort(key=lambda x: (x.date, x.title.lower()))
    print(f"Parsed: {len(meetings)} meetings (skipped empty/template: {skipped_empty})")

    if args.dry_run:
        for mtg in meetings:
            print(f" {mtg.date} {mtg.title}")
        return 0

    # Create the target folder only now, so that a dry run leaves the file
    # system untouched.
    MEETINGS.mkdir(parents=True, exist_ok=True)
    written = merged = 0
    for mtg in meetings:
        target = MEETINGS / f"{mtg.date} {mtg.title}.md"
        if target.exists():
            # Same date and title already present: merge instead of overwriting.
            target.write_text(append_block(target.read_text(encoding="utf-8"), mtg), encoding="utf-8")
            merged += 1
        else:
            target.write_text(render(mtg), encoding="utf-8")
            written += 1
    print(f"Written: {written}, merged: {merged}")
    return 0
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())