""" Import UpNote 'N:' notes into the Obsidian vault. Classifies by title keywords into existing vault folders. Unknown -> 01 Inbox/. All imports tagged with 'upnote-import' for later triage. Usage: python import_notes.py --dry-run # preview classifications python import_notes.py --test # write to 01 Inbox/_import_test/ python import_notes.py # full import """ from __future__ import annotations import argparse import re import sys # force utf-8 stdout for emoji-laden titles try: sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined] except Exception: pass from dataclasses import dataclass from pathlib import Path UPNOTE_ROOT = Path( r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup" r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space" ) VAULT = Path(r"D:\projects\chrka\brain") INBOX = VAULT / "01 Inbox" TITLE_RE = re.compile(r"^#{1,3}\s*N:\s*(.+?)(?:\s*📑)?\s*$", re.MULTILINE) META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE) HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE) BR_RE = re.compile(r"^\s*\s*$", re.MULTILINE) LP_LINK_RE = re.compile(r"^\s*\[\[LP:[^\]]+\]\]\s*$", re.MULTILINE) # Classifier: (regex on lowercased title, target folder relative to VAULT) # First match wins. Order matters. 
RULES: list[tuple[str, str]] = [
    # NOTE(review): because the first matching rule wins, a few alternatives
    # below can never decide a classification: "wahlabend ucw" (the politics
    # rule already matches "wahlabend"), the second "dashboard für pv" (the IT
    # rule matches first), "hausrat hetty" ("hausrat" matches the finance rule
    # first) and "openSence" (titles are lowercased before matching).
    # --- Insurance & finances (family)
    (
        r"versicherung|haftpflicht|hausrat|rente|rürup|riester|krankenzusatz"
        r"|berufsunfähig|basisrente|metallrente|direktversicherung"
        r"|steuererklärung|volkswohlbund|canadalife|union investment"
        r"|gebäudeversicherung|jagdhaftpflicht|gesetzliche krankenversicherung",
        "03 Bereiche/Finanzen",
    ),
    # --- Health
    (
        r"supplemente|schmerzmittel|sportübung|7mind|bike-fitting|fitting"
        r"|präventionskurs|stressmanagement",
        "03 Bereiche/Gesundheit",
    ),
    # --- Politics (UCW/UWG/council group)
    (
        r"\bucw\b|\buwg\b|fraktionssitzung|mitgliederversammlung|wählergruppen"
        r"|wahlabend|pressemitteilung|sitzungsvorbereitung|plakatierung"
        r"|haushalt 20|feuerwehr drolshagen|uwg meeting|terma",
        "04 Ressourcen/Politik",
    ),
    # --- Home network & Home Assistant (private)
    (
        r"home-?assistant|home-?assistent|müllkalender|zigbee|haproxy|opnsense"
        r"|adguard|nginx ?proxy|\bnpm\b|multicast dns|macvlan|vlan|sophos"
        r"|openSence|opensence|heimnetz|reverse-proxy|mqtt|teleport|admin-netz"
        r"|netzwerk-interface|acme|full cert chain|straso|koogle",
        "03 Bereiche/Heimnetz & Home Assistant",
    ),
    # --- Projects (finished / vacations / parties -> archive)
    (
        r"bretagne|slowenien|italien|kegeltour|ostpreußen|hausbooturlaub"
        r"|südholland|planung geburtstag|partyplanung|urlaubsplanung"
        r"|wahlabend ucw",
        "06 Archiv",
    ),
    # --- KIT / IT management (work)
    (
        r"tisax|pc-migration|berechtigungskonzept|personalgespräch"
        r"|personalentwicklung|jour fixe|witec|key-user|hydra|mes-support"
        r"|desktop central|krah-app|verlagerung|docuware",
        "03 Bereiche/KIT",
    ),
    # --- SAP
    (r"sap-transaktionen|sap |\bspn\b|kerberos", "03 Bereiche/SAP"),
    # --- IT resources (how-tos, technical notes)
    (
        r"jenkins|docker|grafana|paperless|immich|gitea|blazor|resharper"
        r"|jetbrains|wsl|powershell|sql-transaktion|sql-server|appflowy|ec2"
        r"|claude|prompt|prompot|os-ticket|reject tickets|custom field"
        r"|autoassign|subscribe longrunning|ssl-zertifikat|hetty"
        r"|http-interception|dashboard für pv|netzwerkverbindung|fast typing"
        r"|getting started|notes-organisation|second brain|meta-framework"
        r"|mail automizer",
        "04 Ressourcen/IT",
    ),
    # --- Psychology / personal
    (r"persönlichkeitstest|die 3 ks", "04 Ressourcen/Psychologie"),
    # --- Buchhagen / house
    (
        r"\bpool\b|teich|klärgrube|zapfanlage|pv-anlage|pv anlage"
        r"|wechselrichter|stromzähler|dashboard für pv",
        "03 Bereiche/Familie",
    ),
    # --- Miscellaneous resources (ideas, lists)
    (
        r"bücher|lesestoff|geschenkideen|ideen essen|kindernamen"
        r"|fahrradtouren|fahrrdtouren|hausrat hetty",
        "04 Ressourcen/divers",
    ),
    # --- Legal / bookings
    (
        r"agb|geschäftsbedingungen|buchungsbestätigung|chaterbedingungen",
        "04 Ressourcen/divers",
    ),
    # --- Templates
    (r"@@titel@@", "04 Ressourcen/divers"),
]


@dataclass
class Note:
    """One parsed UpNote note, ready to be rendered into the vault."""

    uuid: str  # stem of the source file name
    title: str  # heading text without the "N:" prefix and surrounding "*"
    body: str  # cleaned markdown that followed the title heading
    target_rel: str  # folder relative to VAULT


def slugify(title: str) -> str:
    """Turn a note title into a file-name stem.

    German characters are kept; characters that are hostile to file systems
    are replaced by "-", whitespace runs are collapsed to a single space, and
    the result is capped at 120 characters with leading/trailing spaces, dots
    and dashes removed.

    Returns "untitled" when nothing usable is left, so callers never build a
    bare ".md" file name (which would be a hidden, nameless note).
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]", "-", title.strip())
    cleaned = re.sub(r"\s+", " ", cleaned)
    stem = cleaned[:120].strip(" .-")
    return stem or "untitled"


def classify(title: str) -> str:
    """Return the vault folder (relative to VAULT) for a note title.

    The lowercased title is tested against RULES in order; the folder of the
    first matching pattern wins. Titles that match nothing go to "01 Inbox".
    """
    low = title.lower()
    for pattern, folder in RULES:
        if re.search(pattern, low):
            return folder
    return "01 Inbox"


def parse_note(src: Path) -> Note | None:
    """Parse one UpNote markdown file into a Note.

    Returns None when the file has no "N:" title heading, when the title is
    empty after stripping markdown emphasis, or when fewer than three
    characters of body remain after cleanup.
    """
    # Undecodable bytes are replaced rather than raising, so a single damaged
    # file (which collect_source_files() tolerates as well) cannot abort the
    # whole import run.
    text = src.read_text(encoding="utf-8", errors="replace")
    m = TITLE_RE.search(text)
    if not m:
        return None

    # Strip surrounding markdown bold/italic markers from the title.
    title = re.sub(r"^\*+|\*+$", "", m.group(1).strip()).strip()
    if not title:
        return None

    body = text[m.end():]
    # Drop the first markdown table and the first "* * *" rule after the
    # title (presumably UpNote's metadata header), then all [[LP:...]] link
    # lines and everything BR_RE matches.
    body = META_TABLE_RE.sub("", body, count=1)
    body = HR_RE.sub("", body, count=1)
    body = LP_LINK_RE.sub("", body)
    body = BR_RE.sub("", body)
    # Collapse runs of three or more newlines into a single blank line.
    body = re.sub(r"\n{3,}", "\n\n", body).strip()
    if len(body) < 3:
        return None

    return Note(uuid=src.stem, title=title, body=body, target_rel=classify(title))


def render(note: Note) -> str:
    """Render a note as markdown with 'upnote-import' tag front matter."""
    return (
        "---\n"
        "tags:\n - upnote-import\n"
        "---\n\n"
        f"# {note.title}\n\n"
        f"{note.body}\n"
    )


def collect_source_files(trash: bool = False, root: Path | None = None) -> list[Path]:
    """Return the .md files whose text starts with an "N:" title heading.

    Args:
        trash: When true, look in the "trash" subfolder *instead of* the root
            folder itself.
        root: Folder to scan; defaults to UPNOTE_ROOT.

    Returns:
        Matching paths in sorted order, so repeated runs process notes in the
        same sequence. Files that cannot be read are skipped.
    """
    base = UPNOTE_ROOT if root is None else root
    src_dir = base / "trash" if trash else base

    found: list[Path] = []
    for path in sorted(src_dir.glob("*.md")):
        try:
            # Only the first 200 characters are needed to recognise the title.
            with path.open(encoding="utf-8", errors="ignore") as fh:
                head = fh.read(200)
        except OSError:
            continue
        if re.match(r"^#{1,3}\s*N:\s", head):
            found.append(path)
    return found


def existing_titles_in_vault() -> set[str]:
    """Lowercased filename stems of all .md files in vault (excludes _import_test)."""
    skipped_parts = {".obsidian", "node_modules", "_import_test"}
    return {
        p.stem.lower()
        for p in VAULT.rglob("*.md")
        if skipped_parts.isdisjoint(p.parts)
    }


def main(argv: list[str] | None = None) -> int:
    """Run the import and return the process exit code.

    Args:
        argv: Command-line arguments without the program name; defaults to
            sys.argv[1:].
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--test", action="store_true")
    ap.add_argument("--trash", action="store_true", help="source from trash/, skip titles already in vault")
    ap.add_argument("--limit", type=int, default=0)
    args = ap.parse_args(argv)

    # Titles already in the vault are only consulted when importing from trash.
    existing = existing_titles_in_vault() if args.trash else set()

    notes: list[Note] = []
    skipped_empty = 0
    skipped_dupe = 0
    for src in collect_source_files(trash=args.trash):
        n = parse_note(src)
        if n is None:
            skipped_empty += 1
            continue
        if args.trash and slugify(n.title).lower() in existing:
            skipped_dupe += 1
            print(f" skip (already in vault): {n.title}")
            continue
        notes.append(n)

    notes.sort(key=lambda n: (n.target_rel, n.title.lower()))
    # Only a positive limit truncates; a negative value would otherwise
    # silently drop notes from the end of the list.
    if args.limit > 0:
        notes = notes[: args.limit]

    # stats
    counts: dict[str, int] = {}
    for n in notes:
        counts[n.target_rel] = counts.get(n.target_rel, 0) + 1

    print(f"Parsed notes: {len(notes)} (skipped empty: {skipped_empty})")
    if args.trash:
        print(f"Skipped (already in vault): {skipped_dupe}")
    print("Classification:")
    for k in sorted(counts, key=lambda x: (-counts[x], x)):
        print(f" {counts[k]:3d} {k}")

    if args.dry_run:
        print("\n--- Dry-run detail ---")
        for n in notes:
            print(f" [{n.target_rel}] {n.title}")
        return 0

    # --test writes below "01 Inbox/_import_test/", as the usage text at the
    # top of the file documents; a full import writes straight into the vault.
    out_root = INBOX / "_import_test" if args.test else VAULT

    written = skipped_exists = 0
    for n in notes:
        out_dir = out_root / n.target_rel
        out_dir.mkdir(parents=True, exist_ok=True)
        stem = slugify(n.title)
        target = out_dir / f"{stem}.md"
        if target.exists():
            # avoid overwriting existing files; disambiguate with UUID
            target = out_dir / f"{stem} ({n.uuid[:8]}).md"
            if target.exists():
                skipped_exists += 1
                continue
        target.write_text(render(n), encoding="utf-8")
        written += 1

    print(f"\nWritten: {written}, skipped (exists): {skipped_exists}")
    return 0


if __name__ == "__main__":
    sys.exit(main())