246 lines
8.5 KiB
Python
246 lines
8.5 KiB
Python
"""
|
|
Import UpNote 'N:' notes into the Obsidian vault.

Classifies by title keywords into existing vault folders.
Unknown -> 01 Inbox/. All imports tagged with 'upnote-import' for later triage.

Usage:
    python import_notes.py --dry-run   # preview classifications
    python import_notes.py --test      # write to 01 Inbox/_import_test/
    python import_notes.py --trash     # source from trash/, skip titles already in vault
    python import_notes.py --limit N   # only process the first N notes
    python import_notes.py             # full import
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path

# Force UTF-8 stdout so emoji-laden titles can be printed.
# This is deliberately best effort: if stdout cannot be reconfigured (for
# example because it is not a regular text stream), keep whatever encoding is
# already in place instead of aborting the import.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    pass
|
|
|
|
# Source: markdown export folder of the UpNote backup.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Destination: root of the Obsidian vault and its inbox folder.
VAULT = Path(r"D:\projects\chrka\brain")
INBOX = VAULT.joinpath("01 Inbox")

# Heading of level 1-3 starting with "N:"; group 1 is the title without an
# optional trailing bookmark emoji.
TITLE_RE = re.compile(r"^#{1,3}\s*N:\s*(.+?)(?:\s*📑)?\s*$", re.M)
# A markdown table: header row, separator row, one or more body rows.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.M)
# A "* * *" horizontal rule on a line of its own.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.M)
# A line consisting only of a <br> tag.
BR_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.M)
# A line consisting only of an [[LP:...]] wiki link.
LP_LINK_RE = re.compile(r"^\s*\[\[LP:[^\]]+\]\]\s*$", re.M)
|
|
|
|
# Classifier: (regex on lowercased title, target folder relative to VAULT)
# First match wins. Order matters.
# Patterns are searched in the *lowercased* title, so they must be written in
# lowercase themselves; an alternative containing uppercase can never match.
RULES: list[tuple[str, str]] = [
    # --- Specific phrases that must win over the broader keywords below.
    # "wahlabend ucw" would otherwise be claimed by the Politik rule
    # (wahlabend / ucw) and "hausrat hetty" by the Finanzen rule (hausrat),
    # which made both alternatives unreachable in their original rules.
    (r"wahlabend ucw",
     "06 Archiv"),
    (r"hausrat hetty",
     "04 Ressourcen/divers"),

    # --- Versicherungen & Finanzen (Familie)
    (r"versicherung|haftpflicht|hausrat|rente|rürup|riester|krankenzusatz|berufsunfähig|basisrente|metallrente|direktversicherung|steuererklärung|volkswohlbund|canadalife|union investment|gebäudeversicherung|jagdhaftpflicht|gesetzliche krankenversicherung",
     "03 Bereiche/Finanzen"),

    # --- Gesundheit
    (r"supplemente|schmerzmittel|sportübung|7mind|bike-fitting|fitting|präventionskurs|stressmanagement",
     "03 Bereiche/Gesundheit"),

    # --- Politik (UCW/UWG/Fraktion)
    (r"\bucw\b|\buwg\b|fraktionssitzung|mitgliederversammlung|wählergruppen|wahlabend|pressemitteilung|sitzungsvorbereitung|plakatierung|haushalt 20|feuerwehr drolshagen|uwg meeting|terma",
     "04 Ressourcen/Politik"),

    # --- Heimnetz & Home Assistant (privat)
    # The former "openSence" alternative was dropped: titles are lowercased
    # before matching, so only the lowercase spelling can ever match.
    (r"home-?assistant|home-?assistent|müllkalender|zigbee|haproxy|opnsense|adguard|nginx ?proxy|\bnpm\b|multicast dns|macvlan|vlan|sophos|opensence|heimnetz|reverse-proxy|mqtt|teleport|admin-netz|netzwerk-interface|acme|full cert chain|straso|koogle",
     "03 Bereiche/Heimnetz & Home Assistant"),

    # --- Projekte (abgeschlossen/Urlaube/Feiern -> Archiv)
    (r"bretagne|slowenien|italien|kegeltour|ostpreußen|hausbooturlaub|südholland|planung geburtstag|partyplanung|urlaubsplanung",
     "06 Archiv"),

    # --- KIT / IT-Management Arbeit
    (r"tisax|pc-migration|berechtigungskonzept|personalgespräch|personalentwicklung|jour fixe|witec|key-user|hydra|mes-support|desktop central|krah-app|verlagerung|docuware",
     "03 Bereiche/KIT"),

    # --- SAP
    (r"sap-transaktionen|sap |\bspn\b|kerberos",
     "03 Bereiche/SAP"),

    # --- IT-Ressourcen (How-Tos, technische Notes)
    (r"jenkins|docker|grafana|paperless|immich|gitea|blazor|resharper|jetbrains|wsl|powershell|sql-transaktion|sql-server|appflowy|ec2|claude|prompt|prompot|os-ticket|reject tickets|custom field|autoassign|subscribe longrunning|ssl-zertifikat|hetty|http-interception|dashboard für pv|netzwerkverbindung|fast typing|getting started|notes-organisation|second brain|meta-framework|mail automizer",
     "04 Ressourcen/IT"),

    # --- Psychologie / Persönlich
    (r"persönlichkeitstest|die 3 ks",
     "04 Ressourcen/Psychologie"),

    # --- Buchhagen / Haus
    # NOTE(review): "dashboard für pv" used to be listed here as well, but the
    # IT rule above always claims it first; confirm which folder is intended.
    (r"\bpool\b|teich|klärgrube|zapfanlage|pv-anlage|pv anlage|wechselrichter|stromzähler",
     "03 Bereiche/Familie"),

    # --- divers Ressourcen (Ideen, Listen)
    (r"bücher|lesestoff|geschenkideen|ideen essen|kindernamen|fahrradtouren|fahrrdtouren",
     "04 Ressourcen/divers"),

    # --- Rechtliches / Buchungen
    (r"agb|geschäftsbedingungen|buchungsbestätigung|chaterbedingungen",
     "04 Ressourcen/divers"),

    # --- Vorlagen / Templates
    (r"@@titel@@",
     "04 Ressourcen/divers"),
]
|
|
|
|
|
|
@dataclass
class Note:
    """A parsed UpNote note together with its destination folder."""

    # Identifier of the source note (the importer uses the source file stem).
    uuid: str
    # Title text taken from the note's "N:" heading.
    title: str
    # Markdown body that follows the heading, after cleanup.
    body: str
    # Destination folder, relative to VAULT.
    target_rel: str
|
|
|
|
|
|
def slugify(title: str) -> str:
    """Turn a note title into a safe file name stem.

    German umlauts and other Unicode letters are kept. Whitespace runs are
    collapsed to one space, characters that are not allowed in file names
    (``\\ / : * ? " < > |`` and ASCII control characters) become ``-``, the
    result is cut to 120 characters and stripped of leading/trailing spaces,
    dots and dashes.

    Args:
        title: Raw note title.

    Returns:
        The file name stem (without extension). Never empty: a title that
        consists only of stripped characters yields ``"Untitled"`` so the
        caller does not end up writing a file named just ``.md``.
    """
    cleaned = re.sub(r"\s+", " ", title.strip())
    # Control characters are as invalid in Windows file names as the
    # punctuation below, so they are replaced the same way.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "-", cleaned)
    # Strip after truncating so a cut in the middle of " - " leaves no
    # trailing separator.
    slug = cleaned[:120].strip(" .-")
    return slug or "Untitled"
|
|
|
|
|
|
def classify(title: str) -> str:
    """Return the vault folder (relative to VAULT) for a note title.

    The title is lowercased and tested against each RULES pattern in order;
    the folder of the first matching rule is returned. Titles that match no
    rule go to ``01 Inbox``.
    """
    lowered = title.lower()
    # The generator is lazy, so evaluation stops at the first matching rule.
    matching_folders = (
        folder for pattern, folder in RULES if re.search(pattern, lowered)
    )
    return next(matching_folders, "01 Inbox")
|
|
|
|
|
|
def parse_note(src: Path) -> Note | None:
    """Parse one exported UpNote markdown file into a Note.

    The title comes from the first ``N:`` heading found by TITLE_RE, with
    surrounding ``*`` emphasis markers removed. Everything after that heading
    becomes the body once export boilerplate is stripped: the first markdown
    table, the first ``* * *`` rule, every ``[[LP:...]]`` link line and every
    ``<br>`` line.

    Args:
        src: Path of the ``.md`` file; its stem is stored as the note's uuid.

    Returns:
        The parsed note, or ``None`` when the file has no ``N:`` heading or
        fewer than three characters of body remain after cleanup.
    """
    # Undecodable bytes are replaced rather than raising: the file scan in
    # collect_source_files() already reads tolerantly, so a single damaged
    # file must not abort the whole import here.
    text = src.read_text(encoding="utf-8", errors="replace")
    heading = TITLE_RE.search(text)
    if heading is None:
        return None

    # Strip surrounding markdown bold/italic markers from the title.
    title = re.sub(r"^\*+|\*+$", "", heading.group(1).strip()).strip()

    body = text[heading.end():]
    body = META_TABLE_RE.sub("", body, count=1)
    body = HR_RE.sub("", body, count=1)
    body = LP_LINK_RE.sub("", body)
    body = BR_RE.sub("", body)
    # Collapse the runs of blank lines left behind by the removals above.
    body = re.sub(r"\n{3,}", "\n\n", body).strip()

    # Fewer than three characters of content is treated as an empty note.
    if len(body) < 3:
        return None

    return Note(uuid=src.stem, title=title, body=body, target_rel=classify(title))
|
|
|
|
|
|
def render(note: Note) -> str:
    """Build the markdown file content for an imported note.

    The result is a YAML front matter block carrying the ``upnote-import``
    tag, followed by the title as a level-1 heading and the note body.
    """
    front_matter = "---\ntags:\n - upnote-import\n---\n\n"
    return f"{front_matter}# {note.title}\n\n{note.body}\n"
|
|
|
|
|
|
def collect_source_files(trash: bool = False) -> list[Path]:
    """Return the exported ``.md`` files that start with an ``N:`` heading.

    Only the top level of one folder is scanned (no recursion).

    Args:
        trash: When false (default) scan UPNOTE_ROOT itself; when true scan
            its ``trash`` subfolder *instead* (the two are never combined).

    Returns:
        Paths whose first 200 characters begin with a level 1-3 heading of
        the form ``# N: ...``. Files that cannot be read are skipped.
    """
    source_dir = UPNOTE_ROOT / "trash" if trash else UPNOTE_ROOT
    found: list[Path] = []
    for path in source_dir.glob("*.md"):
        try:
            # Only the head is needed for the check, so do not load the whole
            # file. errors="ignore" means decoding itself cannot fail here.
            with path.open(encoding="utf-8", errors="ignore") as fh:
                head = fh.read(200)
        except OSError:
            # Unreadable file (locked, permissions, vanished): skip it.
            continue
        if re.match(r"^#{1,3}\s*N:\s", head):
            found.append(path)
    return found
|
|
|
|
|
|
def existing_titles_in_vault() -> set[str]:
    """Collect the lowercased file name stems of every ``.md`` file in VAULT.

    Files located under a ``.obsidian``, ``node_modules`` or ``_import_test``
    directory are left out.
    """
    excluded_dirs = {".obsidian", "node_modules", "_import_test"}
    return {
        md_file.stem.lower()
        for md_file in VAULT.rglob("*.md")
        if excluded_dirs.isdisjoint(md_file.parts)
    }
|
|
|
|
|
|
def main() -> int:
    """Run the import from the command line.

    Command-line flags:
        --dry-run  Print the classification of every note and write nothing.
        --test     Write below ``01 Inbox/_import_test/`` (keeping the target
                   folder structure) instead of the real vault folders.
        --trash    Read notes from the export's ``trash`` folder and skip
                   those whose file name already exists in the vault.
        --limit N  Only process the first N notes after sorting; values
                   of 0 or less mean "no limit".

    Existing files are never overwritten: on a name clash the first eight
    characters of the note's uuid are appended, and if that name is taken
    too the note is skipped.

    Returns:
        The process exit status; always 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--test", action="store_true")
    ap.add_argument("--trash", action="store_true", help="source from trash/, skip titles already in vault")
    ap.add_argument("--limit", type=int, default=0)
    args = ap.parse_args()

    # The vault is only scanned when it is needed for the duplicate check.
    existing = existing_titles_in_vault() if args.trash else set()

    notes: list[Note] = []
    skipped_empty = 0
    skipped_dupe = 0
    for src in collect_source_files(trash=args.trash):
        n = parse_note(src)
        if n is None:
            # No usable heading or no body left after cleanup.
            skipped_empty += 1
            continue
        if args.trash and slugify(n.title).lower() in existing:
            skipped_dupe += 1
            print(f" skip (already in vault): {n.title}")
            continue
        notes.append(n)

    notes.sort(key=lambda n: (n.target_rel, n.title.lower()))
    # A negative limit would silently drop notes from the end of the list,
    # so only a positive value is honoured.
    if args.limit > 0:
        notes = notes[: args.limit]

    # stats
    counts: dict[str, int] = {}
    for n in notes:
        counts[n.target_rel] = counts.get(n.target_rel, 0) + 1
    print(f"Parsed notes: {len(notes)} (skipped empty: {skipped_empty})")
    if args.trash:
        # The duplicate counter was previously collected but never shown.
        print(f"Skipped (already in vault): {skipped_dupe}")
    print("Classification:")
    # Largest folders first, ties in alphabetical order.
    for k in sorted(counts, key=lambda x: (-counts[x], x)):
        print(f" {counts[k]:3d} {k}")

    if args.dry_run:
        print("\n--- Dry-run detail ---")
        for n in notes:
            print(f" [{n.target_rel}] {n.title}")
        return 0

    written = skipped_exists = 0
    # --test redirects all output into a sandbox inside the inbox, as the
    # module usage text documents, while keeping the classified sub-folders.
    base_dir = INBOX / "_import_test" if args.test else VAULT
    for n in notes:
        out_dir = base_dir / n.target_rel
        out_dir.mkdir(parents=True, exist_ok=True)
        stem = slugify(n.title)
        target = out_dir / f"{stem}.md"
        if target.exists():
            # avoid overwriting existing files; disambiguate with UUID
            target = out_dir / f"{stem} ({n.uuid[:8]}).md"
            if target.exists():
                skipped_exists += 1
                continue
        target.write_text(render(n), encoding="utf-8")
        written += 1

    print(f"\nWritten: {written}, skipped (exists): {skipped_exists}")
    return 0
|
|
|
|
|
|
# Script entry point: run the importer and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|