# brain/scripts/import_inventar.py

"""
Import UpNote I: (inventar) notes into 03 Bereiche/Inventar/.

Title: '# I: Itemname 🗄️'
Extract standard property table rows into YAML frontmatter.
Preserve content below property table (e.g. '## weitere Details') as body.
Skip @@TITEL@@ templates. Tag: inventar, upnote-import.
Use --trash to pull from UpNote trash.
Use --dry-run to list the parsed items without writing any notes.
"""

from __future__ import annotations

import argparse
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path

# Best effort: ask stdout to encode as UTF-8 (presumably so non-ASCII note
# titles print on consoles with a legacy code page — confirm). Any failure,
# e.g. a replaced stdout object without reconfigure(), is deliberately ignored.
try:
    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
except Exception:
    pass

# Source directory scanned by collect(): *.md files directly inside it, or
# inside its "trash" subfolder when --trash is given.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
UPNOTE_ROOT = Path(
    r"C:\Users\d-chrka\AppData\Roaming\UpNote\UpNote Backup"
    r"\HtgSdi2hYyUfnYq3OZkBwx13H5q2\Markdown\General Space"
)
# Root of the destination vault.
VAULT = Path(r"D:\projects\chrka\brain")
# Folder that receives one "<title>.md" file per imported item.
INVENTAR = VAULT / "03 Bereiche" / "Inventar"

# Heading of level 1-3 that starts with "I:". Group 1 is the item name; an
# optional trailing file-cabinet emoji (with or without its variation
# selector) is matched outside the group and therefore not captured.
TITLE_RE = re.compile(r"^#{1,3}\s*I:\s*(.+?)(?:\s*🗄️?)?\s*$", re.MULTILINE)
# A pipe table: one row, one separator row (dashes, colons, pipes, whitespace),
# then at least one more row, every row terminated by a newline. parse() uses
# this pattern for both the small #dtInventar marker table and the main
# property table.
# NOTE(review): "\s" also matches newlines, so rows separated only by blank
# lines are matched as one table — confirm this is intended.
META_TABLE_RE = re.compile(r"^\|.*\|\s*\n\|[\s\-:|]+\|\s*\n(?:\|.*\|\s*\n)+", re.MULTILINE)
# A horizontal rule written as three asterisks, optionally separated by whitespace.
HR_RE = re.compile(r"^\*\s*\*\s*\*\s*$", re.MULTILINE)
# A line that contains nothing but a single <br> / <br/> tag.
BR_LINE_RE = re.compile(r"^\s*<br\s*/?>\s*$", re.MULTILINE)

# Standard inventory property keys, spelled as they are looked up in the note's
# property table. The list order is the order used for frontmatter output.
PROP_KEYS = [
    "Typ", "Ort", "Details", "Firma", "Zuordnung",
    "Modell", "Seriennummer", "Hersteller",
    "Preis", "Anzahl", "Kaufdatum",
    "Rechnung", "Handbuch", "Tests / Shop", "Garantie",
]
# Normalized key (lowercased, spaces and "/" removed) -> canonical key from
# PROP_KEYS; extract_props() normalizes table cells the same way before lookup.
PROP_KEY_NORM = {k.lower().replace(" ", "").replace("/", ""): k for k in PROP_KEYS}

# Canonical property key -> key name written into the YAML frontmatter
# (lowercase; "Tests / Shop" becomes "tests_shop"). Must contain an entry for
# every key in PROP_KEYS, because render() indexes it directly.
YAML_KEY = {
    "Typ": "typ",
    "Ort": "ort",
    "Details": "details",
    "Firma": "firma",
    "Zuordnung": "zuordnung",
    "Modell": "modell",
    "Seriennummer": "seriennummer",
    "Hersteller": "hersteller",
    "Preis": "preis",
    "Anzahl": "anzahl",
    "Kaufdatum": "kaufdatum",
    "Rechnung": "rechnung",
    "Handbuch": "handbuch",
    "Tests / Shop": "tests_shop",
    "Garantie": "garantie",
}


@dataclass
class Item:
    """One inventory note as produced by parse()."""

    # Stem of the source file name.
    uuid: str
    # Note title with filesystem-hostile characters replaced; main() also uses
    # it as the target file name.
    title: str
    # Canonical property key (see PROP_KEYS) -> non-empty cell value.
    props: dict[str, str] = field(default_factory=dict)
    # Markdown that remains after heading, marker and property table were removed.
    body: str = ""


def clean_cell(v: str) -> str:
    """Return a table cell's text with inline ``<br>`` tags removed.

    Every spelling of the tag is accepted (``<br>``, ``<br/>``, ``<br />``,
    extra whitespace before the slash, upper case), in line with the
    ``<br\\s*/?>`` pattern the file uses for <br>-only lines. Tags are removed
    without inserting a replacement character, then surrounding whitespace is
    stripped.
    """
    # One regex pass instead of a fixed list of literal spellings, so variants
    # such as "<br  />" or "<BR>" no longer leak into property values.
    return re.sub(r"<br\s*/?>", "", v, flags=re.IGNORECASE).strip()


def extract_props(table: str) -> dict[str, str]:
    """Read a two-column pipe table into ``{canonical key: value}``.

    Only lines that start with ``|`` are looked at. The first cell is the key,
    the second the value; further cells are ignored. Rows are dropped when
    they have fewer than two cells, when both cells consist solely of
    ``-``, ``:`` and spaces (separator or empty rows), when the key is not a
    known property, or when the cleaned value is empty. If a key occurs more
    than once, the last non-empty value wins.
    """
    separator_chars = set("-: ")
    props: dict[str, str] = {}
    table_rows = (ln for ln in table.splitlines() if ln.startswith("|"))
    for row in table_rows:
        cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
        if len(cells) < 2:
            continue
        label, raw_value = cells[0], cells[1]
        # Separator rows (and rows whose first two cells are empty) carry no data.
        if separator_chars.issuperset(label) and separator_chars.issuperset(raw_value):
            continue
        # Same normalization as the PROP_KEY_NORM keys: lowercase, no spaces, no "/".
        canonical = PROP_KEY_NORM.get(label.lower().replace(" ", "").replace("/", ""))
        if canonical is None:
            # Header rows and unknown properties end up here.
            continue
        value = clean_cell(raw_value)
        if value:
            props[canonical] = value
    return props


def yaml_escape(v: str) -> str:
    """Return *v* as a single-line YAML scalar, double-quoting it when needed.

    The value is quoted when it contains a YAML indicator character, starts
    with ``-``, or contains a line break. Inside the quotes, backslashes,
    double quotes and line breaks are written as escape sequences, so the
    result never spans more than one line. All other values are returned
    unchanged.
    """
    needs_quotes = (
        re.search(r'[:\[\]{}#&*!|>\'"%@`,]', v) is not None
        or v.startswith("-")
        or "\n" in v
        or "\r" in v
    )
    if not needs_quotes:
        return v
    # Backslash must be escaped first so the escapes added below stay intact.
    escaped = v.replace("\\", "\\\\").replace('"', '\\"')
    # A raw line break inside a double-quoted scalar is folded into a space by
    # YAML parsers; writing it as an escape sequence keeps the value intact.
    escaped = escaped.replace("\n", "\\n").replace("\r", "\\r")
    return f'"{escaped}"'


def parse(src: Path) -> Item | None:
    """Parse one UpNote markdown export into an Item.

    Returns None when the file has no "I:" heading, or when the heading text
    contains "@@" or "{{" (template placeholders).

    Steps applied to the text after the heading:
      1. remove the first pipe table if it only carries the #dtInventar
         marker, and remove bare #dtInventar lines;
      2. remove the first "* * *" horizontal rule;
      3. treat the first remaining pipe table as the property table and
         convert it with extract_props();
      4. drop <br>-only lines and collapse runs of blank lines; what is left
         becomes the body.

    Raises:
        OSError, UnicodeDecodeError: if the file cannot be read as UTF-8.
    """
    text = src.read_text(encoding="utf-8")
    m = TITLE_RE.search(text)
    if not m:
        return None
    title = m.group(1).strip()
    if "@@" in title or "{{" in title:
        return None
    # The title doubles as a file name: replace filesystem-hostile characters
    # and collapse whitespace runs.
    safe_title = re.sub(r"[\\/:*?\"<>|]", "-", title)
    safe_title = re.sub(r"\s+", " ", safe_title).strip()

    rest = text[m.end():]
    # META_TABLE_RE only matches rows that end in a newline. Without this, a
    # table that ends the file without a trailing newline would lose its last
    # row (or not match at all). The final strip() removes the extra newline.
    if not rest.endswith("\n"):
        rest += "\n"

    def _is_meta_marker(tbl: str) -> bool:
        # A marker table mentions #dtInventar but none of the usual property keys.
        return "#dtInventar" in tbl and "Typ" not in tbl and "Modell" not in tbl

    # Consume at most one marker table (only the first table is considered).
    tm = META_TABLE_RE.search(rest)
    if tm and _is_meta_marker(tm.group(0)):
        rest = rest[:tm.start()] + rest[tm.end():]
    # The marker may also appear as a bare line.
    rest = re.sub(r"^\s*#dtInventar\s*$", "", rest, flags=re.MULTILINE)
    # Drop the first horizontal rule.
    rest = HR_RE.sub("", rest, count=1)

    # The first table still present is taken as the property table.
    props: dict[str, str] = {}
    prop_match = META_TABLE_RE.search(rest)
    if prop_match:
        props = extract_props(prop_match.group(0))
        rest = rest[:prop_match.start()] + rest[prop_match.end():]

    # Tidy the body: no <br>-only lines, at most one blank line in a row.
    rest = BR_LINE_RE.sub("", rest)
    rest = re.sub(r"\n{3,}", "\n\n", rest).strip()

    return Item(uuid=src.stem, title=safe_title, props=props, body=rest)


def render(item: Item) -> str:
    """Build the full text of a new note for *item*.

    The note consists of YAML frontmatter (the two fixed tags followed by the
    item's properties in PROP_KEYS order), a level-1 heading with the item
    title, and the body. The result ends with exactly one newline.
    """
    prop_lines = [
        f"{YAML_KEY[key]}: {yaml_escape(item.props[key])}"
        for key in PROP_KEYS
        if key in item.props
    ]
    frontmatter = "\n".join(
        ["---", "tags:", "  - inventar", "  - upnote-import", *prop_lines, "---"]
    )
    note = f"{frontmatter}\n\n# {item.title}\n\n{item.body or ''}\n"
    # With an empty body the trailing blank lines are trimmed down to one newline.
    return note.rstrip() + "\n"


def append_block(existing: str, item: Item) -> str:
    """Return *existing* with a "## UpNote-Import" section for *item* appended.

    Trailing whitespace of *existing* is removed first. The section lists the
    item's properties as bold-keyed bullet points (PROP_KEYS order), followed
    by the item body; either part is left out when empty.
    """
    parts = ["\n\n## UpNote-Import\n"]
    if item.props:
        parts.extend(
            f"- **{key}:** {item.props[key]}" for key in PROP_KEYS if key in item.props
        )
        # Empty entry -> blank line between the bullet list and the body.
        parts.append("")
    if item.body:
        parts.append(item.body)
    section = "\n".join(parts)
    return f"{existing.rstrip()}{section}\n"


def collect(trash: bool) -> list[Path]:
    """Return the markdown files that start with an "I:" heading.

    Args:
        trash: scan the "trash" subfolder of UPNOTE_ROOT instead of
            UPNOTE_ROOT itself.

    Only the first 200 characters of each ``*.md`` file are inspected, and
    undecodable bytes are ignored while doing so. Files that cannot be opened
    or read are reported on stderr and skipped.
    """
    d = UPNOTE_ROOT / "trash" if trash else UPNOTE_ROOT
    out: list[Path] = []
    for p in d.glob("*.md"):
        try:
            # Read just the head instead of loading the whole note.
            with p.open(encoding="utf-8", errors="ignore") as fh:
                head = fh.read(200)
        except OSError as exc:
            print(f"warning: cannot read {p}: {exc}", file=sys.stderr)
            continue
        if re.match(r"^#{1,3}\s*I:\s", head):
            out.append(p)
    return out


def main() -> int:
    """Command-line entry point: parse the inventory notes and write them to the vault.

    Flags:
        --dry-run  print the parsed items with their property keys; no
                   directory is created and no note is written.
        --trash    read notes from the "trash" subfolder of UPNOTE_ROOT.

    For each parsed item, ``<title>.md`` in INVENTAR is created, or, if it
    already exists, extended with an "UpNote-Import" section.

    Returns:
        The process exit code (always 0).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--trash", action="store_true")
    args = ap.parse_args()

    items: list[Item] = []
    skipped = 0
    for src in collect(trash=args.trash):
        it = parse(src)
        if it is None:
            skipped += 1
            continue
        items.append(it)

    # Case-insensitive title order for stable output and write order.
    items.sort(key=lambda x: x.title.lower())

    print(f"Parsed: {len(items)} inventar items (skipped template/empty: {skipped})")
    if args.dry_run:
        for it in items:
            pk = ", ".join(k for k in PROP_KEYS if k in it.props) or "(no props)"
            print(f"  {it.title}  [{pk}]")
        return 0

    # Create the target folder only when something is actually written, so
    # that --dry-run leaves the vault untouched.
    INVENTAR.mkdir(parents=True, exist_ok=True)

    written = merged = 0
    for it in items:
        target = INVENTAR / f"{it.title}.md"
        if target.exists():
            # Existing note (or an earlier item with the same title): append.
            target.write_text(
                append_block(target.read_text(encoding="utf-8"), it),
                encoding="utf-8",
            )
            merged += 1
        else:
            target.write_text(render(it), encoding="utf-8")
            written += 1

    print(f"Written: {written}, merged: {merged}")
    return 0


# Run only when executed as a script; main()'s return value becomes the exit code.
if __name__ == "__main__":
    sys.exit(main())