test(prompt): regression cases for date resolution, meta filter, clarification

2026-06-15 17:42:39 +02:00 · 2026-06-15 17:42:39 +02:00 · 39a02d8fdd
parent 58b515abe9
commit 39a02d8fdd
3 changed files with 106 additions and 1 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -13,7 +13,7 @@ dependencies = [
 ]

 [project.optional-dependencies]
-dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21"]
+dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21", "pyyaml>=6.0"]

 [build-system]
 requires = ["hatchling"]
--- a/tests/fixtures/prompt_regression/inputs.yaml
+++ b/tests/fixtures/prompt_regression/inputs.yaml
@ -0,0 +1,25 @@
+- name: gestern_resolves_to_yesterday
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Gestern Party gefeiert"
+  expected:
+    target_date: "2026-06-13"
+
+- name: meta_command_excluded
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Schreib ins Journal, dass ich gut geschlafen habe"
+  expected:
+    target_date: "2026-06-14"
+    raw_excluded_contains: "Schreib ins Journal"
+    entry_excludes: "Schreib ins Journal"
+
+- name: ambiguous_person_clarification
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Treffen mit Steffen besprochen"
+  persons:
+    - {display: "Steffen Ackerschott", vault_path: "00 Kontext/Personen/Steffen Ackerschott", vorname: Steffen, nachname: Ackerschott, spitzname: "Steffen A."}
+    - {display: "Steffen Brauer", vault_path: "00 Kontext/Personen/Steffen Brauer", vorname: Steffen, nachname: Brauer, spitzname: "Steffen B."}
+  expected:
+    clarifications_nonempty: true
--- a/tests/test_prompt_regression.py
+++ b/tests/test_prompt_regression.py
@ -0,0 +1,80 @@
+"""Regression tests that pin down our processor contract via mocked LLM output.
+
+These tests don't call a real model. They simulate what a *correct* model would
+return for each input, and ensure our schema + wiring accepts it. If we later
+change the prompt or schema, this test surfaces silent regressions.
+"""
+import json
+from pathlib import Path
+import yaml
+import httpx
+import respx
+from journal_bot.processor_lmstudio import LMStudioProcessor
+from journal_bot.processor_protocol import ProcessorInput
+
+
+FIXTURE = Path(__file__).parent / "fixtures" / "prompt_regression" / "inputs.yaml"
+
+
+def _golden_response(case: dict) -> dict:
+    """Construct the JSON a well-behaved model would return for the input case.
+
+    Args:
+        case: One fixture entry. Reads the keys ``name``, ``today``,
+            ``received_time`` and ``text``, plus the optional
+            ``expected.target_date``.
+
+    Returns:
+        A dict with the keys ``target_date``, ``target_path``,
+        ``entry_markdown``, ``clarifications`` and ``raw_excluded``.
+    """
+    today = case["today"]
+    # Fall back to ``today`` when the case pins no explicit target date.
+    target_date = case.get("expected", {}).get("target_date", today)
+    text = case["text"]
+    clarifications: list[str] = []
+    raw_excluded: list[str] = []
+    # Default entry: the message text, unchanged, under a time heading.
+    entry = f"## {case['received_time']}\n{text}"
+    # Meta command (detected by text): report the instruction prefix as
+    # excluded and keep only the actual content in the entry.
+    if "Schreib ins Journal" in text:
+        raw_excluded.append("Schreib ins Journal, dass")
+        entry = f"## {case['received_time']}\nIch habe gut geschlafen"
+    # The remaining special cases are keyed on the fixture name, not the text.
+    if case["name"] == "ambiguous_person_clarification":
+        clarifications.append("Welcher Steffen?")
+        entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen"
+    if case["name"] == "gestern_resolves_to_yesterday":
+        # The entry omits the leading "Gestern" of the fixture text.
+        entry = f"## {case['received_time']}\nParty gefeiert"
+    return {
+        "target_date": target_date,
+        "target_path": f"05 Daily Notes/{target_date}.md",
+        "entry_markdown": entry,
+        "clarifications": clarifications,
+        "raw_excluded": raw_excluded,
+    }
+
+
+def _load_cases():
+    """Parse the YAML fixture at ``FIXTURE`` (read as UTF-8) and return the result."""
+    # NOTE(review): an empty fixture file would presumably make safe_load
+    # return None, which the caller's ``for`` loop cannot iterate - confirm.
+    return yaml.safe_load(FIXTURE.read_text(encoding="utf-8"))
+
+
+@respx.mock
+def test_prompt_regression_cases():
+    """Run every fixture case through the processor against a mocked reply.
+
+    For each case the chat-completions endpoint is mocked to return the
+    golden response for that case, and the processor output is checked
+    against whichever expectations the case declares.
+    """
+    cases = _load_cases()
+    processor = LMStudioProcessor(
+        base_url="http://localhost:1234/v1",
+        model="qwen/qwen3-vl-8b",
+        system_prompt="SYS",
+    )
+    # NOTE(review): all cases share one test function, so the first failing
+    # assertion stops the loop and later cases are not exercised.
+    for case in cases:
+        # The mocked reply carries the golden response as a JSON string in
+        # the first choice's message content.
+        respx.post("http://localhost:1234/v1/chat/completions").mock(
+            return_value=httpx.Response(200, json={
+                "choices": [{"message": {"content": json.dumps(_golden_response(case))}}]
+            })
+        )
+        # NOTE(review): weekday is fixed rather than derived from ``today``.
+        # 2026-06-14, the date used by all current fixture cases, is a
+        # Sunday; a case with another date would send a mismatched weekday.
+        payload = ProcessorInput(
+            today=case["today"],
+            weekday="Sonntag",
+            received_time=case["received_time"],
+            persons=case.get("persons", []),
+            projects=[],
+            text=case["text"],
+        )
+        out = processor.process(payload)
+        # Only expectations present in the case are checked; the case name
+        # is the assertion message so a failure identifies the case.
+        exp = case.get("expected", {})
+        if "target_date" in exp:
+            assert out.target_date == exp["target_date"], case["name"]
+        if exp.get("raw_excluded_contains"):
+            assert any(exp["raw_excluded_contains"] in r for r in out.raw_excluded), case["name"]
+        if exp.get("entry_excludes"):
+            assert exp["entry_excludes"] not in out.entry_markdown, case["name"]
+        if exp.get("clarifications_nonempty"):
+            assert len(out.clarifications) > 0, case["name"]
+        # NOTE(review): presumably this resets recorded call state only and
+        # leaves routes registered - confirm against the respx docs.
+        respx.reset()