diff --git a/pyproject.toml b/pyproject.toml
index 749e797..7065886 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21"]
+dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21", "pyyaml>=6.0"]
 
 [build-system]
 requires = ["hatchling"]
diff --git a/tests/fixtures/prompt_regression/inputs.yaml b/tests/fixtures/prompt_regression/inputs.yaml
new file mode 100644
index 0000000..fd2300c
--- /dev/null
+++ b/tests/fixtures/prompt_regression/inputs.yaml
@@ -0,0 +1,25 @@
+- name: gestern_resolves_to_yesterday
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Gestern Party gefeiert"
+  expected:
+    target_date: "2026-06-13"
+
+- name: meta_command_excluded
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Schreib ins Journal, dass ich gut geschlafen habe"
+  expected:
+    target_date: "2026-06-14"
+    raw_excluded_contains: "Schreib ins Journal"
+    entry_excludes: "Schreib ins Journal"
+
+- name: ambiguous_person_clarification
+  today: "2026-06-14"
+  received_time: "10:00"
+  text: "Treffen mit Steffen besprochen"
+  persons:
+    - {display: "Steffen Ackerschott", vault_path: "00 Kontext/Personen/Steffen Ackerschott", vorname: Steffen, nachname: Ackerschott, spitzname: "Steffen A."}
+    - {display: "Steffen Brauer", vault_path: "00 Kontext/Personen/Steffen Brauer", vorname: Steffen, nachname: Brauer, spitzname: "Steffen B."}
+  expected:
+    clarifications_nonempty: true
diff --git a/tests/test_prompt_regression.py b/tests/test_prompt_regression.py
new file mode 100644
index 0000000..4436224
--- /dev/null
+++ b/tests/test_prompt_regression.py
@@ -0,0 +1,80 @@
+"""Regression tests that pin down our processor contract via mocked LLM output.
+
+These tests don't call a real model. They simulate what a *correct* model would
+return for each input, and ensure our schema + wiring accepts it. If we later
+change the prompt or schema, this test surfaces silent regressions.
+"""
+import json
+from pathlib import Path
+import yaml
+import httpx
+import respx
+from journal_bot.processor_lmstudio import LMStudioProcessor
+from journal_bot.processor_protocol import ProcessorInput
+
+
+FIXTURE = Path(__file__).parent / "fixtures" / "prompt_regression" / "inputs.yaml"
+
+
+def _golden_response(case: dict) -> dict:
+    """Construct the JSON a well-behaved model would return for the input case."""
+    today = case["today"]
+    target_date = case.get("expected", {}).get("target_date", today)
+    text = case["text"]
+    clarifications: list[str] = []
+    raw_excluded: list[str] = []
+    entry = f"## {case['received_time']}\n{text}"
+    if "Schreib ins Journal" in text:
+        raw_excluded.append("Schreib ins Journal, dass")
+        entry = f"## {case['received_time']}\nIch habe gut geschlafen"
+    if case["name"] == "ambiguous_person_clarification":
+        clarifications.append("Welcher Steffen?")
+        entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen"
+    if case["name"] == "gestern_resolves_to_yesterday":
+        entry = f"## {case['received_time']}\nParty gefeiert"
+    return {
+        "target_date": target_date,
+        "target_path": f"05 Daily Notes/{target_date}.md",
+        "entry_markdown": entry,
+        "clarifications": clarifications,
+        "raw_excluded": raw_excluded,
+    }
+
+
+def _load_cases():
+    return yaml.safe_load(FIXTURE.read_text(encoding="utf-8"))
+
+
+@respx.mock
+def test_prompt_regression_cases():
+    cases = _load_cases()
+    processor = LMStudioProcessor(
+        base_url="http://localhost:1234/v1",
+        model="qwen/qwen3-vl-8b",
+        system_prompt="SYS",
+    )
+    for case in cases:
+        respx.post("http://localhost:1234/v1/chat/completions").mock(
+            return_value=httpx.Response(200, json={
+                "choices": [{"message": {"content": json.dumps(_golden_response(case))}}]
+            })
+        )
+        payload = ProcessorInput(
+            today=case["today"],
+            weekday="Sonntag",
+            received_time=case["received_time"],
+            persons=case.get("persons", []),
+            projects=[],
+            text=case["text"],
+        )
+        out = processor.process(payload)
+        exp = case.get("expected", {})
+        if "target_date" in exp:
+            assert out.target_date == exp["target_date"], case["name"]
+        if exp.get("raw_excluded_contains"):
+            assert any(exp["raw_excluded_contains"] in r for r in out.raw_excluded), case["name"]
+        if exp.get("entry_excludes"):
+            assert exp["entry_excludes"] not in out.entry_markdown, case["name"]
+        if exp.get("clarifications_nonempty"):
+            assert len(out.clarifications) > 0, case["name"]
+        respx.reset()