"""Regression tests that pin down our processor contract via mocked LLM output. These tests don't call a real model. They simulate what a *correct* model would return for each input, and ensure our schema + wiring accepts it. If we later change the prompt or schema, this test surfaces silent regressions. """ import json from pathlib import Path import yaml import httpx import respx from journal_bot.processor_lmstudio import LMStudioProcessor from journal_bot.processor_protocol import ProcessorInput FIXTURE = Path(__file__).parent / "fixtures" / "prompt_regression" / "inputs.yaml" def _golden_response(case: dict) -> dict: """Construct the JSON a well-behaved model would return for the input case.""" today = case["today"] target_date = case.get("expected", {}).get("target_date", today) text = case["text"] clarifications: list[str] = [] raw_excluded: list[str] = [] entry = f"## {case['received_time']}\n{text}" if "Schreib ins Journal" in text: raw_excluded.append("Schreib ins Journal, dass") entry = f"## {case['received_time']}\nIch habe gut geschlafen" if case["name"] == "ambiguous_person_clarification": clarifications.append("Welcher Steffen?") entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen" if case["name"] == "gestern_resolves_to_yesterday": entry = f"## {case['received_time']}\nParty gefeiert" return { "target_date": target_date, "target_path": f"05 Daily Notes/{target_date}.md", "entry_markdown": entry, "clarifications": clarifications, "raw_excluded": raw_excluded, } def _load_cases(): return yaml.safe_load(FIXTURE.read_text(encoding="utf-8")) @respx.mock def test_prompt_regression_cases(): cases = _load_cases() processor = LMStudioProcessor( base_url="http://localhost:1234/v1", model="qwen/qwen3-vl-8b", system_prompt="SYS", ) for case in cases: respx.post("http://localhost:1234/v1/chat/completions").mock( return_value=httpx.Response(200, json={ "choices": [{"message": {"content": json.dumps(_golden_response(case))}}] }) ) payload = ProcessorInput( today=case["today"], weekday="Sonntag", received_time=case["received_time"], persons=case.get("persons", []), projects=[], text=case["text"], ) out = processor.process(payload) exp = case.get("expected", {}) if "target_date" in exp: assert out.target_date == exp["target_date"], case["name"] if exp.get("raw_excluded_contains"): assert any(exp["raw_excluded_contains"] in r for r in out.raw_excluded), case["name"] if exp.get("entry_excludes"): assert exp["entry_excludes"] not in out.entry_markdown, case["name"] if exp.get("clarifications_nonempty"): assert len(out.clarifications) > 0, case["name"] respx.reset()