81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
"""Regression tests that pin down our processor contract via mocked LLM output.
|
|
|
|
These tests don't call a real model. They simulate what a *correct* model would
return for each input, and ensure our schema + wiring accepts it. If we later
change the prompt or schema, these tests surface silent regressions.
|
|
"""
|
|
import json
|
|
from pathlib import Path
|
|
import yaml
|
|
import httpx
|
|
import respx
|
|
from journal_bot.processor_lmstudio import LMStudioProcessor
|
|
from journal_bot.processor_protocol import ProcessorInput
|
|
|
|
|
|
# YAML file with the regression input cases, located relative to this test module.
FIXTURE = Path(__file__).parent.joinpath("fixtures", "prompt_regression", "inputs.yaml")
|
|
|
|
|
|
def _golden_response(case: dict) -> dict:
|
|
"""Construct the JSON a well-behaved model would return for the input case."""
|
|
today = case["today"]
|
|
target_date = case.get("expected", {}).get("target_date", today)
|
|
text = case["text"]
|
|
clarifications: list[str] = []
|
|
raw_excluded: list[str] = []
|
|
entry = f"## {case['received_time']}\n{text}"
|
|
if "Schreib ins Journal" in text:
|
|
raw_excluded.append("Schreib ins Journal, dass")
|
|
entry = f"## {case['received_time']}\nIch habe gut geschlafen"
|
|
if case["name"] == "ambiguous_person_clarification":
|
|
clarifications.append("Welcher Steffen?")
|
|
entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen"
|
|
if case["name"] == "gestern_resolves_to_yesterday":
|
|
entry = f"## {case['received_time']}\nParty gefeiert"
|
|
return {
|
|
"target_date": target_date,
|
|
"target_path": f"05 Daily Notes/{target_date}.md",
|
|
"entry_markdown": entry,
|
|
"clarifications": clarifications,
|
|
"raw_excluded": raw_excluded,
|
|
}
|
|
|
|
|
|
def _load_cases():
    """Read the fixture file as UTF-8 and return its parsed YAML content."""
    raw_yaml = FIXTURE.read_text(encoding="utf-8")
    return yaml.safe_load(raw_yaml)
|
|
|
|
|
|
@respx.mock
def test_prompt_regression_cases():
    """Run every fixture case through the processor against a mocked endpoint.

    For each case the chat-completions endpoint is mocked to return the golden
    response built by ``_golden_response``; the processor output is then checked
    against whichever expectations the case declares under ``expected``:
    ``target_date``, ``raw_excluded_contains``, ``entry_excludes`` and
    ``clarifications_nonempty``.
    """
    cases = _load_cases()
    # An empty fixture would skip the loop entirely and let the test pass
    # without checking anything, so fail loudly instead.
    assert cases, "prompt regression fixture contains no cases"

    processor = LMStudioProcessor(
        base_url="http://localhost:1234/v1",
        model="qwen/qwen3-vl-8b",
        system_prompt="SYS",
    )
    for case in cases:
        route = respx.post("http://localhost:1234/v1/chat/completions").mock(
            return_value=httpx.Response(200, json={
                "choices": [{"message": {"content": json.dumps(_golden_response(case))}}]
            })
        )
        payload = ProcessorInput(
            today=case["today"],
            weekday="Sonntag",
            received_time=case["received_time"],
            persons=case.get("persons", []),
            projects=[],
            text=case["text"],
        )
        out = processor.process(payload)

        # Confirm the output really came from the mocked endpoint.
        assert route.called, case["name"]

        exp = case.get("expected", {})
        if "target_date" in exp:
            assert out.target_date == exp["target_date"], case["name"]
        if exp.get("raw_excluded_contains"):
            assert any(exp["raw_excluded_contains"] in r for r in out.raw_excluded), case["name"]
        if exp.get("entry_excludes"):
            assert exp["entry_excludes"] not in out.entry_markdown, case["name"]
        if exp.get("clarifications_nonempty"):
            assert len(out.clarifications) > 0, case["name"]

        # Clear recorded calls so the next case starts from a clean state.
        respx.reset()
|