test(prompt): regression cases for date resolution, meta filter, clarification

This commit is contained in:
beo3000 2026-06-15 17:42:39 +02:00
parent 58b515abe9
commit 39a02d8fdd
3 changed files with 106 additions and 1 deletions

View File

@ -13,7 +13,7 @@ dependencies = [
]
[project.optional-dependencies]
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21"]
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21", "pyyaml>=6.0"]
[build-system]
requires = ["hatchling"]

View File

@ -0,0 +1,25 @@
# Input cases for the prompt regression test. Each list item is one case:
#   name           - identifier used in assertion messages
#   today          - date handed to the processor as "today"
#   received_time  - time handed to the processor for the message
#   text           - the raw user message
#   persons        - optional list of known persons passed to the processor
#   expected       - properties the processor output must satisfy

# Case: the message says "Gestern" (yesterday), so the expected target date is
# the day before `today`.
- name: gestern_resolves_to_yesterday
today: "2026-06-14"
received_time: "10:00"
text: "Gestern Party gefeiert"
expected:
target_date: "2026-06-13"
# Case: the instruction phrase addressed to the bot must show up in the
# excluded raw fragments and must not appear in the written entry.
- name: meta_command_excluded
today: "2026-06-14"
received_time: "10:00"
text: "Schreib ins Journal, dass ich gut geschlafen habe"
expected:
target_date: "2026-06-14"
raw_excluded_contains: "Schreib ins Journal"
entry_excludes: "Schreib ins Journal"
# Case: two known persons share the first name used in the message, so the
# output is expected to contain at least one clarification.
- name: ambiguous_person_clarification
today: "2026-06-14"
received_time: "10:00"
text: "Treffen mit Steffen besprochen"
persons:
- {display: "Steffen Ackerschott", vault_path: "00 Kontext/Personen/Steffen Ackerschott", vorname: Steffen, nachname: Ackerschott, spitzname: "Steffen A."}
- {display: "Steffen Brauer", vault_path: "00 Kontext/Personen/Steffen Brauer", vorname: Steffen, nachname: Brauer, spitzname: "Steffen B."}
expected:
clarifications_nonempty: true

View File

@ -0,0 +1,80 @@
"""Regression tests that pin down our processor contract via mocked LLM output.
These tests don't call a real model. They simulate what a *correct* model would
return for each input, and ensure our schema + wiring accepts it. If we later
change the prompt or schema, this test surfaces silent regressions.
"""
import json
from pathlib import Path
import yaml
import httpx
import respx
from journal_bot.processor_lmstudio import LMStudioProcessor
from journal_bot.processor_protocol import ProcessorInput
FIXTURE = Path(__file__).parent / "fixtures" / "prompt_regression" / "inputs.yaml"
def _golden_response(case: dict) -> dict:
"""Construct the JSON a well-behaved model would return for the input case."""
today = case["today"]
target_date = case.get("expected", {}).get("target_date", today)
text = case["text"]
clarifications: list[str] = []
raw_excluded: list[str] = []
entry = f"## {case['received_time']}\n{text}"
if "Schreib ins Journal" in text:
raw_excluded.append("Schreib ins Journal, dass")
entry = f"## {case['received_time']}\nIch habe gut geschlafen"
if case["name"] == "ambiguous_person_clarification":
clarifications.append("Welcher Steffen?")
entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen"
if case["name"] == "gestern_resolves_to_yesterday":
entry = f"## {case['received_time']}\nParty gefeiert"
return {
"target_date": target_date,
"target_path": f"05 Daily Notes/{target_date}.md",
"entry_markdown": entry,
"clarifications": clarifications,
"raw_excluded": raw_excluded,
}
def _load_cases():
    """Read the regression fixture and return its parsed YAML content."""
    raw_yaml = FIXTURE.read_text(encoding="utf-8")
    return yaml.safe_load(raw_yaml)
def _weekday_for(iso_date: str) -> str:
    """Return the German weekday name for an ISO ``YYYY-MM-DD`` date string."""
    from datetime import date

    # NOTE(review): names extrapolated from the previously hard-coded
    # "Sonntag"; confirm "Samstag" (not "Sonnabend") is what the prompt uses.
    names = (
        "Montag",
        "Dienstag",
        "Mittwoch",
        "Donnerstag",
        "Freitag",
        "Samstag",
        "Sonntag",
    )
    # date.weekday() numbers Monday as 0, matching the tuple order above.
    return names[date.fromisoformat(iso_date).weekday()]


@respx.mock
def test_prompt_regression_cases():
    """Run every fixture case through the processor against a mocked endpoint.

    For each case the chat-completions endpoint is mocked to return the golden
    response for that case, the processor is invoked, and only the properties
    listed under the case's ``expected`` key are asserted. Each assertion
    carries the case name as its message so a failure identifies the case.
    """
    cases = _load_cases()
    # Without this guard an empty fixture would let the loop run zero times
    # and the test would pass without checking anything.
    assert cases, f"no regression cases loaded from {FIXTURE}"

    base_url = "http://localhost:1234/v1"
    processor = LMStudioProcessor(
        base_url=base_url,
        model="qwen/qwen3-vl-8b",
        system_prompt="SYS",
    )
    for case in cases:
        golden = json.dumps(_golden_response(case))
        respx.post(f"{base_url}/chat/completions").mock(
            return_value=httpx.Response(200, json={
                "choices": [{"message": {"content": golden}}]
            })
        )
        payload = ProcessorInput(
            today=case["today"],
            # Derived from the case date so the weekday stays consistent with
            # `today` when cases with other dates are added.
            weekday=_weekday_for(case["today"]),
            received_time=case["received_time"],
            persons=case.get("persons", []),
            projects=[],
            text=case["text"],
        )
        out = processor.process(payload)
        exp = case.get("expected", {})
        if "target_date" in exp:
            assert out.target_date == exp["target_date"], case["name"]
        if exp.get("raw_excluded_contains"):
            assert any(exp["raw_excluded_contains"] in r for r in out.raw_excluded), case["name"]
        if exp.get("entry_excludes"):
            assert exp["entry_excludes"] not in out.entry_markdown, case["name"]
        if exp.get("clarifications_nonempty"):
            assert len(out.clarifications) > 0, case["name"]
        # Reset respx before the next case is mocked.
        respx.reset()