test(prompt): regression cases for date resolution, meta filter, clarification
This commit is contained in:
parent
58b515abe9
commit
39a02d8fdd
|
|
@ -13,7 +13,7 @@ dependencies = [
|
|||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21"]
|
||||
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-mock>=3.12", "respx>=0.21", "pyyaml>=6.0"]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
- name: gestern_resolves_to_yesterday
|
||||
today: "2026-06-14"
|
||||
received_time: "10:00"
|
||||
text: "Gestern Party gefeiert"
|
||||
expected:
|
||||
target_date: "2026-06-13"
|
||||
|
||||
- name: meta_command_excluded
|
||||
today: "2026-06-14"
|
||||
received_time: "10:00"
|
||||
text: "Schreib ins Journal, dass ich gut geschlafen habe"
|
||||
expected:
|
||||
target_date: "2026-06-14"
|
||||
raw_excluded_contains: "Schreib ins Journal"
|
||||
entry_excludes: "Schreib ins Journal"
|
||||
|
||||
- name: ambiguous_person_clarification
|
||||
today: "2026-06-14"
|
||||
received_time: "10:00"
|
||||
text: "Treffen mit Steffen besprochen"
|
||||
persons:
|
||||
- {display: "Steffen Ackerschott", vault_path: "00 Kontext/Personen/Steffen Ackerschott", vorname: Steffen, nachname: Ackerschott, spitzname: "Steffen A."}
|
||||
- {display: "Steffen Brauer", vault_path: "00 Kontext/Personen/Steffen Brauer", vorname: Steffen, nachname: Brauer, spitzname: "Steffen B."}
|
||||
expected:
|
||||
clarifications_nonempty: true
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
"""Regression tests that pin down our processor contract via mocked LLM output.
|
||||
|
||||
These tests don't call a real model. They simulate what a *correct* model would
|
||||
return for each input, and ensure our schema + wiring accepts it. If we later
|
||||
change the schema or wiring, these tests surface silent regressions. (The model reply is mocked, so the prompt wording itself is not exercised.)
|
||||
"""
|
||||
import json
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
import httpx
|
||||
import respx
|
||||
from journal_bot.processor_lmstudio import LMStudioProcessor
|
||||
from journal_bot.processor_protocol import ProcessorInput
|
||||
|
||||
|
||||
FIXTURE = Path(__file__).parent / "fixtures" / "prompt_regression" / "inputs.yaml"
|
||||
|
||||
|
||||
def _golden_response(case: dict) -> dict:
|
||||
"""Construct the JSON a well-behaved model would return for the input case."""
|
||||
today = case["today"]
|
||||
target_date = case.get("expected", {}).get("target_date", today)
|
||||
text = case["text"]
|
||||
clarifications: list[str] = []
|
||||
raw_excluded: list[str] = []
|
||||
entry = f"## {case['received_time']}\n{text}"
|
||||
if "Schreib ins Journal" in text:
|
||||
raw_excluded.append("Schreib ins Journal, dass")
|
||||
entry = f"## {case['received_time']}\nIch habe gut geschlafen"
|
||||
if case["name"] == "ambiguous_person_clarification":
|
||||
clarifications.append("Welcher Steffen?")
|
||||
entry = f"## {case['received_time']}\nTreffen mit Steffen besprochen"
|
||||
if case["name"] == "gestern_resolves_to_yesterday":
|
||||
entry = f"## {case['received_time']}\nParty gefeiert"
|
||||
return {
|
||||
"target_date": target_date,
|
||||
"target_path": f"05 Daily Notes/{target_date}.md",
|
||||
"entry_markdown": entry,
|
||||
"clarifications": clarifications,
|
||||
"raw_excluded": raw_excluded,
|
||||
}
|
||||
|
||||
|
||||
def _load_cases():
    """Read the YAML fixture file and return its parsed top-level object."""
    fixture_text = FIXTURE.read_text(encoding="utf-8")
    return yaml.safe_load(fixture_text)
|
||||
|
||||
|
||||
@respx.mock
def test_prompt_regression_cases():
    """Replay every fixture case through the processor with a mocked endpoint.

    For each case the chat-completions endpoint is mocked to return the
    golden response built by ``_golden_response``. The processor output is
    then checked against whichever ``expected`` keys the case declares:
    ``target_date``, ``raw_excluded_contains``, ``entry_excludes`` and
    ``clarifications_nonempty``.
    """
    # Function-scope import so this block does not depend on the file header.
    from datetime import date

    # Indexed by ``date.weekday()`` (Monday == 0).
    german_weekdays = (
        "Montag",
        "Dienstag",
        "Mittwoch",
        "Donnerstag",
        "Freitag",
        "Samstag",
        "Sonntag",
    )
    endpoint = "http://localhost:1234/v1/chat/completions"

    cases = _load_cases()
    # Without this, an empty or blank fixture turns the loop into a no-op
    # and the test passes without checking anything.
    assert cases, f"no regression cases loaded from {FIXTURE}"

    processor = LMStudioProcessor(
        base_url="http://localhost:1234/v1",
        model="qwen/qwen3-vl-8b",
        system_prompt="SYS",
    )

    for case in cases:
        name = case["name"]
        golden = _golden_response(case)
        respx.post(endpoint).mock(
            return_value=httpx.Response(
                200,
                json={"choices": [{"message": {"content": json.dumps(golden)}}]},
            )
        )

        today = case["today"]
        # Derive the weekday from the case date so that fixtures with a
        # different ``today`` stay self-consistent (2026-06-14 is a Sunday).
        weekday = german_weekdays[date.fromisoformat(str(today)).weekday()]
        payload = ProcessorInput(
            today=today,
            weekday=weekday,
            received_time=case["received_time"],
            # ``or`` also covers keys that are present but null in the YAML.
            persons=case.get("persons") or [],
            projects=[],
            text=case["text"],
        )
        out = processor.process(payload)

        exp = case.get("expected") or {}
        if "target_date" in exp:
            assert out.target_date == exp["target_date"], name
        if exp.get("raw_excluded_contains"):
            needle = exp["raw_excluded_contains"]
            assert any(needle in r for r in out.raw_excluded), name
        if exp.get("entry_excludes"):
            assert exp["entry_excludes"] not in out.entry_markdown, name
        if exp.get("clarifications_nonempty"):
            assert len(out.clarifications) > 0, name

        # Reset the mock router's recorded state before the next case.
        respx.reset()
|
||||
Loading…
Reference in New Issue