From c67f6d39d552dc3a10d4c288f3c9faf10fc0e5b8 Mon Sep 17 00:00:00 2001
From: beo3000 <pakabu@web.de>
Date: Fri, 20 Mar 2026 11:23:12 +0100
Subject: [PATCH] docs: add GUI app implementation plan

---
 docs/superpowers/plans/2026-03-20-gui-app.md | 1334 ++++++++++++++++++
 1 file changed, 1334 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-03-20-gui-app.md

diff --git a/docs/superpowers/plans/2026-03-20-gui-app.md b/docs/superpowers/plans/2026-03-20-gui-app.md
new file mode 100644
index 0000000..2191368
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-20-gui-app.md
@@ -0,0 +1,1334 @@
+# GUI App Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Convert `dictate.py` into a modular packaged desktop app with tray-based log panel, no terminal window, and system integration (autostart, start menu, desktop shortcut).
+
+**Architecture:** Split the monolithic `dictate.py` into a `whisper_app/` package with one module per concern. A central log queue in `app.py` replaces all `print()` calls; a compact tkinter panel displays the queue. PyInstaller builds a `--onedir --noconsole` binary per platform.
+
+**Tech Stack:** Python 3.10+, tkinter, pystray, faster-whisper, sounddevice, pynput, PyInstaller, Pillow, pywin32 (Windows only)
+
+---
+
+## File Map
+
+| File | Action | Responsibility |
+|---|---|---|
+| `whisper_app/__init__.py` | Create | Package marker |
+| `whisper_app/app.py` | Create | AppState enum, log queue, `log()`, `set_log_queue()` |
+| `whisper_app/config.py` | Create | Path resolution, load/save config + vocab |
+| `whisper_app/typer.py` | Create | `type_text()` cross-platform |
+| `whisper_app/hotkey.py` | Create | `HotkeyListener` (move from dictate.py) |
+| `whisper_app/audio.py` | Create | `get_audio_stream()`, `audio_callback()` |
+| `whisper_app/transcriber.py` | Create | `load_model()`, `stop_and_transcribe()` |
+| `whisper_app/overlay.py` | Create | Recording overlay window |
+| `whisper_app/tray.py` | Create | pystray icon + menu |
+| `whisper_app/log_window.py` | Create | Compact log panel (380×220px) |
+| `whisper_app/settings_window.py` | Create | Settings dialog + INSTALLATION section |
+| `whisper_app/vocab_window.py` | Create | Vocabulary dialog |
+| `whisper_app/installer.py` | Create | Autostart, start menu, desktop shortcut |
+| `main.py` | Create | Entry point, wires all modules, error logfile fallback |
+| `build.py` | Create | Generates icon.ico, runs PyInstaller, copies config files |
+| `whisper-dictation.spec` | Create | Manual PyInstaller spec for ctranslate2 |
+| `dictate.py` | Delete | Replaced by the above |
+| `tests/test_config.py` | Create | Path resolution tests |
+| `tests/test_app_log.py` | Create | Log queue / buffer tests |
+| `tests/test_installer.py` | Create | Installer status detection tests |
+
+---
+
+## Task 1: Package skeleton + config module
+
+**Files:**
+- Create: `whisper_app/__init__.py`
+- Create: `whisper_app/config.py`
+- Create: `tests/test_config.py`
+
+- [ ] **Step 1: Write failing tests for path resolution**
+
+```python
+# tests/test_config.py
+import sys, os, importlib
+
+def test_app_dir_dev_mode(monkeypatch, tmp_path):
+    """In dev mode, _app_dir() returns the repo root (two levels above config.py)."""
+    monkeypatch.delattr(sys, "frozen", raising=False)
+    from whisper_app import config
+    importlib.reload(config)
+    result = config._app_dir()
+    # Should be the repo root — parent of whisper_app/
+    assert os.path.isdir(result)
+    assert os.path.basename(result) != "whisper_app"
+
+def test_app_dir_frozen_mode(monkeypatch, tmp_path):
+    """In frozen mode, _app_dir() returns dirname(sys.executable)."""
+    monkeypatch.setattr(sys, "frozen", True, raising=False)
+    monkeypatch.setattr(sys, "executable", str(tmp_path / "whisper-dictation.exe"))
+    from whisper_app import config
+    importlib.reload(config)
+    assert config._app_dir() == str(tmp_path)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```
+pytest tests/test_config.py -v
+```
+Expected: `ModuleNotFoundError` — package doesn't exist yet.
+
+- [ ] **Step 3: Create package and config module**
+
+```python
+# whisper_app/__init__.py
+# (empty)
+```
+
+```python
+# whisper_app/config.py
+import json
+import os
+import sys
+
+def _app_dir() -> str:
+    """Root dir for config.json and vocabulary.json."""
+    if getattr(sys, "frozen", False):
+        return os.path.dirname(sys.executable)
+    # config.py lives at whisper_app/config.py → parent of parent = repo root
+    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+DATA_DIR = os.environ.get("WHISPER_DATA_DIR") or _app_dir()
+
+_env_local = os.environ.get("WHISPER_LOCAL_DIR")
+if _env_local:
+    _local_dir = _env_local
+elif os.name == "nt":
+    _local_dir = os.path.join(os.environ.get("LOCALAPPDATA", DATA_DIR), "WhisperDictation")
+else:
+    _local_dir = os.path.join(os.path.expanduser("~"), ".local", "share", "WhisperDictation")
+
+os.makedirs(_local_dir, exist_ok=True)
+
+CONFIG_FILE       = os.path.join(DATA_DIR,   "config.json")
+CONFIG_LOCAL_FILE = os.path.join(_local_dir, "config_local.json")
+VOCAB_FILE        = os.path.join(DATA_DIR,   "vocabulary.json")
+
+DEFAULT_CONFIG = {
+    "hotkey": "ctrl+shift+space",
+    "model": "medium",
+    "device": "cuda",
+    "compute_type": "float16",
+    "language": "de",
+    "audio_device": None,
+    "sample_rate": 16000,
+}
+
+MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
+LANGUAGES = {"Deutsch": "de", "English": "en", "Français": "fr", "Español": "es",
+             "Italiano": "it", "Auto": None}
+DEVICES = ["cuda", "cpu"]
+COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"}
+LOCAL_KEYS = {"audio_device", "device", "compute_type"}
+
+config: dict = {}
+vocab: dict = {"words": [], "replacements": []}
+
+def load_config() -> None:
+    global config
+    config = dict(DEFAULT_CONFIG)
+    if os.path.exists(CONFIG_FILE):
+        with open(CONFIG_FILE) as f:
+            config.update(json.load(f))
+    if os.path.exists(CONFIG_LOCAL_FILE):
+        with open(CONFIG_LOCAL_FILE) as f:
+            config.update(json.load(f))
+
+def save_config() -> None:
+    shared = {k: v for k, v in config.items() if k not in LOCAL_KEYS}
+    local  = {k: v for k, v in config.items() if k in LOCAL_KEYS}
+    with open(CONFIG_FILE, "w") as f:
+        json.dump(shared, f, indent=2)
+    with open(CONFIG_LOCAL_FILE, "w") as f:
+        json.dump(local, f, indent=2)
+
+def load_vocab() -> None:
+    global vocab
+    if os.path.exists(VOCAB_FILE):
+        with open(VOCAB_FILE) as f:
+            vocab = json.load(f)
+    else:
+        vocab = {"words": [], "replacements": []}
+
+def save_vocab() -> None:
+    with open(VOCAB_FILE, "w") as f:
+        json.dump(vocab, f, indent=2, ensure_ascii=False)
+
+def apply_vocab(text: str) -> str:
+    for r in vocab.get("replacements", []):
+        text = text.replace(r["from"], r["to"])
+    return text
+
+def get_initial_prompt() -> str:
+    words = vocab.get("words", [])
+    return ", ".join(words) if words else ""
+```
+
+- [ ] **Step 4: Run tests**
+
+```
+pytest tests/test_config.py -v
+```
+Expected: 2 PASSED
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add whisper_app/__init__.py whisper_app/config.py tests/test_config.py
+git commit -m "feat: add whisper_app package with config module"
+```
+
+---
+
+## Task 2: Log queue in app.py
+
+**Files:**
+- Create: `whisper_app/app.py`
+- Create: `tests/test_app_log.py`
+
+- [ ] **Step 1: Write failing tests**
+
+```python
+# tests/test_app_log.py
+import queue, threading, importlib
+import whisper_app.app as app_mod
+
+def setup_function():
+    importlib.reload(app_mod)
+
+def test_log_before_queue_buffers():
+    app_mod.log("hello")
+    app_mod.log("world")
+    assert app_mod._log_buffer == ["hello", "world"]
+    assert app_mod._log_queue is None
+
+def test_log_buffer_capped_at_500():
+    for i in range(600):
+        app_mod.log(str(i))
+    assert len(app_mod._log_buffer) <= 500
+
+def test_set_log_queue_flushes_buffer():
+    app_mod.log("buffered")
+    q = queue.Queue()
+    app_mod.set_log_queue(q)
+    assert q.get_nowait() == "buffered"
+    assert app_mod._log_buffer == []
+
+def test_log_after_queue_goes_to_queue():
+    q = queue.Queue()
+    app_mod.set_log_queue(q)
+    app_mod.log("direct")
+    assert q.get_nowait() == "direct"
+
+def test_log_thread_safe():
+    q = queue.Queue()
+    app_mod.set_log_queue(q)
+    threads = [threading.Thread(target=lambda: app_mod.log("x")) for _ in range(50)]
+    for t in threads: t.start()
+    for t in threads: t.join()
+    assert q.qsize() == 50
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```
+pytest tests/test_app_log.py -v
+```
+Expected: `ModuleNotFoundError`
+
+- [ ] **Step 3: Implement app.py**
+
+```python
+# whisper_app/app.py
+import queue
+import threading
+from enum import Enum
+
+class AppState(Enum):
+    IDLE         = "idle"
+    RECORDING    = "recording"
+    TRANSCRIBING = "transcribing"
+
+# ── Log queue ─────────────────────────────────────────────────────────────────
+
+_log_buffer: list[str] = []
+_log_queue: queue.Queue | None = None
+_log_lock = threading.Lock()
+_MAX_BUFFER = 500
+
+def log(msg: str) -> None:
+    with _log_lock:
+        if _log_queue is not None:
+            _log_queue.put(msg)
+        else:
+            if len(_log_buffer) >= _MAX_BUFFER:
+                _log_buffer.pop(0)
+            _log_buffer.append(msg)
+
+def set_log_queue(q: queue.Queue) -> None:
+    global _log_queue
+    with _log_lock:
+        _log_queue = q
+        buffered = list(_log_buffer)
+        _log_buffer.clear()
+    for msg in buffered:
+        q.put_nowait(msg)
+
+# ── Global state ──────────────────────────────────────────────────────────────
+
+state: AppState = AppState.IDLE
+audio_chunks: list = []
+model = None
+tray_icon = None
+overlay_window = None
+overlay_tk = None
+hotkey_listener = None
+```
+
+- [ ] **Step 4: Run tests**
+
+```
+pytest tests/test_app_log.py -v
+```
+Expected: 5 PASSED
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add whisper_app/app.py tests/test_app_log.py
+git commit -m "feat: add app module with thread-safe log queue"
+```
+
+---
+
+## Task 3: Move hotkey, typer, audio, transcriber modules
+
+**Files:**
+- Create: `whisper_app/hotkey.py`
+- Create: `whisper_app/typer.py`
+- Create: `whisper_app/audio.py`
+- Create: `whisper_app/transcriber.py`
+
+No new tests for this task — these are direct extractions with no logic change. Functional testing happens in Task 8 (manual smoke test).
+
+- [ ] **Step 1: Create hotkey.py** — copy `_MODIFIER_MAP`, `_KEY_MAP`, `_parse_hotkey`, `HotkeyListener` verbatim from `dictate.py`. No changes.
+
+```python
+# whisper_app/hotkey.py
+import threading
+from pynput.keyboard import Controller as KeyboardController, Listener as KeyboardListener, Key, KeyCode
+
+_MODIFIER_MAP = { ... }   # copy verbatim from dictate.py line 93-100
+_KEY_MAP      = { ... }   # copy verbatim from dictate.py line 102-110
+
+def _parse_hotkey(hotkey_str: str):
+    ...   # copy verbatim from dictate.py line 113-133
+
+class HotkeyListener:
+    ...   # copy verbatim from dictate.py line 136-168
+```
+
+- [ ] **Step 2: Create typer.py** — copy `type_text()` verbatim from `dictate.py`.
+
+```python
+# whisper_app/typer.py
+import os, shutil, subprocess, time
+from pynput.keyboard import Controller as KeyboardController
+
+def type_text(text: str) -> None:
+    ...   # copy verbatim from dictate.py line 75-88
+```
+
+- [ ] **Step 3: Create audio.py**
+
+```python
+# whisper_app/audio.py
+import sounddevice as sd
+from whisper_app import app, config
+
+def audio_callback(indata, frames, time_info, status):
+    if app.state == app.AppState.RECORDING:
+        app.audio_chunks.append(indata.copy())
+
+def get_audio_stream():
+    device = config.config.get("audio_device")
+    return sd.InputStream(
+        samplerate=config.config["sample_rate"],
+        channels=1,
+        device=device,
+        callback=audio_callback,
+    )
+```
+
+- [ ] **Step 4: Create transcriber.py**
+
+```python
+# whisper_app/transcriber.py
+import threading
+import time
+
+import numpy as np
+from faster_whisper import WhisperModel
+
+from whisper_app import app, config, typer
+
+def load_model() -> None:
+    app.log(f"Loading {config.config['model']} on {config.config['device']}...")
+    app.model = WhisperModel(
+        config.config["model"],
+        device=config.config["device"],
+        compute_type=config.config["compute_type"],
+    )
+    app.log("Model ready.")
+
+def stop_and_transcribe() -> None:
+    if app.state != app.AppState.RECORDING:
+        return
+    set_state(app.AppState.TRANSCRIBING)
+    chunks = list(app.audio_chunks)
+
+    if not chunks:
+        set_state(app.AppState.IDLE)
+        return
+
+    audio = np.concatenate(chunks, axis=0).flatten().astype(np.float32)
+    duration = len(audio) / config.config["sample_rate"]
+    rms = float(np.sqrt(np.mean(audio ** 2)))
+    app.log(f"Audio: {duration:.1f}s  RMS: {rms:.5f}")
+
+    if duration < 0.3 or rms < 0.0001:
+        app.log("Too short or silent — skipped.")
+        set_state(app.AppState.IDLE)
+        return
+
+    target_rms = 0.05
+    if rms > 0:
+        audio = audio * (target_rms / rms)
+    audio = np.clip(audio, -1.0, 1.0)
+
+    lang = config.config["language"] if config.config["language"] else None
+    prompt = config.get_initial_prompt()
+    segments, _ = app.model.transcribe(
+        audio, language=lang, beam_size=5, vad_filter=True,
+        initial_prompt=prompt if prompt else None,
+    )
+    text = " ".join(s.text for s in segments).strip()
+    text = config.apply_vocab(text)
+    app.log(f"Result: {repr(text)}")
+
+    set_state(app.AppState.IDLE)
+    if text:
+        time.sleep(0.15)
+        typer.type_text(text)
+
+def set_state(new_state: app.AppState) -> None:  # semi-public, used by main.py
+    app.state = new_state
+    if app.tray_icon:
+        from whisper_app import tray
+        tray.update_icon(new_state)
+    if new_state == app.AppState.RECORDING:
+        from whisper_app import overlay
+        overlay.show()
+    else:
+        from whisper_app import overlay
+        overlay.hide()
+```
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add whisper_app/hotkey.py whisper_app/typer.py whisper_app/audio.py whisper_app/transcriber.py
+git commit -m "feat: extract hotkey, typer, audio, transcriber modules"
+```
+
+---
+
+## Task 4: Overlay and tray modules
+
+**Files:**
+- Create: `whisper_app/overlay.py`
+- Create: `whisper_app/tray.py`
+
+- [ ] **Step 1: Create overlay.py** — extract from `dictate.py` lines 246-288.
+
+```python
+# whisper_app/overlay.py
+import os
+import tkinter as tk
+from whisper_app import app
+
+_window: tk.Toplevel | None = None
+
+def create(root: tk.Tk) -> None:
+    global _window
+    win = tk.Toplevel(root)
+    win.withdraw()
+    win.overrideredirect(True)
+    win.attributes("-topmost", True)
+    win.attributes("-alpha", 0.92)
+    win.configure(bg="#1a1a1a")
+
+    frame = tk.Frame(win, bg="#1a1a1a", padx=12, pady=10)
+    frame.pack(fill="both", expand=True)
+
+    dot = tk.Canvas(frame, width=14, height=14, bg="#1a1a1a", highlightthickness=0)
+    dot.create_oval(2, 2, 12, 12, fill="#e03030", outline="")
+    dot.pack(side="left", padx=(0, 8))
+
+    _sans = "Segoe UI" if os.name == "nt" else "sans-serif"
+    tk.Label(frame, text="Aufnahme läuft …", fg="white", bg="#1a1a1a",
+             font=(_sans, 11)).pack(side="left")
+    _window = win
+
+def show() -> None:
+    if app.overlay_tk and _window:
+        app.overlay_tk.after(0, _show_main)
+
+def hide() -> None:
+    if app.overlay_tk and _window:
+        app.overlay_tk.after(0, _hide_main)
+
+def _show_main() -> None:
+    sw = app.overlay_tk.winfo_screenwidth()
+    sh = app.overlay_tk.winfo_screenheight()
+    _window.geometry(f"220x54+{sw - 240}+{sh - 100}")
+    _window.deiconify()
+    _window.lift()
+
+def _hide_main() -> None:
+    _window.withdraw()
+```
+
+- [ ] **Step 2: Create tray.py**
+
+```python
+# whisper_app/tray.py
+import pystray
+from PIL import Image, ImageDraw
+from whisper_app import app
+from whisper_app.app import AppState
+
+def _make_icon(color: tuple) -> Image.Image:
+    img = Image.new("RGBA", (64, 64), (0, 0, 0, 0))
+    d = ImageDraw.Draw(img)
+    d.ellipse([4, 4, 60, 60], fill=color)
+    return img
+
+ICONS = {
+    AppState.IDLE:         _make_icon((40, 200, 80)),
+    AppState.RECORDING:    _make_icon((220, 50, 50)),
+    AppState.TRANSCRIBING: _make_icon((220, 180, 30)),
+}
+
+def update_icon(state: AppState) -> None:
+    if app.tray_icon:
+        app.tray_icon.icon = ICONS[state]
+
+def create(on_settings, on_vocab, on_show_log, on_quit) -> pystray.Icon:
+    menu = pystray.Menu(
+        pystray.MenuItem("Anzeigen", on_show_log, default=True),
+        pystray.MenuItem("Einstellungen", on_settings),
+        pystray.MenuItem("Vokabular", on_vocab),
+        pystray.Menu.SEPARATOR,
+        pystray.MenuItem("Beenden", on_quit),
+    )
+    icon = pystray.Icon("whisper", ICONS[AppState.IDLE], "Whisper Dictation", menu)
+    app.tray_icon = icon
+    return icon
+```
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add whisper_app/overlay.py whisper_app/tray.py
+git commit -m "feat: add overlay and tray modules"
+```
+
+---
+
+## Task 5: Log window
+
+**Files:**
+- Create: `whisper_app/log_window.py`
+
+- [ ] **Step 1: Create log_window.py**
+
+```python
+# whisper_app/log_window.py
+import os
+import queue
+import tkinter as tk
+from whisper_app import app
+
+BG     = "#18181f"
+BG2    = "#22222c"
+BG3    = "#2c2c38"
+BORDER = "#38384a"
+FG     = "#e8e8f0"
+FG2    = "#7878a0"
+AMBER  = "#f5a623"
+GREEN  = "#4ade80"
+RED    = "#f87171"
+YELLOW = "#facc15"
+_sans  = "Segoe UI" if os.name == "nt" else "sans-serif"
+_mono  = "Consolas" if os.name == "nt" else "monospace"
+
+_window: tk.Toplevel | None = None
+_text:   tk.Text      | None = None
+_log_q:  queue.Queue  | None = None
+_MAX_LINES = 200
+
+def create(root: tk.Tk, log_queue: queue.Queue,
+           on_settings, on_vocab) -> tk.Toplevel:
+    global _window, _text, _log_q
+    _log_q = log_queue
+
+    win = tk.Toplevel(root)
+    win.title("Whisper Dictation")
+    win.configure(bg=BG)
+    win.resizable(False, False)
+    win.protocol("WM_DELETE_WINDOW", win.withdraw)
+
+    # ── Header ──
+    hdr = tk.Frame(win, bg=BG2)
+    hdr.pack(fill="x")
+    tk.Frame(hdr, bg=AMBER, height=2).pack(fill="x")
+    hdr_inner = tk.Frame(hdr, bg=BG2, padx=12, pady=6)
+    hdr_inner.pack(fill="x")
+    tk.Label(hdr_inner, text="WHISPER DICTATION",
+             font=(_sans, 11, "bold"), bg=BG2, fg=FG).pack(side="left")
+    tk.Button(hdr_inner, text="✕", command=win.withdraw,
+              bg=BG2, fg=FG2, relief="flat", bd=0,
+              font=(_sans, 11), cursor="hand2").pack(side="right")
+
+    # ── Log text ──
+    txt = tk.Text(win, bg=BG, fg=FG, font=(_mono, 10),
+                  relief="flat", bd=0, padx=10, pady=6,
+                  state="disabled", wrap="none",
+                  width=48, height=10,
+                  highlightthickness=0)
+    txt.pack(fill="both", expand=True)
+    txt.tag_config("green",  foreground=GREEN)
+    txt.tag_config("red",    foreground=RED)
+    txt.tag_config("yellow", foreground=YELLOW)
+    txt.tag_config("grey",   foreground=FG2)
+    _text = txt
+
+    # ── Button bar ──
+    bar = tk.Frame(win, bg=BG2, pady=6, padx=10)
+    bar.pack(fill="x")
+    for label, cmd in [("⚙ Einstellungen", on_settings), ("📚 Vokabular", on_vocab)]:
+        b = tk.Button(bar, text=label, command=cmd,
+                      bg=BG3, fg=FG, relief="flat", bd=0,
+                      font=(_sans, 10), padx=10, pady=4, cursor="hand2")
+        b.pack(side="left", padx=(0, 4))
+    tk.Button(bar, text="🗑", command=_clear_log,
+              bg=BG3, fg=FG2, relief="flat", bd=0,
+              font=(_sans, 10), padx=8, pady=4, cursor="hand2").pack(side="right")
+
+    _window = win
+    win.withdraw()
+    root.after(100, _poll)
+    return win
+
+def show() -> None:
+    if _window:
+        _window.deiconify()
+        _window.lift()
+
+def _clear_log() -> None:
+    if _text:
+        _text.config(state="normal")
+        _text.delete("1.0", "end")
+        _text.config(state="disabled")
+
+def _tag_for(msg: str) -> str:
+    low = msg.lower()
+    if any(x in low for x in ("recording", "aufnahme")):
+        return "red"
+    if any(x in low for x in ("transcrib", "loading", "laden")):
+        return "yellow"
+    if any(x in low for x in ("result:", "ready", "bereit")):
+        return "green"
+    return "grey"
+
+def _poll() -> None:
+    if _log_q and _text:
+        try:
+            while True:
+                msg = _log_q.get_nowait()
+                _append(msg)
+        except queue.Empty:
+            pass
+    if app.overlay_tk:
+        app.overlay_tk.after(100, _poll)
+
+def _append(msg: str) -> None:
+    _text.config(state="normal")
+    # Trim to MAX_LINES
+    lines = int(_text.index("end-1c").split(".")[0])
+    if lines >= _MAX_LINES:
+        _text.delete("1.0", "2.0")
+    _text.insert("end", msg + "\n", _tag_for(msg))
+    _text.see("end")
+    _text.config(state="disabled")
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+git add whisper_app/log_window.py
+git commit -m "feat: add compact log panel"
+```
+
+---
+
+## Task 6: Settings and vocab windows
+
+**Files:**
+- Create: `whisper_app/settings_window.py`
+- Create: `whisper_app/vocab_window.py`
+
+- [ ] **Step 1: Create settings_window.py**
+
+Extract `_open_settings_main()` from `dictate.py` into `open(root, on_reload)`. Import the config module as `cfg` (`from whisper_app import config as cfg`), change every `config["x"]` reference to `cfg.config["x"]`, and replace `save_config()` calls with `cfg.save_config()`. Add an INSTALLATION section at the bottom that calls into `installer.py` (created in Task 7).
+
+```python
+# whisper_app/settings_window.py
+import os, threading
+import tkinter as tk
+from tkinter import ttk
+import sounddevice as sd
+from whisper_app import config, installer
+
+# (colour constants identical to dictate.py — copy BG/BG2/BG3/BORDER/FG/FG2/AMBER/AMBER2/GREEN)
+
+def open(root: tk.Tk, on_reload) -> None:
+    root.after(0, lambda: _open_main(root, on_reload))
+
+def _open_main(root: tk.Tk, on_reload) -> None:
+    # Copy dictate.py lines 381–548 (_open_settings_main) verbatim, then apply:
+    #
+    # 1. Remove the global `config` reference at the top — import from module instead:
+    #      from whisper_app import config as cfg
+    #
+    # 2. Replace every occurrence of:
+    #      config["x"]        →  cfg.config["x"]
+    #      config.get("x")    →  cfg.config.get("x")
+    #      config["x"] = y    →  cfg.config["x"] = y
+    #
+    # 3. Replace:
+    #      save_config()      →  cfg.save_config()
+    #
+    # 4. In the save() callback, replace:
+    #      threading.Thread(target=reload_model_and_hotkey, daemon=True).start()
+    #      →  threading.Thread(target=on_reload, daemon=True).start()
+    #
+    # 5. After the existing button bar at the end of content, add:
+    #      _add_installation_section(win, content, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B)
+    pass  # copy + apply substitutions above
+
+def _add_installation_section(win, content, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) -> None:
+    """Append INSTALLATION section to settings window."""
+    import sys
+    from whisper_app import installer
+
+    is_frozen = getattr(sys, "frozen", False)
+
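+    # Section header — NOTE: `section()` and `row()` are not defined in this function; reuse the helpers used for the AUDIO/MODELL sections (pass them in or hoist them to module level)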
+    section("INSTALLATION")
+
+    features = [
+        ("Autostart",           installer.autostart_installed,  installer.install_autostart,  installer.remove_autostart),
+        ("Startmenü-Eintrag",   installer.startmenu_installed,  installer.install_startmenu,  installer.remove_startmenu),
+        ("Desktop-Verknüpfung", installer.desktop_installed,    installer.install_desktop,    installer.remove_desktop),
+    ]
+
+    for label, is_installed_fn, install_fn, remove_fn in features:
+        f = row(label)
+
+        status_var = tk.StringVar(value="eingerichtet" if is_installed_fn() else "nicht eingerichtet")
+        status_lbl = tk.Label(f, textvariable=status_var, font=FONT_S,
+                              bg=BG, fg=AMBER if is_installed_fn() else FG2)
+        status_lbl.pack(side="left", padx=(0, 12))
+
+        def make_callbacks(install_f, remove_f, sv, lbl):
+            def do_install():
+                install_f()
+                sv.set("eingerichtet")
+                lbl.config(fg=AMBER)
+            def do_remove():
+                remove_f()
+                sv.set("nicht eingerichtet")
+                lbl.config(fg=FG2)
+            return do_install, do_remove
+
+        do_install, do_remove = make_callbacks(install_fn, remove_fn, status_var, status_lbl)
+
+        btn_install = tk.Button(f, text="Einrichten", command=do_install,
+                                bg=BG3, fg=FG, font=FONT_S, relief="flat",
+                                padx=8, pady=3, cursor="hand2" if is_frozen else "arrow", bd=0)
+        btn_install.pack(side="left", padx=(0, 4))
+
+        btn_remove = tk.Button(f, text="Entfernen", command=do_remove,
+                               bg=BG3, fg=FG2, font=FONT_S, relief="flat",
+                               padx=8, pady=3, cursor="hand2" if is_frozen else "arrow", bd=0)
+        btn_remove.pack(side="left")
+
+        if not is_frozen:
+            for btn in (btn_install, btn_remove):
+                btn.config(state="disabled")
+            # Simple tooltip via label
+            tk.Label(f, text="Nur im gebauten Binary verfügbar",
+                     font=FONT_S, bg=BG, fg=FG2).pack(side="left", padx=(8, 0))
+```
+
+- [ ] **Step 2: Create vocab_window.py**
+
+Extract `_open_vocab_main()` from `dictate.py` verbatim into `open(root)`. Replace bare `vocab` references with `config.vocab` and `save_vocab()` calls with `config.save_vocab()`, so all vocabulary access goes through the `config` module.
+
+```python
+# whisper_app/vocab_window.py
+import tkinter as tk
+from whisper_app import config
+
+def open(root: tk.Tk) -> None:
+    root.after(0, lambda: _open_main(root))
+
+def _open_main(root: tk.Tk) -> None:
+    # copy dictate.py lines 556-741 verbatim
+    # Replace: vocab["x"] → config.vocab["x"]
+    #          save_vocab() → config.save_vocab()
+    pass  # implementer fills in
+```
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add whisper_app/settings_window.py whisper_app/vocab_window.py
+git commit -m "feat: extract settings and vocab windows to modules"
+```
+
+---
+
+## Task 7: Installer module
+
+**Files:**
+- Create: `whisper_app/installer.py`
+- Create: `tests/test_installer.py`
+
+- [ ] **Step 1: Write failing tests**
+
+```python
+# tests/test_installer.py
+import sys, os
+
+def test_autostart_roundtrip(tmp_path, monkeypatch):
+    """install then remove leaves no trace."""
+    import whisper_app.installer as inst
+    monkeypatch.setattr(sys, "frozen", True, raising=False)
+    monkeypatch.setattr(sys, "executable", str(tmp_path / "wd.exe"))
+    if sys.platform != "win32":
+        monkeypatch.setenv("HOME", str(tmp_path))
+        # Ensure autostart dir path is under tmp
+        monkeypatch.setattr(inst, "_autostart_path",
+                            lambda: str(tmp_path / ".config/autostart/whisper-dictation.desktop"))
+    inst.install_autostart()
+    assert inst.autostart_installed()
+    inst.remove_autostart()
+    assert not inst.autostart_installed()
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```
+pytest tests/test_installer.py -v
+```
+Expected: `ModuleNotFoundError`
+
+- [ ] **Step 3: Create installer.py**
+
+```python
+# whisper_app/installer.py
+import os
+import subprocess
+import sys
+
+def _exe_path() -> str:
+    return sys.executable if getattr(sys, "frozen", False) else ""
+
+def _is_frozen() -> bool:
+    return getattr(sys, "frozen", False)
+
+# ── Autostart ─────────────────────────────────────────────────────────────────
+
+def _autostart_path() -> str:
+    if sys.platform == "win32":
+        return ""   # Windows uses registry, not a file path
+    return os.path.join(os.path.expanduser("~"), ".config", "autostart",
+                        "whisper-dictation.desktop")
+
+def autostart_installed() -> bool:
+    if sys.platform == "win32":
+        import winreg
+        try:
+            key = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+                r"Software\Microsoft\Windows\CurrentVersion\Run")
+            winreg.QueryValueEx(key, "WhisperDictation")
+            return True
+        except FileNotFoundError:
+            return False
+    return os.path.exists(_autostart_path())
+
+def install_autostart() -> None:
+    if sys.platform == "win32":
+        import winreg
+        key = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+            r"Software\Microsoft\Windows\CurrentVersion\Run", access=winreg.KEY_SET_VALUE)
+        winreg.SetValueEx(key, "WhisperDictation", 0, winreg.REG_SZ, f'"{_exe_path()}"')
+    else:
+        path = _autostart_path()
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        _write_desktop(path)
+
+def remove_autostart() -> None:
+    if sys.platform == "win32":
+        import winreg
+        try:
+            key = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+                r"Software\Microsoft\Windows\CurrentVersion\Run", access=winreg.KEY_SET_VALUE)
+            winreg.DeleteValue(key, "WhisperDictation")
+        except FileNotFoundError:
+            pass
+    else:
+        path = _autostart_path()
+        if os.path.exists(path):
+            os.remove(path)
+
+# ── Start menu ────────────────────────────────────────────────────────────────
+
+def _startmenu_path() -> str:
+    if sys.platform == "win32":
+        return os.path.join(os.environ.get("APPDATA", ""),
+            r"Microsoft\Windows\Start Menu\Programs\Whisper Dictation.lnk")
+    return os.path.join(os.path.expanduser("~"), ".local", "share", "applications",
+                        "whisper-dictation.desktop")
+
+def startmenu_installed() -> bool:
+    return os.path.exists(_startmenu_path())
+
+def install_startmenu() -> None:
+    path = _startmenu_path()
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    if sys.platform == "win32":
+        _create_lnk(path)
+    else:
+        _write_desktop(path)
+
+def remove_startmenu() -> None:
+    path = _startmenu_path()
+    if os.path.exists(path):
+        os.remove(path)
+
+# ── Desktop shortcut ──────────────────────────────────────────────────────────
+
+def _desktop_dir() -> str:
+    if sys.platform == "win32":
+        return os.path.join(os.environ.get("USERPROFILE", os.path.expanduser("~")), "Desktop")
+    try:
+        result = subprocess.run(["xdg-user-dir", "DESKTOP"], capture_output=True, text=True)
+        path = result.stdout.strip()
+        if path:
+            return path
+    except FileNotFoundError:
+        pass
+    return os.path.join(os.path.expanduser("~"), "Desktop")
+
+def _desktop_path() -> str:
+    if sys.platform == "win32":
+        return os.path.join(_desktop_dir(), "Whisper Dictation.lnk")
+    return os.path.join(_desktop_dir(), "whisper-dictation.desktop")
+
+def desktop_installed() -> bool:
+    return os.path.exists(_desktop_path())
+
+def install_desktop() -> None:
+    path = _desktop_path()
+    if sys.platform == "win32":
+        _create_lnk(path)
+    else:
+        _write_desktop(path)
+        os.chmod(path, 0o755)
+
+def remove_desktop() -> None:
+    path = _desktop_path()
+    if os.path.exists(path):
+        os.remove(path)
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+def _write_desktop(path: str) -> None:
+    icon_path = os.path.join(os.path.dirname(_exe_path()), "icon.png")
+    content = (
+        "[Desktop Entry]\n"
+        "Type=Application\n"
+        "Name=Whisper Dictation\n"
+        f"Exec={_exe_path()}\n"
+        f"Icon={icon_path}\n"
+        "Terminal=false\n"
+        "Categories=Utility;\n"
+    )
+    with open(path, "w") as f:
+        f.write(content)
+
+def _create_lnk(path: str) -> None:
+    if sys.platform != "win32":
+        return
+    import win32com.client  # guarded import — Windows only
+    shell = win32com.client.Dispatch("WScript.Shell")
+    lnk = shell.CreateShortCut(path)
+    lnk.Targetpath = _exe_path()
+    lnk.WorkingDirectory = os.path.dirname(_exe_path())
+    lnk.IconLocation = _exe_path()
+    lnk.save()
+```
+
+- [ ] **Step 4: Run tests**
+
+```
+pytest tests/test_installer.py -v
+```
+Expected: 1 PASSED (on Linux; on Windows the registry variant runs)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add whisper_app/installer.py tests/test_installer.py
+git commit -m "feat: add installer module for system integration"
+```
+
+---
+
+## Task 8: main.py — wire everything together
+
+**Files:**
+- Create: `main.py`
+
+- [ ] **Step 1: Create main.py**
+
+```python
+# main.py
+import os
+import queue
+import sys
+import threading
+
+def _setup_error_log():
+    """Last-resort error log for crashes before tray appears (frozen mode)."""
+    if not getattr(sys, "frozen", False):
+        return
+    import traceback
+    if os.name == "nt":
+        log_dir = os.path.join(os.environ.get("LOCALAPPDATA", ""), "WhisperDictation")
+    else:
+        log_dir = os.path.join(os.path.expanduser("~"), ".local", "share", "WhisperDictation")
+    os.makedirs(log_dir, exist_ok=True)
+    log_path = os.path.join(log_dir, "error.log")
+    sys.excepthook = lambda *args: open(log_path, "a").write(
+        "".join(traceback.format_exception(*args)) + "\n"
+    )
+
+def main():
+    _setup_error_log()
+
+    from whisper_app import app, config, audio, transcriber, hotkey
+    from whisper_app import overlay, tray, log_window, settings_window, vocab_window
+    import tkinter as tk
+
+    config.load_config()
+    config.load_vocab()
+
+    # Tkinter root (hidden)
+    root = tk.Tk()
+    root.withdraw()
+    app.overlay_tk = root
+
+    # Log queue — connect before model load so early messages appear
+    log_q: queue.Queue = queue.Queue()
+    app.set_log_queue(log_q)
+
+    # Windows
+    overlay.create(root)
+    log_win = log_window.create(
+        root, log_q,
+        on_settings=lambda: settings_window.open(root, _reload),
+        on_vocab=lambda: vocab_window.open(root),
+    )
+
+    # Load model in background so the tray appears immediately
+    threading.Thread(target=transcriber.load_model, daemon=True).start()
+
+    # Audio stream
+    stream = audio.get_audio_stream()
+    stream.start()
+
+    # Hotkey
+    def _on_release():
+        threading.Thread(target=transcriber.stop_and_transcribe, daemon=True).start()
+
+    app.hotkey_listener = hotkey.HotkeyListener(
+        config.config["hotkey"],
+        on_press=_start_recording,
+        on_release=_on_release,
+    )
+
+    # Tray
+    icon = tray.create(
+        on_settings=lambda: settings_window.open(root, _reload),
+        on_vocab=lambda: vocab_window.open(root),
+        on_show_log=lambda: root.after(0, log_window.show),
+        on_quit=lambda: _quit(stream, icon),
+    )
+    threading.Thread(target=icon.run, daemon=True).start()
+
+    app.log(f"Bereit. Hotkey: {config.config['hotkey']}")
+    root.mainloop()
+    stream.stop()
+
+def _start_recording():
+    from whisper_app import app, transcriber
+    app.audio_chunks = []
+    transcriber.set_state(app.AppState.RECORDING)   # set_state is semi-public by design
+    app.log("Recording...")
+
+def _reload():
+    from whisper_app import app, config, transcriber, hotkey
+    if app.hotkey_listener:
+        app.hotkey_listener.stop()
+    threading.Thread(target=transcriber.load_model, daemon=True).start()
+    app.hotkey_listener = hotkey.HotkeyListener(
+        config.config["hotkey"],
+        on_press=_start_recording,
+        on_release=lambda: threading.Thread(
+            target=transcriber.stop_and_transcribe, daemon=True).start(),
+    )
+    app.log(f"Hotkey aktualisiert: {config.config['hotkey']}")
+
+def _quit(stream, icon):
+    stream.stop()
+    icon.stop()
+    from whisper_app import app
+    if app.overlay_tk:
+        app.overlay_tk.after(0, app.overlay_tk.quit)
+
+# Start the application only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+```
+
+- [ ] **Step 2: Smoke-test in dev mode**
+
+```bash
+# Windows
+python main.py
+# Linux
+python3 main.py
+```
+
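+Expected: the tray icon appears and clicking it shows the log panel. (In dev mode the terminal used to launch the app stays open; the no-console behaviour applies only to the packaged build.)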
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add main.py
+git commit -m "feat: add main.py entry point wiring all modules"
+```
+
+---
+
+## Task 9: PyInstaller spec and build script
+
+**Files:**
+- Create: `whisper-dictation.spec`
+- Create: `build.py`
+
+- [ ] **Step 1: Create whisper-dictation.spec**
+
+```python
+# whisper-dictation.spec
+# -*- mode: python ; coding: utf-8 -*-
+import sys
+
+# Analysis: collect main.py and its dependencies.
+a = Analysis(
+    ['main.py'],
+    pathex=[],
+    binaries=[],
+    datas=[],
+    # Modules listed explicitly so they are bundled even if static import
+    # analysis does not find them. The pynput keyboard backends for Windows,
+    # Xorg and uinput are all listed, regardless of the build platform.
+    hiddenimports=[
+        'ctranslate2',
+        'faster_whisper',
+        'sounddevice',
+        'pynput.keyboard._win32',
+        'pynput.keyboard._xorg',
+        'pynput.keyboard._uinput',
+    ],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    noarchive=False,
+)
+pyz = PYZ(a.pure)
+# exclude_binaries=True together with COLLECT below gives a one-folder build.
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name='whisper-dictation',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=False,  # windowed build: no console/terminal window
+    icon='icon.ico',  # generated by build.py's generate_icon()
+)
+# Gather the executable, binaries and data files into dist/whisper-dictation/.
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='whisper-dictation',
+)
+```
+
+- [ ] **Step 2: Create build.py**
+
+```python
+# build.py
+import os
+import shutil
+import sys
+import subprocess
+from PIL import Image
+
+def generate_icon():
+    """Generate icon.ico from tray icon colors (green dot on transparent)."""
+    sizes = [16, 32, 48, 256]
+    frames = []
+    for size in sizes:
+        img = Image.new("RGBA", (size, size), (0, 0, 0, 0))
+        from PIL import ImageDraw
+        d = ImageDraw.Draw(img)
+        margin = max(1, size // 16)
+        d.ellipse([margin, margin, size - margin, size - margin], fill=(40, 200, 80))
+        frames.append(img)
+    frames[0].save("icon.ico", format="ICO", sizes=[(s, s) for s in sizes],
+                   append_images=frames[1:])
+    print("icon.ico generated.")
+
+def build():
+    generate_icon()
+    subprocess.run([sys.executable, "-m", "PyInstaller", "whisper-dictation.spec",
+                    "--noconfirm"], check=True)
+
+    dist_dir = os.path.join("dist", "whisper-dictation")
+    for fname in ["config.json", "vocabulary.json"]:
+        dest = os.path.join(dist_dir, fname)
+        if not os.path.exists(dest):
+            shutil.copy(fname, dest)
+            print(f"Copied {fname} → {dist_dir}/")
+        else:
+            print(f"Skipped {fname} (already exists in dist — preserving user edits)")
+
+    print(f"\nBuild complete: {dist_dir}/")
+
+# Run the build only when this file is executed as a script.
+if __name__ == "__main__":
+    build()
+```
+
+- [ ] **Step 3: Run build (on target platform)**
+
+```bash
+python build.py
+```
+
+Expected output ends with:
+```
+Build complete: dist/whisper-dictation/
+```
+
+- [ ] **Step 4: Test the binary**
+
+```bash
+# Windows
+dist\whisper-dictation\whisper-dictation.exe
+# Linux
+./dist/whisper-dictation/whisper-dictation
+```
+
+Expected: tray icon appears, no terminal window, log panel opens on click.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add whisper-dictation.spec build.py
+git commit -m "feat: add PyInstaller spec and build script"
+```
+
+---
+
+## Task 10: Delete dictate.py, update start scripts
+
+**Files:**
+- Delete: `dictate.py`
+- Modify: `start.sh`
+- Modify: `start.bat` (if exists)
+- Modify: `README.md`
+
+- [ ] **Step 1: Update start.sh**
+
+```bash
+#!/bin/bash
+# Dev-mode launcher: run main.py with the project's Linux virtualenv.
+# Abort if the script's directory cannot be entered, so the relative paths
+# below never resolve against the wrong working directory.
+cd "$(dirname "$0")" || exit 1
+.venv-linux/bin/python -u main.py
+```
+
+- [ ] **Step 2: Update start.bat** (Windows dev mode)
+
+```bat
+@echo off
+rem Dev-mode launcher: switch to this script's folder and run main.py with
+rem the project's Windows virtualenv interpreter.
+cd /d "%~dp0"
+.venv-windows\Scripts\python.exe -u main.py
+```
+
+- [ ] **Step 3: Delete dictate.py**
+
+```bash
+git rm dictate.py
+```
+
+- [ ] **Step 4: Update README.md** — add section "Build" explaining `python build.py`, and add `pywin32` to Windows requirements. Keep existing usage instructions but replace `dictate.py` references with `main.py`.
+
+- [ ] **Step 5: Run all tests**
+
+```bash
+pytest tests/ -v
+```
+
+Expected: all PASSED
+
+- [ ] **Step 6: Final commit**
+
+```bash
+git add start.sh start.bat README.md
+git commit -m "feat: replace dictate.py with modular app, update start scripts"
+```