From c67f6d39d552dc3a10d4c288f3c9faf10fc0e5b8 Mon Sep 17 00:00:00 2001 From: beo3000 Date: Fri, 20 Mar 2026 11:23:12 +0100 Subject: [PATCH] docs: add GUI app implementation plan --- docs/superpowers/plans/2026-03-20-gui-app.md | 1334 ++++++++++++++++++ 1 file changed, 1334 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-20-gui-app.md diff --git a/docs/superpowers/plans/2026-03-20-gui-app.md b/docs/superpowers/plans/2026-03-20-gui-app.md new file mode 100644 index 0000000..2191368 --- /dev/null +++ b/docs/superpowers/plans/2026-03-20-gui-app.md @@ -0,0 +1,1334 @@ +# GUI App Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Convert `dictate.py` into a modular packaged desktop app with tray-based log panel, no terminal window, and system integration (autostart, start menu, desktop shortcut). + +**Architecture:** Split the monolithic `dictate.py` into a `whisper_app/` package with one module per concern. A central log queue in `app.py` replaces all `print()` calls; a compact tkinter panel displays the queue. PyInstaller builds a `--onedir --noconsole` binary per platform. 
+ +**Tech Stack:** Python 3.10+, tkinter, pystray, faster-whisper, sounddevice, pynput, PyInstaller, Pillow, pywin32 (Windows only) + +--- + +## File Map + +| File | Action | Responsibility | +|---|---|---| +| `whisper_app/__init__.py` | Create | Package marker | +| `whisper_app/app.py` | Create | AppState enum, log queue, `log()`, `set_log_queue()` | +| `whisper_app/config.py` | Create | Path resolution, load/save config + vocab | +| `whisper_app/typer.py` | Create | `type_text()` cross-platform | +| `whisper_app/hotkey.py` | Create | `HotkeyListener` (move from dictate.py) | +| `whisper_app/audio.py` | Create | `get_audio_stream()`, `audio_callback()` | +| `whisper_app/transcriber.py` | Create | `load_model()`, `stop_and_transcribe()` | +| `whisper_app/overlay.py` | Create | Recording overlay window | +| `whisper_app/tray.py` | Create | pystray icon + menu | +| `whisper_app/log_window.py` | Create | Compact log panel (380×220px) | +| `whisper_app/settings_window.py` | Create | Settings dialog + INSTALLATION section | +| `whisper_app/vocab_window.py` | Create | Vocabulary dialog | +| `whisper_app/installer.py` | Create | Autostart, start menu, desktop shortcut | +| `main.py` | Create | Entry point, wires all modules, error logfile fallback | +| `build.py` | Create | Generates icon.ico, runs PyInstaller, copies config files | +| `whisper-dictation.spec` | Create | Manual PyInstaller spec for ctranslate2 | +| `dictate.py` | Delete | Replaced by the above | +| `tests/test_config.py` | Create | Path resolution tests | +| `tests/test_app_log.py` | Create | Log queue / buffer tests | +| `tests/test_installer.py` | Create | Installer status detection tests | + +--- + +## Task 1: Package skeleton + config module + +**Files:** +- Create: `whisper_app/__init__.py` +- Create: `whisper_app/config.py` +- Create: `tests/test_config.py` + +- [ ] **Step 1: Write failing tests for path resolution** + +```python +# tests/test_config.py +import sys, os, importlib + +def 
test_app_dir_dev_mode(monkeypatch, tmp_path): + """In dev mode, _app_dir() returns the repo root (two levels above config.py).""" + monkeypatch.delattr(sys, "frozen", raising=False) + from whisper_app import config + importlib.reload(config) + result = config._app_dir() + # Should be the repo root — parent of whisper_app/ + assert os.path.isdir(result) + assert os.path.basename(result) != "whisper_app" + +def test_app_dir_frozen_mode(monkeypatch, tmp_path): + """In frozen mode, _app_dir() returns dirname(sys.executable).""" + monkeypatch.setattr(sys, "frozen", True, raising=False) + monkeypatch.setattr(sys, "executable", str(tmp_path / "whisper-dictation.exe")) + from whisper_app import config + importlib.reload(config) + assert config._app_dir() == str(tmp_path) +``` + +- [ ] **Step 2: Run test to verify it fails** + +``` +pytest tests/test_config.py -v +``` +Expected: `ModuleNotFoundError` — package doesn't exist yet. + +- [ ] **Step 3: Create package and config module** + +```python +# whisper_app/__init__.py +# (empty) +``` + +```python +# whisper_app/config.py +import json +import os +import sys + +def _app_dir() -> str: + """Root dir for config.json and vocabulary.json.""" + if getattr(sys, "frozen", False): + return os.path.dirname(sys.executable) + # config.py lives at whisper_app/config.py → parent of parent = repo root + return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +DATA_DIR = os.environ.get("WHISPER_DATA_DIR") or _app_dir() + +_env_local = os.environ.get("WHISPER_LOCAL_DIR") +if _env_local: + _local_dir = _env_local +elif os.name == "nt": + _local_dir = os.path.join(os.environ.get("LOCALAPPDATA", DATA_DIR), "WhisperDictation") +else: + _local_dir = os.path.join(os.path.expanduser("~"), ".local", "share", "WhisperDictation") + +os.makedirs(_local_dir, exist_ok=True) + +CONFIG_FILE = os.path.join(DATA_DIR, "config.json") +CONFIG_LOCAL_FILE = os.path.join(_local_dir, "config_local.json") +VOCAB_FILE = os.path.join(DATA_DIR, 
"vocabulary.json") + +DEFAULT_CONFIG = { + "hotkey": "ctrl+shift+space", + "model": "medium", + "device": "cuda", + "compute_type": "float16", + "language": "de", + "audio_device": None, + "sample_rate": 16000, +} + +MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"] +LANGUAGES = {"Deutsch": "de", "English": "en", "Français": "fr", "Español": "es", + "Italiano": "it", "Auto": None} +DEVICES = ["cuda", "cpu"] +COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"} +LOCAL_KEYS = {"audio_device", "device", "compute_type"} + +config: dict = {} +vocab: dict = {"words": [], "replacements": []} + +def load_config() -> None: + global config + config = dict(DEFAULT_CONFIG) + if os.path.exists(CONFIG_FILE): + with open(CONFIG_FILE) as f: + config.update(json.load(f)) + if os.path.exists(CONFIG_LOCAL_FILE): + with open(CONFIG_LOCAL_FILE) as f: + config.update(json.load(f)) + +def save_config() -> None: + shared = {k: v for k, v in config.items() if k not in LOCAL_KEYS} + local = {k: v for k, v in config.items() if k in LOCAL_KEYS} + with open(CONFIG_FILE, "w") as f: + json.dump(shared, f, indent=2) + with open(CONFIG_LOCAL_FILE, "w") as f: + json.dump(local, f, indent=2) + +def load_vocab() -> None: + global vocab + if os.path.exists(VOCAB_FILE): + with open(VOCAB_FILE) as f: + vocab = json.load(f) + else: + vocab = {"words": [], "replacements": []} + +def save_vocab() -> None: + with open(VOCAB_FILE, "w") as f: + json.dump(vocab, f, indent=2, ensure_ascii=False) + +def apply_vocab(text: str) -> str: + for r in vocab.get("replacements", []): + text = text.replace(r["from"], r["to"]) + return text + +def get_initial_prompt() -> str: + words = vocab.get("words", []) + return ", ".join(words) if words else "" +``` + +- [ ] **Step 4: Run tests** + +``` +pytest tests/test_config.py -v +``` +Expected: 2 PASSED + +- [ ] **Step 5: Commit** + +```bash +git add whisper_app/__init__.py whisper_app/config.py tests/test_config.py 
+git commit -m "feat: add whisper_app package with config module" +``` + +--- + +## Task 2: Log queue in app.py + +**Files:** +- Create: `whisper_app/app.py` +- Create: `tests/test_app_log.py` + +- [ ] **Step 1: Write failing tests** + +```python +# tests/test_app_log.py +import queue, threading, importlib +import whisper_app.app as app_mod + +def setup_function(): + importlib.reload(app_mod) + +def test_log_before_queue_buffers(): + app_mod.log("hello") + app_mod.log("world") + assert app_mod._log_buffer == ["hello", "world"] + assert app_mod._log_queue is None + +def test_log_buffer_capped_at_500(): + for i in range(600): + app_mod.log(str(i)) + assert len(app_mod._log_buffer) <= 500 + +def test_set_log_queue_flushes_buffer(): + app_mod.log("buffered") + q = queue.Queue() + app_mod.set_log_queue(q) + assert q.get_nowait() == "buffered" + assert app_mod._log_buffer == [] + +def test_log_after_queue_goes_to_queue(): + q = queue.Queue() + app_mod.set_log_queue(q) + app_mod.log("direct") + assert q.get_nowait() == "direct" + +def test_log_thread_safe(): + q = queue.Queue() + app_mod.set_log_queue(q) + threads = [threading.Thread(target=lambda: app_mod.log("x")) for _ in range(50)] + for t in threads: t.start() + for t in threads: t.join() + assert q.qsize() == 50 +``` + +- [ ] **Step 2: Run test to verify it fails** + +``` +pytest tests/test_app_log.py -v +``` +Expected: `ModuleNotFoundError` + +- [ ] **Step 3: Implement app.py** + +```python +# whisper_app/app.py +import queue +import threading +from enum import Enum + +class AppState(Enum): + IDLE = "idle" + RECORDING = "recording" + TRANSCRIBING = "transcribing" + +# ── Log queue ───────────────────────────────────────────────────────────────── + +_log_buffer: list[str] = [] +_log_queue: queue.Queue | None = None +_log_lock = threading.Lock() +_MAX_BUFFER = 500 + +def log(msg: str) -> None: + with _log_lock: + if _log_queue is not None: + _log_queue.put(msg) + else: + if len(_log_buffer) >= _MAX_BUFFER: + 
_log_buffer.pop(0) + _log_buffer.append(msg) + +def set_log_queue(q: queue.Queue) -> None: + global _log_queue + with _log_lock: + _log_queue = q + buffered = list(_log_buffer) + _log_buffer.clear() + for msg in buffered: + q.put_nowait(msg) + +# ── Global state ────────────────────────────────────────────────────────────── + +state: AppState = AppState.IDLE +audio_chunks: list = [] +model = None +tray_icon = None +overlay_window = None +overlay_tk = None +hotkey_listener = None +``` + +- [ ] **Step 4: Run tests** + +``` +pytest tests/test_app_log.py -v +``` +Expected: 5 PASSED + +- [ ] **Step 5: Commit** + +```bash +git add whisper_app/app.py tests/test_app_log.py +git commit -m "feat: add app module with thread-safe log queue" +``` + +--- + +## Task 3: Move hotkey, typer, audio, transcriber modules + +**Files:** +- Create: `whisper_app/hotkey.py` +- Create: `whisper_app/typer.py` +- Create: `whisper_app/audio.py` +- Create: `whisper_app/transcriber.py` + +No new tests for this task — these are direct extractions with no logic change. Functional testing happens in Task 8 (manual smoke test). + +- [ ] **Step 1: Create hotkey.py** — copy `_MODIFIER_MAP`, `_KEY_MAP`, `_parse_hotkey`, `HotkeyListener` verbatim from `dictate.py`. No changes. + +```python +# whisper_app/hotkey.py +import threading +from pynput.keyboard import Controller as KeyboardController, Listener as KeyboardListener, Key, KeyCode + +_MODIFIER_MAP = { ... } # copy verbatim from dictate.py line 93-100 +_KEY_MAP = { ... } # copy verbatim from dictate.py line 102-110 + +def _parse_hotkey(hotkey_str: str): + ... # copy verbatim from dictate.py line 113-133 + +class HotkeyListener: + ... # copy verbatim from dictate.py line 136-168 +``` + +- [ ] **Step 2: Create typer.py** — copy `type_text()` verbatim from `dictate.py`. + +```python +# whisper_app/typer.py +import os, shutil, subprocess, time +from pynput.keyboard import Controller as KeyboardController + +def type_text(text: str) -> None: + ... 
# copy verbatim from dictate.py line 75-88 +``` + +- [ ] **Step 3: Create audio.py** + +```python +# whisper_app/audio.py +import sounddevice as sd +from whisper_app import app, config + +def audio_callback(indata, frames, time_info, status): + if app.state == app.AppState.RECORDING: + app.audio_chunks.append(indata.copy()) + +def get_audio_stream(): + device = config.config.get("audio_device") + return sd.InputStream( + samplerate=config.config["sample_rate"], + channels=1, + device=device, + callback=audio_callback, + ) +``` + +- [ ] **Step 4: Create transcriber.py** + +```python +# whisper_app/transcriber.py +import threading +import time + +import numpy as np +from faster_whisper import WhisperModel + +from whisper_app import app, config, typer + +def load_model() -> None: + app.log(f"Loading {config.config['model']} on {config.config['device']}...") + app.model = WhisperModel( + config.config["model"], + device=config.config["device"], + compute_type=config.config["compute_type"], + ) + app.log("Model ready.") + +def stop_and_transcribe() -> None: + if app.state != app.AppState.RECORDING: + return + set_state(app.AppState.TRANSCRIBING) + chunks = list(app.audio_chunks) + + if not chunks: + set_state(app.AppState.IDLE) + return + + audio = np.concatenate(chunks, axis=0).flatten().astype(np.float32) + duration = len(audio) / config.config["sample_rate"] + rms = float(np.sqrt(np.mean(audio ** 2))) + app.log(f"Audio: {duration:.1f}s RMS: {rms:.5f}") + + if duration < 0.3 or rms < 0.0001: + app.log("Too short or silent — skipped.") + set_state(app.AppState.IDLE) + return + + target_rms = 0.05 + if rms > 0: + audio = audio * (target_rms / rms) + audio = np.clip(audio, -1.0, 1.0) + + lang = config.config["language"] if config.config["language"] else None + prompt = config.get_initial_prompt() + segments, _ = app.model.transcribe( + audio, language=lang, beam_size=5, vad_filter=True, + initial_prompt=prompt if prompt else None, + ) + text = " ".join(s.text for s in 
segments).strip() + text = config.apply_vocab(text) + app.log(f"Result: {repr(text)}") + + set_state(app.AppState.IDLE) + if text: + time.sleep(0.15) + typer.type_text(text) + +def set_state(new_state: app.AppState) -> None: # semi-public, used by main.py + app.state = new_state + if app.tray_icon: + from whisper_app import tray + tray.update_icon(new_state) + if new_state == app.AppState.RECORDING: + from whisper_app import overlay + overlay.show() + else: + from whisper_app import overlay + overlay.hide() +``` + +- [ ] **Step 5: Commit** + +```bash +git add whisper_app/hotkey.py whisper_app/typer.py whisper_app/audio.py whisper_app/transcriber.py +git commit -m "feat: extract hotkey, typer, audio, transcriber modules" +``` + +--- + +## Task 4: Overlay and tray modules + +**Files:** +- Create: `whisper_app/overlay.py` +- Create: `whisper_app/tray.py` + +- [ ] **Step 1: Create overlay.py** — extract from `dictate.py` lines 246-288. + +```python +# whisper_app/overlay.py +import os +import tkinter as tk +from whisper_app import app + +_window: tk.Toplevel | None = None + +def create(root: tk.Tk) -> None: + global _window + win = tk.Toplevel(root) + win.withdraw() + win.overrideredirect(True) + win.attributes("-topmost", True) + win.attributes("-alpha", 0.92) + win.configure(bg="#1a1a1a") + + frame = tk.Frame(win, bg="#1a1a1a", padx=12, pady=10) + frame.pack(fill="both", expand=True) + + dot = tk.Canvas(frame, width=14, height=14, bg="#1a1a1a", highlightthickness=0) + dot.create_oval(2, 2, 12, 12, fill="#e03030", outline="") + dot.pack(side="left", padx=(0, 8)) + + _sans = "Segoe UI" if os.name == "nt" else "sans-serif" + tk.Label(frame, text="Aufnahme läuft …", fg="white", bg="#1a1a1a", + font=(_sans, 11)).pack(side="left") + _window = win + +def show() -> None: + if app.overlay_tk and _window: + app.overlay_tk.after(0, _show_main) + +def hide() -> None: + if app.overlay_tk and _window: + app.overlay_tk.after(0, _hide_main) + +def _show_main() -> None: + sw = 
app.overlay_tk.winfo_screenwidth() + sh = app.overlay_tk.winfo_screenheight() + _window.geometry(f"220x54+{sw - 240}+{sh - 100}") + _window.deiconify() + _window.lift() + +def _hide_main() -> None: + _window.withdraw() +``` + +- [ ] **Step 2: Create tray.py** + +```python +# whisper_app/tray.py +import pystray +from PIL import Image, ImageDraw +from whisper_app import app +from whisper_app.app import AppState + +def _make_icon(color: tuple) -> Image.Image: + img = Image.new("RGBA", (64, 64), (0, 0, 0, 0)) + d = ImageDraw.Draw(img) + d.ellipse([4, 4, 60, 60], fill=color) + return img + +ICONS = { + AppState.IDLE: _make_icon((40, 200, 80)), + AppState.RECORDING: _make_icon((220, 50, 50)), + AppState.TRANSCRIBING: _make_icon((220, 180, 30)), +} + +def update_icon(state: AppState) -> None: + if app.tray_icon: + app.tray_icon.icon = ICONS[state] + +def create(on_settings, on_vocab, on_show_log, on_quit) -> pystray.Icon: + menu = pystray.Menu( + pystray.MenuItem("Anzeigen", on_show_log, default=True), + pystray.MenuItem("Einstellungen", on_settings), + pystray.MenuItem("Vokabular", on_vocab), + pystray.Menu.SEPARATOR, + pystray.MenuItem("Beenden", on_quit), + ) + icon = pystray.Icon("whisper", ICONS[AppState.IDLE], "Whisper Dictation", menu) + app.tray_icon = icon + return icon +``` + +- [ ] **Step 3: Commit** + +```bash +git add whisper_app/overlay.py whisper_app/tray.py +git commit -m "feat: add overlay and tray modules" +``` + +--- + +## Task 5: Log window + +**Files:** +- Create: `whisper_app/log_window.py` + +- [ ] **Step 1: Create log_window.py** + +```python +# whisper_app/log_window.py +import os +import queue +import tkinter as tk +from whisper_app import app + +BG = "#18181f" +BG2 = "#22222c" +BG3 = "#2c2c38" +BORDER = "#38384a" +FG = "#e8e8f0" +FG2 = "#7878a0" +AMBER = "#f5a623" +GREEN = "#4ade80" +RED = "#f87171" +YELLOW = "#facc15" +_sans = "Segoe UI" if os.name == "nt" else "sans-serif" +_mono = "Consolas" if os.name == "nt" else "monospace" + +_window: 
tk.Toplevel | None = None +_text: tk.Text | None = None +_log_q: queue.Queue | None = None +_MAX_LINES = 200 + +def create(root: tk.Tk, log_queue: queue.Queue, + on_settings, on_vocab) -> tk.Toplevel: + global _window, _text, _log_q + _log_q = log_queue + + win = tk.Toplevel(root) + win.title("Whisper Dictation") + win.configure(bg=BG) + win.resizable(False, False) + win.protocol("WM_DELETE_WINDOW", win.withdraw) + + # ── Header ── + hdr = tk.Frame(win, bg=BG2) + hdr.pack(fill="x") + tk.Frame(hdr, bg=AMBER, height=2).pack(fill="x") + hdr_inner = tk.Frame(hdr, bg=BG2, padx=12, pady=6) + hdr_inner.pack(fill="x") + tk.Label(hdr_inner, text="WHISPER DICTATION", + font=(_sans, 11, "bold"), bg=BG2, fg=FG).pack(side="left") + tk.Button(hdr_inner, text="✕", command=win.withdraw, + bg=BG2, fg=FG2, relief="flat", bd=0, + font=(_sans, 11), cursor="hand2").pack(side="right") + + # ── Log text ── + txt = tk.Text(win, bg=BG, fg=FG, font=(_mono, 10), + relief="flat", bd=0, padx=10, pady=6, + state="disabled", wrap="none", + width=48, height=10, + highlightthickness=0) + txt.pack(fill="both", expand=True) + txt.tag_config("green", foreground=GREEN) + txt.tag_config("red", foreground=RED) + txt.tag_config("yellow", foreground=YELLOW) + txt.tag_config("grey", foreground=FG2) + _text = txt + + # ── Button bar ── + bar = tk.Frame(win, bg=BG2, pady=6, padx=10) + bar.pack(fill="x") + for label, cmd in [("⚙ Einstellungen", on_settings), ("📚 Vokabular", on_vocab)]: + b = tk.Button(bar, text=label, command=cmd, + bg=BG3, fg=FG, relief="flat", bd=0, + font=(_sans, 10), padx=10, pady=4, cursor="hand2") + b.pack(side="left", padx=(0, 4)) + tk.Button(bar, text="🗑", command=_clear_log, + bg=BG3, fg=FG2, relief="flat", bd=0, + font=(_sans, 10), padx=8, pady=4, cursor="hand2").pack(side="right") + + _window = win + win.withdraw() + root.after(100, _poll) + return win + +def show() -> None: + if _window: + _window.deiconify() + _window.lift() + +def _clear_log() -> None: + if _text: + 
_text.config(state="normal") + _text.delete("1.0", "end") + _text.config(state="disabled") + +def _tag_for(msg: str) -> str: + low = msg.lower() + if any(x in low for x in ("recording", "aufnahme")): + return "red" + if any(x in low for x in ("transcrib", "loading", "laden")): + return "yellow" + if any(x in low for x in ("result:", "ready", "bereit")): + return "green" + return "grey" + +def _poll() -> None: + if _log_q and _text: + try: + while True: + msg = _log_q.get_nowait() + _append(msg) + except queue.Empty: + pass + if app.overlay_tk: + app.overlay_tk.after(100, _poll) + +def _append(msg: str) -> None: + _text.config(state="normal") + # Trim to MAX_LINES + lines = int(_text.index("end-1c").split(".")[0]) + if lines >= _MAX_LINES: + _text.delete("1.0", "2.0") + _text.insert("end", msg + "\n", _tag_for(msg)) + _text.see("end") + _text.config(state="disabled") +``` + +- [ ] **Step 2: Commit** + +```bash +git add whisper_app/log_window.py +git commit -m "feat: add compact log panel" +``` + +--- + +## Task 6: Settings and vocab windows + +**Files:** +- Create: `whisper_app/settings_window.py` +- Create: `whisper_app/vocab_window.py` + +- [ ] **Step 1: Create settings_window.py** + +Extract `_open_settings_main()` from `dictate.py` into `open(root, on_reload)`. Replace all `config["x"]` references with `config.config["x"]` and all `save_config()` calls with `config.save_config()` (the numbered substitution notes in the code below spell this out using the alias `cfg`). Add an INSTALLATION section at the bottom that calls the functions in `installer.py`. Note: `installer.py` is only created in Task 7, and this module imports it at the top, so `settings_window.py` cannot be imported until Task 7 is complete. + +```python +# whisper_app/settings_window.py +import os, threading +import tkinter as tk +from tkinter import ttk +import sounddevice as sd +from whisper_app import config, installer + +# (colour constants identical to dictate.py — copy BG/BG2/BG3/BORDER/FG/FG2/AMBER/AMBER2/GREEN) + +def open(root: tk.Tk, on_reload) -> None: + root.after(0, lambda: _open_main(root, on_reload)) + +def _open_main(root: tk.Tk, on_reload) -> None: + # Copy dictate.py lines 381–548 (_open_settings_main) verbatim, then apply: + # + # 1. 
Remove the global `config` reference at the top — import from module instead: + # from whisper_app import config as cfg + # + # 2. Replace every occurrence of: + # config["x"] → cfg.config["x"] + # config.get("x") → cfg.config.get("x") + # config["x"] = y → cfg.config["x"] = y + # + # 3. Replace: + # save_config() → cfg.save_config() + # + # 4. In the save() callback, replace: + # threading.Thread(target=reload_model_and_hotkey, daemon=True).start() + # → threading.Thread(target=on_reload, daemon=True).start() + # + # 5. After the existing button bar at the end of content, add: + # _add_installation_section(win, content, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) + pass # copy + apply substitutions above + +def _add_installation_section(win, content, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) -> None: + """Append INSTALLATION section to settings window.""" + import sys + from whisper_app import installer + + is_frozen = getattr(sys, "frozen", False) + + # Section header — same helper as AUDIO/MODELL sections above + section("INSTALLATION") + + features = [ + ("Autostart", installer.autostart_installed, installer.install_autostart, installer.remove_autostart), + ("Startmenü-Eintrag", installer.startmenu_installed, installer.install_startmenu, installer.remove_startmenu), + ("Desktop-Verknüpfung", installer.desktop_installed, installer.install_desktop, installer.remove_desktop), + ] + + for label, is_installed_fn, install_fn, remove_fn in features: + f = row(label) + + status_var = tk.StringVar(value="eingerichtet" if is_installed_fn() else "nicht eingerichtet") + status_lbl = tk.Label(f, textvariable=status_var, font=FONT_S, + bg=BG, fg=AMBER if is_installed_fn() else FG2) + status_lbl.pack(side="left", padx=(0, 12)) + + def make_callbacks(install_f, remove_f, sv, lbl): + def do_install(): + install_f() + sv.set("eingerichtet") + lbl.config(fg=AMBER) + def do_remove(): + remove_f() + sv.set("nicht eingerichtet") + lbl.config(fg=FG2) + 
return do_install, do_remove + + do_install, do_remove = make_callbacks(install_fn, remove_fn, status_var, status_lbl) + + btn_install = tk.Button(f, text="Einrichten", command=do_install, + bg=BG3, fg=FG, font=FONT_S, relief="flat", + padx=8, pady=3, cursor="hand2" if is_frozen else "arrow", bd=0) + btn_install.pack(side="left", padx=(0, 4)) + + btn_remove = tk.Button(f, text="Entfernen", command=do_remove, + bg=BG3, fg=FG2, font=FONT_S, relief="flat", + padx=8, pady=3, cursor="hand2" if is_frozen else "arrow", bd=0) + btn_remove.pack(side="left") + + if not is_frozen: + for btn in (btn_install, btn_remove): + btn.config(state="disabled") + # Simple tooltip via label + tk.Label(f, text="Nur im gebauten Binary verfügbar", + font=FONT_S, bg=BG, fg=FG2).pack(side="left", padx=(8, 0)) +``` + +- [ ] **Step 2: Create vocab_window.py** + +Extract `_open_vocab_main()` from `dictate.py` verbatim into `open(root)`. Then replace the `vocab` references with `config.vocab` and the `save_vocab()` calls with `config.save_vocab()`, both provided by the `config` module. 
+ +```python +# whisper_app/vocab_window.py +import tkinter as tk +from whisper_app import config + +def open(root: tk.Tk) -> None: + root.after(0, lambda: _open_main(root)) + +def _open_main(root: tk.Tk) -> None: + # copy dictate.py lines 556-741 verbatim + # Replace: vocab["x"] → config.vocab["x"] + # save_vocab() → config.save_vocab() + pass # implementer fills in +``` + +- [ ] **Step 3: Commit** + +```bash +git add whisper_app/settings_window.py whisper_app/vocab_window.py +git commit -m "feat: extract settings and vocab windows to modules" +``` + +--- + +## Task 7: Installer module + +**Files:** +- Create: `whisper_app/installer.py` +- Create: `tests/test_installer.py` + +- [ ] **Step 1: Write failing tests** + +```python +# tests/test_installer.py +import sys, os + +def test_autostart_roundtrip(tmp_path, monkeypatch): + """install then remove leaves no trace.""" + import whisper_app.installer as inst + monkeypatch.setattr(sys, "frozen", True, raising=False) + monkeypatch.setattr(sys, "executable", str(tmp_path / "wd.exe")) + if sys.platform != "win32": + monkeypatch.setenv("HOME", str(tmp_path)) + # Ensure autostart dir path is under tmp + monkeypatch.setattr(inst, "_autostart_path", + lambda: str(tmp_path / ".config/autostart/whisper-dictation.desktop")) + inst.install_autostart() + assert inst.autostart_installed() + inst.remove_autostart() + assert not inst.autostart_installed() +``` + +- [ ] **Step 2: Run test to verify it fails** + +``` +pytest tests/test_installer.py -v +``` +Expected: `ModuleNotFoundError` + +- [ ] **Step 3: Create installer.py** + +```python +# whisper_app/installer.py +import os +import subprocess +import sys + +def _exe_path() -> str: + return sys.executable if getattr(sys, "frozen", False) else "" + +def _is_frozen() -> bool: + return getattr(sys, "frozen", False) + +# ── Autostart ───────────────────────────────────────────────────────────────── + +def _autostart_path() -> str: + if sys.platform == "win32": + return "" # Windows 
uses registry, not a file path + return os.path.join(os.path.expanduser("~"), ".config", "autostart", + "whisper-dictation.desktop") + +def autostart_installed() -> bool: + if sys.platform == "win32": + import winreg + try: + key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Run") + winreg.QueryValueEx(key, "WhisperDictation") + return True + except FileNotFoundError: + return False + return os.path.exists(_autostart_path()) + +def install_autostart() -> None: + if sys.platform == "win32": + import winreg + key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Run", access=winreg.KEY_SET_VALUE) + winreg.SetValueEx(key, "WhisperDictation", 0, winreg.REG_SZ, f'"{_exe_path()}"') + else: + path = _autostart_path() + os.makedirs(os.path.dirname(path), exist_ok=True) + _write_desktop(path) + +def remove_autostart() -> None: + if sys.platform == "win32": + import winreg + try: + key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Run", access=winreg.KEY_SET_VALUE) + winreg.DeleteValue(key, "WhisperDictation") + except FileNotFoundError: + pass + else: + path = _autostart_path() + if os.path.exists(path): + os.remove(path) + +# ── Start menu ──────────────────────────────────────────────────────────────── + +def _startmenu_path() -> str: + if sys.platform == "win32": + return os.path.join(os.environ.get("APPDATA", ""), + r"Microsoft\Windows\Start Menu\Programs\Whisper Dictation.lnk") + return os.path.join(os.path.expanduser("~"), ".local", "share", "applications", + "whisper-dictation.desktop") + +def startmenu_installed() -> bool: + return os.path.exists(_startmenu_path()) + +def install_startmenu() -> None: + path = _startmenu_path() + os.makedirs(os.path.dirname(path), exist_ok=True) + if sys.platform == "win32": + _create_lnk(path) + else: + _write_desktop(path) + +def remove_startmenu() -> None: + path = _startmenu_path() + if 
os.path.exists(path): + os.remove(path) + +# ── Desktop shortcut ────────────────────────────────────────────────────────── + +def _desktop_dir() -> str: + if sys.platform == "win32": + return os.environ.get("USERPROFILE", os.path.expanduser("~")) + try: + result = subprocess.run(["xdg-user-dir", "DESKTOP"], capture_output=True, text=True) + path = result.stdout.strip() + if path: + return path + except FileNotFoundError: + pass + return os.path.join(os.path.expanduser("~"), "Desktop") + +def _desktop_path() -> str: + if sys.platform == "win32": + return os.path.join(_desktop_dir(), "Whisper Dictation.lnk") + return os.path.join(_desktop_dir(), "whisper-dictation.desktop") + +def desktop_installed() -> bool: + return os.path.exists(_desktop_path()) + +def install_desktop() -> None: + path = _desktop_path() + if sys.platform == "win32": + _create_lnk(path) + else: + _write_desktop(path) + os.chmod(path, 0o755) + +def remove_desktop() -> None: + path = _desktop_path() + if os.path.exists(path): + os.remove(path) + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _write_desktop(path: str) -> None: + icon_path = os.path.join(os.path.dirname(_exe_path()), "icon.png") + content = ( + "[Desktop Entry]\n" + "Type=Application\n" + "Name=Whisper Dictation\n" + f"Exec={_exe_path()}\n" + f"Icon={icon_path}\n" + "Terminal=false\n" + "Categories=Utility;\n" + ) + with open(path, "w") as f: + f.write(content) + +def _create_lnk(path: str) -> None: + if sys.platform != "win32": + return + import win32com.client # guarded import — Windows only + shell = win32com.client.Dispatch("WScript.Shell") + lnk = shell.CreateShortCut(path) + lnk.Targetpath = _exe_path() + lnk.WorkingDirectory = os.path.dirname(_exe_path()) + lnk.IconLocation = _exe_path() + lnk.save() +``` + +- [ ] **Step 4: Run tests** + +``` +pytest tests/test_installer.py -v +``` +Expected: 1 PASSED (on Linux; on Windows the registry variant runs) + +- [ ] **Step 5: Commit** + 
+```bash +git add whisper_app/installer.py tests/test_installer.py +git commit -m "feat: add installer module for system integration" +``` + +--- + +## Task 8: main.py — wire everything together + +**Files:** +- Create: `main.py` + +- [ ] **Step 1: Create main.py** + +```python +# main.py +import os +import queue +import sys +import threading + +def _setup_error_log(): + """Last-resort error log for crashes before tray appears (frozen mode).""" + if not getattr(sys, "frozen", False): + return + import traceback + if os.name == "nt": + log_dir = os.path.join(os.environ.get("LOCALAPPDATA", ""), "WhisperDictation") + else: + log_dir = os.path.join(os.path.expanduser("~"), ".local", "share", "WhisperDictation") + os.makedirs(log_dir, exist_ok=True) + log_path = os.path.join(log_dir, "error.log") + sys.excepthook = lambda *args: open(log_path, "a").write( + "".join(traceback.format_exception(*args)) + "\n" + ) + +def main(): + _setup_error_log() + + from whisper_app import app, config, audio, transcriber, hotkey + from whisper_app import overlay, tray, log_window, settings_window, vocab_window + import tkinter as tk + + config.load_config() + config.load_vocab() + + # Tkinter root (hidden) + root = tk.Tk() + root.withdraw() + app.overlay_tk = root + + # Log queue — connect before model load so early messages appear + log_q: queue.Queue = queue.Queue() + app.set_log_queue(log_q) + + # Windows + overlay.create(root) + log_win = log_window.create( + root, log_q, + on_settings=lambda: settings_window.open(root, _reload), + on_vocab=lambda: vocab_window.open(root), + ) + + # Load model in background so the tray appears immediately + threading.Thread(target=transcriber.load_model, daemon=True).start() + + # Audio stream + stream = audio.get_audio_stream() + stream.start() + + # Hotkey + def _on_release(): + threading.Thread(target=transcriber.stop_and_transcribe, daemon=True).start() + + app.hotkey_listener = hotkey.HotkeyListener( + config.config["hotkey"], + 
on_press=_start_recording, + on_release=_on_release, + ) + + # Tray + icon = tray.create( + on_settings=lambda: settings_window.open(root, _reload), + on_vocab=lambda: vocab_window.open(root), + on_show_log=lambda: root.after(0, log_window.show), + on_quit=lambda: _quit(stream, icon), + ) + threading.Thread(target=icon.run, daemon=True).start() + + app.log(f"Bereit. Hotkey: {config.config['hotkey']}") + root.mainloop() + stream.stop() + +def _start_recording(): + from whisper_app import app, transcriber + app.audio_chunks = [] + transcriber.set_state(app.AppState.RECORDING) # set_state is semi-public by design + app.log("Recording...") + +def _reload(): + from whisper_app import app, config, transcriber, hotkey + if app.hotkey_listener: + app.hotkey_listener.stop() + threading.Thread(target=transcriber.load_model, daemon=True).start() + app.hotkey_listener = hotkey.HotkeyListener( + config.config["hotkey"], + on_press=_start_recording, + on_release=lambda: threading.Thread( + target=transcriber.stop_and_transcribe, daemon=True).start(), + ) + app.log(f"Hotkey aktualisiert: {config.config['hotkey']}") + +def _quit(stream, icon): + stream.stop() + icon.stop() + from whisper_app import app + if app.overlay_tk: + app.overlay_tk.after(0, app.overlay_tk.quit) + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Smoke-test in dev mode** + +```bash +# Windows +python main.py +# Linux +python3 main.py +``` + +Expected: tray icon appears, no terminal window needed, clicking tray shows log panel. 
def generate_icon():
    """Generate icon.ico and icon.png from tray icon colors (green dot on transparent).

    ``icon.ico`` contains one hand-drawn frame per entry in ``sizes``;
    ``icon.png`` is the largest frame.
    """
    from PIL import Image, ImageDraw

    sizes = [16, 32, 48, 256]
    frames = []
    for size in sizes:
        img = Image.new("RGBA", (size, size), (0, 0, 0, 0))
        margin = max(1, size // 16)
        ImageDraw.Draw(img).ellipse(
            [margin, margin, size - margin, size - margin], fill=(40, 200, 80)
        )
        frames.append(img)

    # Pillow's ICO writer skips every requested size that is larger than the
    # image being saved, so the base image must be the LARGEST frame. Saving
    # from the 16 px frame produced an .ico with only the 16x16 entry.
    # The smaller frames go in append_images and are matched by their size.
    largest = frames[-1]
    largest.save("icon.ico", format="ICO", sizes=[(s, s) for s in sizes],
                 append_images=frames[:-1])
    print("icon.ico generated.")

    # PNG copy of the largest frame, for launchers that take an image file.
    largest.save("icon.png", format="PNG")
    print("icon.png generated.")


def build():
    """Generate the icons, run PyInstaller, and copy runtime files into dist.

    Raises:
        subprocess.CalledProcessError: when PyInstaller exits non-zero.
        FileNotFoundError: when a file to be copied is missing from the
            current directory.
    """
    generate_icon()
    subprocess.run([sys.executable, "-m", "PyInstaller", "whisper-dictation.spec",
                    "--noconfirm"], check=True)

    dist_dir = os.path.join("dist", "whisper-dictation")
    # NOTE(review): PyInstaller's --noconfirm replaces the output directory,
    # so the "already exists" branch presumably only triggers if something
    # else populated dist in between — confirm the intended workflow.
    for fname in ["config.json", "vocabulary.json"]:
        dest = os.path.join(dist_dir, fname)
        if not os.path.exists(dest):
            shutil.copy(fname, dest)
            print(f"Copied {fname} → {dist_dir}/")
        else:
            print(f"Skipped {fname} (already exists in dist — preserving user edits)")

    # The Linux .desktop entry written by the installer points at icon.png
    # next to the executable, so the icon has to ship with the binary.
    shutil.copy("icon.png", os.path.join(dist_dir, "icon.png"))
    print(f"Copied icon.png → {dist_dir}/")

    print(f"\nBuild complete: {dist_dir}/")


if __name__ == "__main__":
    build()