From 5aaf8b59ceeaf1856ef91543abd9a4b54988e8f8 Mon Sep 17 00:00:00 2001
From: Christian Kauer <c.kauer@krah-gruppe.de>
Date: Sun, 22 Mar 2026 11:01:14 +0100
Subject: [PATCH] Fix Linux version: lazy pynput import, configurable vocab/model paths

---
 .claude/settings.local.json    | 29 +++++++++++-
 .gitignore                     |  1 +
 README.md                      | 85 +++++++++++++++++++++++++++++-----
 config.json                    |  5 +-
 main.py                        |  2 +
 shared_data/.directory         |  2 +
 shared_data/vocabulary.json    | 63 +++++++++++++++++++++++++
 whisper_app/config.py          | 16 ++++++-
 whisper_app/hotkey.py          | 49 +++++++++++++-------
 whisper_app/settings_window.py | 42 +++++++++++++++++
 whisper_app/transcriber.py     |  2 +
 whisper_app/typer.py           |  9 ++--
 12 files changed, 268 insertions(+), 37 deletions(-)
 create mode 100644 shared_data/.directory
 create mode 100644 shared_data/vocabulary.json

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 21b9c57..8ab1f28 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -36,7 +36,34 @@
       "Bash(bash build-linux.sh)",
       "Bash(.venv-linux/bin/python -c \"import tkinter; print\\(''tkinter OK''\\)\")",
       "Bash(pacman -Q tk)",
-      "Bash(sudo pacman:*)"
+      "Bash(sudo pacman:*)",
+      "Bash(grep -r \"WHISPER_DATA_DIR\\\\|WHISPER_LOCAL_DIR\" /run/media/chk/Ventoy/projects/chrka/whisper-dictation --include=*.py)",
+      "Bash(grep -l \"config.load_config\\\\|config.load_vocab\" /run/media/chk/Ventoy/projects/chrka/whisper-dictation/whisper_app/*.py)",
+      "Bash(.venv-linux/bin/python -m pytest tests/ -v)",
+      "Bash(.venv-linux/bin/python -m unittest discover -s tests -v)",
+      "Bash(head -5 tests/*.py)",
+      "Bash(.venv-linux/bin/pip install:*)",
+      "Bash(./whisper-dictation)",
+      "Bash(pacman -Ss appindicator)",
+      "Bash(pacman -Q libayatana-appindicator)",
+      "Bash(echo \"$XDG_SESSION_TYPE\")",
+      "Bash(echo \"Session: $XDG_SESSION_TYPE\")",
+      "Bash(mount)",
+      "Bash(desktop-file-validate ~/.local/share/applications/whisper-dictation.desktop)",
+      "Bash(update-desktop-database ~/.local/share/applications/)",
+      "Bash(echo \"DISPLAY=$DISPLAY\")",
+      "Bash(xlsclients)",
+      "Bash(DISPLAY=:0 xdpyinfo)",
+      "Bash(pkill -f \"whisper-dictation.*resource_tracker\")",
+      "Bash(pkill -f \"dist/whisper-dictation-linux/whisper-dictation\")",
+      "Bash(pkill -9 -f whisper-dictation)",
+      "Bash(pkill -f whisper-dictation)",
+      "Bash(gtk-launch whisper-dictation:*)",
+      "Bash(pkill -9 -f resource_tracker)",
+      "Bash(echo \"Desktop: $XDG_CURRENT_DESKTOP\")",
+      "Bash(nvidia-smi)",
+      "Bash(lspci)",
+      "Bash(pacman -Q)"
     ]
   }
 }
diff --git a/.gitignore b/.gitignore
index ada5b23..611a755 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ models/
 *.log
 build/
 dist/
+shared_data/models--Systran--faster-whisper-medium/
 icon.ico
 .claude/settings.local.json
 .superpowers/
diff --git a/README.md b/README.md
index d77775b..7f7aed3 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,10 @@ Local GPU speech-to-text dictation tool. Hold a hotkey to record, release to tra
 
 - System tray icon with settings GUI (tkinter)
 - Configurable hotkey, model, language, audio device
+- Cross-platform: Windows and Linux builds from a single codebase
 - Shared config via git (`config.json`, `vocabulary.json`)
-- Machine-specific settings stored locally (audio device, GPU settings)
-- Windows: GPU acceleration via CUDA; Linux: CPU
+- Machine-specific settings stored locally (audio device, GPU settings, model)
+- Configurable shared paths for vocabulary and model cache (useful for dual-boot setups)
 
 ## Requirements
 
@@ -20,8 +21,38 @@ Local GPU speech-to-text dictation tool. Hold a hotkey to record, release to tra
 - `pyinstaller` (for building a standalone executable)
 
 ### Linux
+
+**System packages (install via package manager):**
+
+Arch/CachyOS:
+```bash
+sudo pacman -S tk libayatana-appindicator wl-clipboard xdotool
+```
+
+Debian/Ubuntu:
+```bash
+sudo apt install python3-tk libayatana-appindicator3-1 wl-clipboard xdotool
+```
+
+| Package | Purpose |
+|---------|---------|
+| `tk` | tkinter GUI (settings, log, vocabulary windows) |
+| `libayatana-appindicator` | System tray icon (required for KDE/GNOME on Wayland) |
+| `wl-clipboard` | Text injection on Wayland (`wl-copy`) |
+| `xdotool` | Simulates Ctrl+V paste on Wayland, text typing on X11 |
+
+**Optional (for GPU acceleration):**
+
+Arch/CachyOS:
+```bash
+sudo pacman -S nvidia cuda
+```
+
+Without CUDA, the app runs on CPU. Use `int8` compute type and a smaller model (`small` or `base`) for acceptable speed on CPU.
+
+**Python:**
 - Python 3.10+
-- PortAudio: `sudo apt install portaudio19-dev`
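+- PortAudio system library — the `sounddevice` wheels bundle it only on Windows/macOS; on Linux install it via the package manager (Arch: `portaudio`, Debian/Ubuntu: `libportaudio2`)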
 
 ## Installation
 
@@ -36,11 +67,11 @@ This creates a `.venv-windows` virtual environment, installs all dependencies an
 ### Linux
 
 ```bash
-chmod +x install.sh start.sh
+chmod +x install.sh start.sh build-linux.sh
 ./install.sh
 ```
 
-Creates a `.venv-linux` virtual environment. GPU support on Linux requires a manually installed CUDA environment; by default runs on CPU.
+Creates a `.venv-linux` virtual environment with all dependencies and PyInstaller.
 
 ## Usage
 
@@ -58,33 +89,63 @@ The app starts in the system tray. Hold the hotkey (default: `Ctrl+Shift+Space`)
 
 ## Build
 
-To produce a standalone Windows executable:
+Builds are platform-specific and output to separate directories:
+- Windows: `dist/whisper-dictation-windows/`
+- Linux: `dist/whisper-dictation-linux/`
 
+### Windows
 ```bat
 .venv-windows\Scripts\python.exe build.py
 ```
 
-This uses PyInstaller to bundle the app and all dependencies into a single folder under `dist/`. The resulting executable can be run without a Python installation.
+### Linux
+```bash
+./build-linux.sh
+```
+
+Both use PyInstaller to bundle the app into a standalone folder. The resulting executable can be run without a Python installation.
 
 ## Configuration
 
-`config.json` (shared, stored in the repo):
+### Shared config (`config.json`, in app directory)
 
 | Key | Default | Description |
 |-----|---------|-------------|
 | `hotkey` | `ctrl+shift+space` | Recording trigger |
-| `model` | `medium` | Whisper model size (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`) |
 | `language` | `de` | Transcription language (`de`, `en`, `fr`, `es`, `it`, `null` = auto) |
 | `sample_rate` | `16000` | Audio sample rate in Hz |
+| `vocab_path` | `""` | Path to vocabulary file (empty = local `vocabulary.json`) |
+| `model_dir` | `""` | Path to shared model cache directory (empty = default HuggingFace cache) |
 
-Machine-specific settings (GPU device, compute type, audio device) are stored separately and not tracked by git:
+### Local config (`config_local.json`, per machine)
+
+Stored outside the app directory to keep machine-specific settings separate:
 - **Windows:** `%LOCALAPPDATA%\WhisperDictation\config_local.json`
 - **Linux:** `~/.local/share/WhisperDictation/config_local.json`
 
+| Key | Default | Description |
+|-----|---------|-------------|
+| `model` | `medium` | Whisper model size (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`) |
+| `device` | `cuda` | Inference device (`cuda` or `cpu`) |
+| `compute_type` | `float16` | Precision (`float16` for GPU, `int8` for CPU, `float32`) |
+| `audio_device` | `null` | Microphone (null = system default) |
+
+### Sharing data between Windows and Linux
+
+On a shared drive (e.g. Ventoy USB), both builds can use the same vocabulary and model files. In the Settings UI, set `vocab_path` to the shared vocabulary file and `model_dir` to the shared model cache directory:
+
+```
+shared_data/
+  vocabulary.json    <- shared vocabulary
+  models/            <- shared Whisper model cache
+```
+
+Audio settings, model selection, and compute type remain per-platform in `config_local.json`.
+
 ## Vocabulary
 
-Custom vocabulary/replacements can be added to `vocabulary.json`. These are passed as initial prompts to improve recognition of domain-specific terms.
+Custom vocabulary/replacements can be edited via the Settings UI or directly in `vocabulary.json`. Words are passed as initial prompts to improve recognition of domain-specific terms. Replacements are applied as find/replace after transcription.
 
 ## Model Download
 
-On first start the selected Whisper model is downloaded automatically from HuggingFace (~500 MB for `medium`). Subsequent starts use the cached model.
+On first start the selected Whisper model is downloaded automatically from HuggingFace (~1.5 GB for `medium`). Subsequent starts use the cached model. Set `model_dir` to share the cache between builds.
diff --git a/config.json b/config.json
index 3168cd1..571aec1 100644
--- a/config.json
+++ b/config.json
@@ -1,6 +1,7 @@
 {
   "hotkey": "ctrl+shift+space",
-  "model": "medium",
   "language": "de",
-  "sample_rate": 16000
+  "sample_rate": 16000,
+  "vocab_path": "/run/media/chk/Ventoy/projects/chrka/whisper-dictation/shared_data/vocabulary.json",
+  "model_dir": "/run/media/chk/Ventoy/projects/chrka/whisper-dictation/shared_data/"
 }
\ No newline at end of file
diff --git a/main.py b/main.py
index 435fe24..1d991e9 100644
--- a/main.py
+++ b/main.py
@@ -103,4 +103,6 @@ def _quit(stream, icon):
         app.overlay_tk.after(0, app.overlay_tk.quit)
 
 if __name__ == "__main__":
+    import multiprocessing
+    multiprocessing.freeze_support()
     main()
diff --git a/shared_data/.directory b/shared_data/.directory
new file mode 100644
index 0000000..e4c3043
--- /dev/null
+++ b/shared_data/.directory
@@ -0,0 +1,2 @@
+[Desktop Entry]
+Icon=folder-yellow
diff --git a/shared_data/vocabulary.json b/shared_data/vocabulary.json
new file mode 100644
index 0000000..c23e13d
--- /dev/null
+++ b/shared_data/vocabulary.json
@@ -0,0 +1,63 @@
+{
+  "words": [
+    "test"
+  ],
+  "replacements": [
+    {
+      "from": "KRA",
+      "to": "KRAH"
+    },
+    {
+      "from": "Atos",
+      "to": "ATHOS"
+    },
+    {
+      "from": "Resistec",
+      "to": "RESISTEC"
+    },
+    {
+      "from": "Resistek",
+      "to": "RESISTEC"
+    },
+    {
+      "from": "HES",
+      "to": "HEES"
+    },
+    {
+      "from": "Ackerschot",
+      "to": "Ackerschott"
+    },
+    {
+      "from": "Carrois",
+      "to": "Kauer"
+    },
+    {
+      "from": "Jouer fixe",
+      "to": "Jour-Fixe"
+    },
+    {
+      "from": "Docuware",
+      "to": "DocuWare"
+    },
+    {
+      "from": "Nates",
+      "to": "Nejc"
+    },
+    {
+      "from": "Bittzeit",
+      "to": "BitSight"
+    },
+    {
+      "from": "Kalmikow",
+      "to": "Kalmykov"
+    },
+    {
+      "from": "Leifert",
+      "to": "Leifer"
+    },
+    {
+      "from": "Kiyosa",
+      "to": "Key-User"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/whisper_app/config.py b/whisper_app/config.py
index 1f1ee54..4173da9 100644
--- a/whisper_app/config.py
+++ b/whisper_app/config.py
@@ -33,6 +33,8 @@ DEFAULT_CONFIG = {
     "language": "de",
     "audio_device": None,
     "sample_rate": 16000,
+    "vocab_path": "",
+    "model_dir": "",
 }
 
 MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
@@ -40,12 +42,22 @@ LANGUAGES = {"Deutsch": "de", "English": "en", "Français": "fr", "Español": "e
              "Italiano": "it", "Auto": None}
 DEVICES = ["cuda", "cpu"]
 COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"}
-LOCAL_KEYS = {"audio_device", "device", "compute_type"}
+LOCAL_KEYS = {"audio_device", "device", "compute_type", "model"}
 
 config: dict = {}
 vocab: dict = {"words": [], "replacements": []}
 
 
+def _resolve_vocab_file() -> None:
+    """Set VOCAB_FILE from config['vocab_path'], falling back to DATA_DIR."""
+    global VOCAB_FILE  # rebinds the module-level name
+    vp = config.get("vocab_path", "")
+    if vp:  # a non-empty setting overrides the default location
+        VOCAB_FILE = vp if os.path.isabs(vp) else os.path.join(DATA_DIR, vp)  # relative paths are anchored at DATA_DIR
+    else:
+        VOCAB_FILE = os.path.join(DATA_DIR, "vocabulary.json")  # default: vocabulary.json inside DATA_DIR
+
+
 def load_config() -> None:
     global config
     os.makedirs(_local_dir, exist_ok=True)
@@ -63,6 +75,7 @@ def load_config() -> None:
                 config.update(json.load(f))
             except json.JSONDecodeError:
                 print(f"Warning: could not parse {CONFIG_LOCAL_FILE}; ignoring")
+    _resolve_vocab_file()
 
 
 def save_config() -> None:
@@ -74,6 +87,7 @@ def save_config() -> None:
         json.dump(shared, f, indent=2)
     with open(CONFIG_LOCAL_FILE, "w", encoding="utf-8") as f:
         json.dump(local, f, indent=2)
+    _resolve_vocab_file()
 
 
 def load_vocab() -> None:
diff --git a/whisper_app/hotkey.py b/whisper_app/hotkey.py
index 452d50c..2ed9ef5 100644
--- a/whisper_app/hotkey.py
+++ b/whisper_app/hotkey.py
@@ -1,29 +1,42 @@
-from pynput.keyboard import Controller as KeyboardController, Listener as KeyboardListener, Key, KeyCode
+_pynput_loaded = False  # set to True by _ensure_pynput() once pynput has been imported
+Key = KeyCode = KeyboardListener = None  # placeholders; rebound to the pynput classes by _ensure_pynput()
 
-_MODIFIER_MAP = {
-    "ctrl": {Key.ctrl_l, Key.ctrl_r},
-    "ctrl_l": {Key.ctrl_l}, "ctrl_r": {Key.ctrl_r},
-    "shift": {Key.shift_l, Key.shift_r},
-    "shift_l": {Key.shift_l}, "shift_r": {Key.shift_r},
-    "alt": {Key.alt_l, Key.alt_r},
-    "alt_l": {Key.alt_l}, "alt_r": {Key.alt_r},
-}
+def _ensure_pynput():  # import pynput on first call and fill the key lookup tables; later calls return immediately
+    global _pynput_loaded, Key, KeyCode, KeyboardListener, _MODIFIER_MAP, _KEY_MAP  # the two maps are only mutated in place; the other names are rebound
+    if _pynput_loaded:
+        return  # already initialised
+    from pynput.keyboard import Listener as _Listener, Key as _Key, KeyCode as _KeyCode  # deferred so that importing this module does not import pynput
+    Key = _Key
+    KeyCode = _KeyCode
+    KeyboardListener = _Listener
+    _MODIFIER_MAP.update({  # modifier name -> set of pynput keys that count as that modifier
+        "ctrl": {Key.ctrl_l, Key.ctrl_r},  # side-agnostic name accepts either physical key
+        "ctrl_l": {Key.ctrl_l}, "ctrl_r": {Key.ctrl_r},
+        "shift": {Key.shift_l, Key.shift_r},
+        "shift_l": {Key.shift_l}, "shift_r": {Key.shift_r},
+        "alt": {Key.alt_l, Key.alt_r},
+        "alt_l": {Key.alt_l}, "alt_r": {Key.alt_r},
+    })
+    _KEY_MAP.update({  # key name -> pynput Key member
+        "space": Key.space, "tab": Key.tab, "enter": Key.enter,
+        "esc": Key.esc, "escape": Key.esc,  # both spellings map to the same key
+        "up": Key.up, "down": Key.down, "left": Key.left, "right": Key.right,
+        "home": Key.home, "end": Key.end, "page_up": Key.page_up, "page_down": Key.page_down,
+        "insert": Key.insert, "delete": Key.delete, "backspace": Key.backspace,
+    })
+    for i in range(1, 13):  # function keys f1 .. f12
+        _KEY_MAP[f"f{i}"] = getattr(Key, f"f{i}")
+    _pynput_loaded = True  # set last: if the import above raises, the flag stays False and the next call retries
 
-_KEY_MAP = {
-    "space": Key.space, "tab": Key.tab, "enter": Key.enter,
-    "esc": Key.esc, "escape": Key.esc,
-    "up": Key.up, "down": Key.down, "left": Key.left, "right": Key.right,
-    "home": Key.home, "end": Key.end, "page_up": Key.page_up, "page_down": Key.page_down,
-    "insert": Key.insert, "delete": Key.delete, "backspace": Key.backspace,
-}
-for i in range(1, 13):
-    _KEY_MAP[f"f{i}"] = getattr(Key, f"f{i}")
+_MODIFIER_MAP = {}  # hotkey modifier name -> set of pynput keys; populated by _ensure_pynput()
+_KEY_MAP = {}  # hotkey key name -> pynput Key; populated by _ensure_pynput()
 
 
 def _parse_hotkey(hotkey_str):
     """Parse hotkey string into (modifier_sets, trigger_key).
     Returns: (list of sets-of-pynput-keys for each modifier, pynput key for trigger)
     """
+    _ensure_pynput()
     parts = [p.strip().lower() for p in hotkey_str.split("+")]
     modifiers = []
     for p in parts[:-1]:
diff --git a/whisper_app/settings_window.py b/whisper_app/settings_window.py
index bf9e5f3..c58db58 100644
--- a/whisper_app/settings_window.py
+++ b/whisper_app/settings_window.py
@@ -1,6 +1,7 @@
 import os
 import threading
 import tkinter as tk
+from tkinter import filedialog
 
 from whisper_app import config as cfg
 
@@ -177,6 +178,45 @@ def _open_main(root: tk.Tk, on_reload) -> None:
              relief="flat", bd=6,
              highlightbackground=BORDER, highlightthickness=1).pack(side="left")
 
+    # ── PFADE ──
+    section("PFADE")
+
+    vocab_path_var = tk.StringVar(value=cfg.config.get("vocab_path", ""))
+    f_vp = row("Vocabulary-Datei", hint="leer = lokal im App-Ordner")
+    vp_entry = tk.Entry(f_vp, textvariable=vocab_path_var, font=FONT, width=30,
+                        bg=BG3, fg=FG, insertbackground=AMBER,
+                        relief="flat", bd=6,
+                        highlightbackground=BORDER, highlightthickness=1)
+    vp_entry.pack(side="left")
+
+    def browse_vocab():
+        path = filedialog.askopenfilename(
+            parent=win, title="Vocabulary-Datei wählen",
+            filetypes=[("JSON", "*.json"), ("Alle", "*.*")])
+        if path:
+            vocab_path_var.set(path)
+
+    tk.Button(f_vp, text="...", command=browse_vocab,
+              bg=BG3, fg=FG, font=FONT_S, relief="flat",
+              padx=8, pady=3, cursor="hand2", bd=0).pack(side="left", padx=(6, 0))
+
+    model_dir_var = tk.StringVar(value=cfg.config.get("model_dir", ""))
+    f_md = row("Modell-Verzeichnis", hint="leer = Standard-Cache")
+    md_entry = tk.Entry(f_md, textvariable=model_dir_var, font=FONT, width=30,
+                        bg=BG3, fg=FG, insertbackground=AMBER,
+                        relief="flat", bd=6,
+                        highlightbackground=BORDER, highlightthickness=1)
+    md_entry.pack(side="left")
+
+    def browse_model_dir():
+        path = filedialog.askdirectory(parent=win, title="Modell-Verzeichnis wählen")
+        if path:
+            model_dir_var.set(path)
+
+    tk.Button(f_md, text="...", command=browse_model_dir,
+              bg=BG3, fg=FG, font=FONT_S, relief="flat",
+              padx=8, pady=3, cursor="hand2", bd=0).pack(side="left", padx=(6, 0))
+
     # ── Buttons ──
     tk.Frame(win, bg=BORDER, height=1).pack(fill="x")
     btn_bar = tk.Frame(win, bg=BG2, pady=16, padx=32)
@@ -190,6 +230,8 @@ def _open_main(root: tk.Tk, on_reload) -> None:
         cfg.config["device"] = device_var.get()
         cfg.config["compute_type"] = cfg.COMPUTE_TYPES[ct_var.get()]
         cfg.config["hotkey"] = hotkey_var.get()
+        cfg.config["vocab_path"] = vocab_path_var.get()
+        cfg.config["model_dir"] = model_dir_var.get()
         cfg.save_config()
         win.destroy()
         threading.Thread(target=on_reload, daemon=True).start()
diff --git a/whisper_app/transcriber.py b/whisper_app/transcriber.py
index 16ef2b9..a2dda20 100644
--- a/whisper_app/transcriber.py
+++ b/whisper_app/transcriber.py
@@ -8,10 +8,12 @@ from whisper_app import app, config, typer
 
 def load_model() -> None:
     app.log(f"Loading {config.config['model']} on {config.config['device']}...")
+    model_dir = config.config.get("model_dir") or None
     app.model = WhisperModel(
         config.config["model"],
         device=config.config["device"],
         compute_type=config.config["compute_type"],
+        download_root=model_dir,
     )
     app.log("Model ready.")
 
diff --git a/whisper_app/typer.py b/whisper_app/typer.py
index 3318565..ad1ff58 100644
--- a/whisper_app/typer.py
+++ b/whisper_app/typer.py
@@ -3,13 +3,16 @@ import shutil
 import subprocess
 import time
 
-from pynput.keyboard import Controller as KeyboardController
+
+def _pynput_type(text):  # type `text` through a pynput keyboard Controller
+    from pynput.keyboard import Controller as KeyboardController  # imported per call rather than at module import time
+    KeyboardController().type(text)  # a fresh Controller is created for each call
 
 
 def type_text(text):
     """Type text into the active window, cross-platform."""
     if os.name == "nt":
-        KeyboardController().type(text)
+        _pynput_type(text)
         return
     session = os.environ.get("XDG_SESSION_TYPE", "")
     if session == "wayland" and shutil.which("wl-copy"):
@@ -19,4 +22,4 @@ def type_text(text):
     elif shutil.which("xdotool"):
         subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], check=False)
     else:
-        KeyboardController().type(text)
+        _pynput_type(text)