fix linux version
This commit is contained in:
parent
11fd47946d
commit
5aaf8b59ce
|
|
@ -36,7 +36,34 @@
|
||||||
"Bash(bash build-linux.sh)",
|
"Bash(bash build-linux.sh)",
|
||||||
"Bash(.venv-linux/bin/python -c \"import tkinter; print\\(''tkinter OK''\\)\")",
|
"Bash(.venv-linux/bin/python -c \"import tkinter; print\\(''tkinter OK''\\)\")",
|
||||||
"Bash(pacman -Q tk)",
|
"Bash(pacman -Q tk)",
|
||||||
"Bash(sudo pacman:*)"
|
"Bash(sudo pacman:*)",
|
||||||
|
"Bash(grep -r \"WHISPER_DATA_DIR\\\\|WHISPER_LOCAL_DIR\" /run/media/chk/Ventoy/projects/chrka/whisper-dictation --include=*.py)",
|
||||||
|
"Bash(grep -l \"config.load_config\\\\|config.load_vocab\" /run/media/chk/Ventoy/projects/chrka/whisper-dictation/whisper_app/*.py)",
|
||||||
|
"Bash(.venv-linux/bin/python -m pytest tests/ -v)",
|
||||||
|
"Bash(.venv-linux/bin/python -m unittest discover -s tests -v)",
|
||||||
|
"Bash(head -5 tests/*.py)",
|
||||||
|
"Bash(.venv-linux/bin/pip install:*)",
|
||||||
|
"Bash(./whisper-dictation)",
|
||||||
|
"Bash(pacman -Ss appindicator)",
|
||||||
|
"Bash(pacman -Q libayatana-appindicator)",
|
||||||
|
"Bash(echo \"$XDG_SESSION_TYPE\")",
|
||||||
|
"Bash(echo \"Session: $XDG_SESSION_TYPE\")",
|
||||||
|
"Bash(mount)",
|
||||||
|
"Bash(desktop-file-validate ~/.local/share/applications/whisper-dictation.desktop)",
|
||||||
|
"Bash(update-desktop-database ~/.local/share/applications/)",
|
||||||
|
"Bash(echo \"DISPLAY=$DISPLAY\")",
|
||||||
|
"Bash(xlsclients)",
|
||||||
|
"Bash(DISPLAY=:0 xdpyinfo)",
|
||||||
|
"Bash(pkill -f \"whisper-dictation.*resource_tracker\")",
|
||||||
|
"Bash(pkill -f \"dist/whisper-dictation-linux/whisper-dictation\")",
|
||||||
|
"Bash(pkill -9 -f whisper-dictation)",
|
||||||
|
"Bash(pkill -f whisper-dictation)",
|
||||||
|
"Bash(gtk-launch whisper-dictation:*)",
|
||||||
|
"Bash(pkill -9 -f resource_tracker)",
|
||||||
|
"Bash(echo \"Desktop: $XDG_CURRENT_DESKTOP\")",
|
||||||
|
"Bash(nvidia-smi)",
|
||||||
|
"Bash(lspci)",
|
||||||
|
"Bash(pacman -Q)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ models/
|
||||||
*.log
|
*.log
|
||||||
build/
|
build/
|
||||||
dist/
|
dist/
|
||||||
|
shared_data/models--Systran--faster-whisper-medium/
|
||||||
icon.ico
|
icon.ico
|
||||||
.claude/settings.local.json
|
.claude/settings.local.json
|
||||||
.superpowers/
|
.superpowers/
|
||||||
|
|
|
||||||
85
README.md
85
README.md
|
|
@ -6,9 +6,10 @@ Local GPU speech-to-text dictation tool. Hold a hotkey to record, release to tra
|
||||||
|
|
||||||
- System tray icon with settings GUI (tkinter)
|
- System tray icon with settings GUI (tkinter)
|
||||||
- Configurable hotkey, model, language, audio device
|
- Configurable hotkey, model, language, audio device
|
||||||
|
- Cross-platform: Windows and Linux builds from a single codebase
|
||||||
- Shared config via git (`config.json`, `vocabulary.json`)
|
- Shared config via git (`config.json`, `vocabulary.json`)
|
||||||
- Machine-specific settings stored locally (audio device, GPU settings)
|
- Machine-specific settings stored locally (audio device, GPU settings, model)
|
||||||
- Windows: GPU acceleration via CUDA; Linux: CPU
|
- Configurable shared paths for vocabulary and model cache (useful for dual-boot setups)
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
|
@ -20,8 +21,38 @@ Local GPU speech-to-text dictation tool. Hold a hotkey to record, release to tra
|
||||||
- `pyinstaller` (for building a standalone executable)
|
- `pyinstaller` (for building a standalone executable)
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
|
|
||||||
|
**System packages (install via package manager):**
|
||||||
|
|
||||||
|
Arch/CachyOS:
|
||||||
|
```bash
|
||||||
|
sudo pacman -S tk libayatana-appindicator wl-clipboard xdotool
|
||||||
|
```
|
||||||
|
|
||||||
|
Debian/Ubuntu:
|
||||||
|
```bash
|
||||||
|
sudo apt install python3-tk libayatana-appindicator3-1 wl-clipboard xdotool
|
||||||
|
```
|
||||||
|
|
||||||
|
| Package | Purpose |
|
||||||
|
|---------|---------|
|
||||||
|
| `tk` | tkinter GUI (settings, log, vocabulary windows) |
|
||||||
|
| `libayatana-appindicator` | System tray icon (required for KDE/GNOME on Wayland) |
|
||||||
|
| `wl-clipboard` | Text injection on Wayland (`wl-copy`) |
|
||||||
|
| `xdotool` | Simulates Ctrl+V paste on Wayland, text typing on X11 |
|
||||||
|
|
||||||
|
**Optional (for GPU acceleration):**
|
||||||
|
|
||||||
|
Arch/CachyOS:
|
||||||
|
```bash
|
||||||
|
sudo pacman -S nvidia cuda
|
||||||
|
```
|
||||||
|
|
||||||
|
Without CUDA, the app runs on CPU. Use `int8` compute type and a smaller model (`small` or `base`) for acceptable speed on CPU.
|
||||||
|
|
||||||
|
**Python:**
|
||||||
- Python 3.10+
|
- Python 3.10+
|
||||||
- PortAudio: `sudo apt install portaudio19-dev`
|
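- PortAudio: the `sounddevice` wheels bundle it only on Windows and macOS; on Linux install it via the package manager (e.g. `sudo apt install libportaudio2` or `sudo pacman -S portaudio`)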
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|
@ -36,11 +67,11 @@ This creates a `.venv-windows` virtual environment, installs all dependencies an
|
||||||
### Linux
|
### Linux
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
chmod +x install.sh start.sh
|
chmod +x install.sh start.sh build-linux.sh
|
||||||
./install.sh
|
./install.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
Creates a `.venv-linux` virtual environment. GPU support on Linux requires a manually installed CUDA environment; by default runs on CPU.
|
Creates a `.venv-linux` virtual environment with all dependencies and PyInstaller.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
|
@ -58,33 +89,63 @@ The app starts in the system tray. Hold the hotkey (default: `Ctrl+Shift+Space`)
|
||||||
|
|
||||||
## Build
|
## Build
|
||||||
|
|
||||||
To produce a standalone Windows executable:
|
Builds are platform-specific and output to separate directories:
|
||||||
|
- Windows: `dist/whisper-dictation-windows/`
|
||||||
|
- Linux: `dist/whisper-dictation-linux/`
|
||||||
|
|
||||||
|
### Windows
|
||||||
```bat
|
```bat
|
||||||
.venv-windows\Scripts\python.exe build.py
|
.venv-windows\Scripts\python.exe build.py
|
||||||
```
|
```
|
||||||
|
|
||||||
This uses PyInstaller to bundle the app and all dependencies into a single folder under `dist/`. The resulting executable can be run without a Python installation.
|
### Linux
|
||||||
|
```bash
|
||||||
|
./build-linux.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Both use PyInstaller to bundle the app into a standalone folder. The resulting executable can be run without a Python installation.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
`config.json` (shared, stored in the repo):
|
### Shared config (`config.json`, in app directory)
|
||||||
|
|
||||||
| Key | Default | Description |
|
| Key | Default | Description |
|
||||||
|-----|---------|-------------|
|
|-----|---------|-------------|
|
||||||
| `hotkey` | `ctrl+shift+space` | Recording trigger |
|
| `hotkey` | `ctrl+shift+space` | Recording trigger |
|
||||||
| `model` | `medium` | Whisper model size (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`) |
|
|
||||||
| `language` | `de` | Transcription language (`de`, `en`, `fr`, `es`, `it`, `null` = auto) |
|
| `language` | `de` | Transcription language (`de`, `en`, `fr`, `es`, `it`, `null` = auto) |
|
||||||
| `sample_rate` | `16000` | Audio sample rate in Hz |
|
| `sample_rate` | `16000` | Audio sample rate in Hz |
|
||||||
|
| `vocab_path` | `""` | Path to vocabulary file (empty = local `vocabulary.json`) |
|
||||||
|
| `model_dir` | `""` | Path to shared model cache directory (empty = default HuggingFace cache) |
|
||||||
|
|
||||||
Machine-specific settings (GPU device, compute type, audio device) are stored separately and not tracked by git:
|
### Local config (`config_local.json`, per machine)
|
||||||
|
|
||||||
|
Stored outside the app directory to keep machine-specific settings separate:
|
||||||
- **Windows:** `%LOCALAPPDATA%\WhisperDictation\config_local.json`
|
- **Windows:** `%LOCALAPPDATA%\WhisperDictation\config_local.json`
|
||||||
- **Linux:** `~/.local/share/WhisperDictation/config_local.json`
|
- **Linux:** `~/.local/share/WhisperDictation/config_local.json`
|
||||||
|
|
||||||
|
| Key | Default | Description |
|
||||||
|
|-----|---------|-------------|
|
||||||
|
| `model` | `medium` | Whisper model size (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`) |
|
||||||
|
| `device` | `cuda` | Inference device (`cuda` or `cpu`) |
|
||||||
|
| `compute_type` | `float16` | Precision (`float16` for GPU, `int8` for CPU, `float32`) |
|
||||||
|
| `audio_device` | `null` | Microphone (null = system default) |
|
||||||
|
|
||||||
|
### Sharing data between Windows and Linux
|
||||||
|
|
||||||
|
On a shared drive (e.g. Ventoy USB), both builds can use the same vocabulary and model files. Set `vocab_path` and `model_dir` in the Settings UI to point to a common directory:
|
||||||
|
|
||||||
|
```
|
||||||
|
shared_data/
|
||||||
|
vocabulary.json <- shared vocabulary
|
||||||
|
models/ <- shared Whisper model cache
|
||||||
|
```
|
||||||
|
|
||||||
|
Audio settings, model selection, and compute type remain per-platform in `config_local.json`.
|
||||||
|
|
||||||
## Vocabulary
|
## Vocabulary
|
||||||
|
|
||||||
Custom vocabulary/replacements can be added to `vocabulary.json`. These are passed as initial prompts to improve recognition of domain-specific terms.
|
Custom vocabulary/replacements can be edited via the Settings UI or directly in `vocabulary.json`. Words are passed as initial prompts to improve recognition of domain-specific terms. Replacements are applied as find/replace after transcription.
|
||||||
|
|
||||||
## Model Download
|
## Model Download
|
||||||
|
|
||||||
On first start the selected Whisper model is downloaded automatically from HuggingFace (~500 MB for `medium`). Subsequent starts use the cached model.
|
On first start the selected Whisper model is downloaded automatically from HuggingFace (~1.5 GB for `medium`). Subsequent starts use the cached model. Set `model_dir` to share the cache between builds.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
{
|
{
|
||||||
"hotkey": "ctrl+shift+space",
|
"hotkey": "ctrl+shift+space",
|
||||||
"model": "medium",
|
|
||||||
"language": "de",
|
"language": "de",
|
||||||
"sample_rate": 16000
|
"sample_rate": 16000,
|
||||||
|
"vocab_path": "/run/media/chk/Ventoy/projects/chrka/whisper-dictation/shared_data/vocabulary.json",
|
||||||
|
"model_dir": "/run/media/chk/Ventoy/projects/chrka/whisper-dictation/shared_data/"
|
||||||
}
|
}
|
||||||
2
main.py
2
main.py
|
|
@ -103,4 +103,6 @@ def _quit(stream, icon):
|
||||||
app.overlay_tk.after(0, app.overlay_tk.quit)
|
app.overlay_tk.after(0, app.overlay_tk.quit)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
import multiprocessing
|
||||||
|
multiprocessing.freeze_support()
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
[Desktop Entry]
|
||||||
|
Icon=folder-yellow
|
||||||
|
|
@ -0,0 +1,63 @@
|
||||||
|
{
|
||||||
|
"words": [
|
||||||
|
"test"
|
||||||
|
],
|
||||||
|
"replacements": [
|
||||||
|
{
|
||||||
|
"from": "KRA",
|
||||||
|
"to": "KRAH"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Atos",
|
||||||
|
"to": "ATHOS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Resistec",
|
||||||
|
"to": "RESISTEC"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Resistek",
|
||||||
|
"to": "RESISTEC"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "HES",
|
||||||
|
"to": "HEES"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Ackerschot",
|
||||||
|
"to": "Ackerschott"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Carrois",
|
||||||
|
"to": "Kauer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Jouer fixe",
|
||||||
|
"to": "Jour-Fixe"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Docuware",
|
||||||
|
"to": "DocuWare"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Nates",
|
||||||
|
"to": "Nejc"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Bittzeit",
|
||||||
|
"to": "BitSight"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Kalmikow",
|
||||||
|
"to": "Kalmykov"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Leifert",
|
||||||
|
"to": "Leifer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"from": "Kiyosa",
|
||||||
|
"to": "Key-User"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -33,6 +33,8 @@ DEFAULT_CONFIG = {
|
||||||
"language": "de",
|
"language": "de",
|
||||||
"audio_device": None,
|
"audio_device": None,
|
||||||
"sample_rate": 16000,
|
"sample_rate": 16000,
|
||||||
|
"vocab_path": "",
|
||||||
|
"model_dir": "",
|
||||||
}
|
}
|
||||||
|
|
||||||
MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
|
MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
|
||||||
|
|
@ -40,12 +42,22 @@ LANGUAGES = {"Deutsch": "de", "English": "en", "Français": "fr", "Español": "e
|
||||||
"Italiano": "it", "Auto": None}
|
"Italiano": "it", "Auto": None}
|
||||||
DEVICES = ["cuda", "cpu"]
|
DEVICES = ["cuda", "cpu"]
|
||||||
COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"}
|
COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"}
|
||||||
LOCAL_KEYS = {"audio_device", "device", "compute_type"}
|
LOCAL_KEYS = {"audio_device", "device", "compute_type", "model"}
|
||||||
|
|
||||||
config: dict = {}
|
config: dict = {}
|
||||||
vocab: dict = {"words": [], "replacements": []}
|
vocab: dict = {"words": [], "replacements": []}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_vocab_file() -> None:
    """Set VOCAB_FILE from config['vocab_path'], falling back to DATA_DIR.

    Resolution rules:
      * missing, ``None``, empty or whitespace-only ``vocab_path`` ->
        ``<DATA_DIR>/vocabulary.json``
      * a leading ``~`` is expanded to the user's home directory
      * an absolute path is used as-is
      * any other (relative) path is resolved against ``DATA_DIR``
    """
    global VOCAB_FILE
    # `or ""` guards against an explicit null in the JSON config; strip()
    # drops stray whitespace around the configured value.
    vp = (config.get("vocab_path") or "").strip()
    if not vp:
        VOCAB_FILE = os.path.join(DATA_DIR, "vocabulary.json")
        return
    # Without this, "~/x.json" is not absolute and would be joined onto
    # DATA_DIR literally (".../~/x.json").
    vp = os.path.expanduser(vp)
    VOCAB_FILE = vp if os.path.isabs(vp) else os.path.join(DATA_DIR, vp)
|
||||||
|
|
||||||
|
|
||||||
def load_config() -> None:
|
def load_config() -> None:
|
||||||
global config
|
global config
|
||||||
os.makedirs(_local_dir, exist_ok=True)
|
os.makedirs(_local_dir, exist_ok=True)
|
||||||
|
|
@ -63,6 +75,7 @@ def load_config() -> None:
|
||||||
config.update(json.load(f))
|
config.update(json.load(f))
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print(f"Warning: could not parse {CONFIG_LOCAL_FILE}; ignoring")
|
print(f"Warning: could not parse {CONFIG_LOCAL_FILE}; ignoring")
|
||||||
|
_resolve_vocab_file()
|
||||||
|
|
||||||
|
|
||||||
def save_config() -> None:
|
def save_config() -> None:
|
||||||
|
|
@ -74,6 +87,7 @@ def save_config() -> None:
|
||||||
json.dump(shared, f, indent=2)
|
json.dump(shared, f, indent=2)
|
||||||
with open(CONFIG_LOCAL_FILE, "w", encoding="utf-8") as f:
|
with open(CONFIG_LOCAL_FILE, "w", encoding="utf-8") as f:
|
||||||
json.dump(local, f, indent=2)
|
json.dump(local, f, indent=2)
|
||||||
|
_resolve_vocab_file()
|
||||||
|
|
||||||
|
|
||||||
def load_vocab() -> None:
|
def load_vocab() -> None:
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,42 @@
|
||||||
from pynput.keyboard import Controller as KeyboardController, Listener as KeyboardListener, Key, KeyCode
|
_pynput_loaded = False
|
||||||
|
Key = KeyCode = KeyboardListener = None
|
||||||
|
|
||||||
_MODIFIER_MAP = {
|
def _ensure_pynput():
|
||||||
"ctrl": {Key.ctrl_l, Key.ctrl_r},
|
global _pynput_loaded, Key, KeyCode, KeyboardListener, _MODIFIER_MAP, _KEY_MAP
|
||||||
"ctrl_l": {Key.ctrl_l}, "ctrl_r": {Key.ctrl_r},
|
if _pynput_loaded:
|
||||||
"shift": {Key.shift_l, Key.shift_r},
|
return
|
||||||
"shift_l": {Key.shift_l}, "shift_r": {Key.shift_r},
|
from pynput.keyboard import Listener as _Listener, Key as _Key, KeyCode as _KeyCode
|
||||||
"alt": {Key.alt_l, Key.alt_r},
|
Key = _Key
|
||||||
"alt_l": {Key.alt_l}, "alt_r": {Key.alt_r},
|
KeyCode = _KeyCode
|
||||||
}
|
KeyboardListener = _Listener
|
||||||
|
_MODIFIER_MAP.update({
|
||||||
|
"ctrl": {Key.ctrl_l, Key.ctrl_r},
|
||||||
|
"ctrl_l": {Key.ctrl_l}, "ctrl_r": {Key.ctrl_r},
|
||||||
|
"shift": {Key.shift_l, Key.shift_r},
|
||||||
|
"shift_l": {Key.shift_l}, "shift_r": {Key.shift_r},
|
||||||
|
"alt": {Key.alt_l, Key.alt_r},
|
||||||
|
"alt_l": {Key.alt_l}, "alt_r": {Key.alt_r},
|
||||||
|
})
|
||||||
|
_KEY_MAP.update({
|
||||||
|
"space": Key.space, "tab": Key.tab, "enter": Key.enter,
|
||||||
|
"esc": Key.esc, "escape": Key.esc,
|
||||||
|
"up": Key.up, "down": Key.down, "left": Key.left, "right": Key.right,
|
||||||
|
"home": Key.home, "end": Key.end, "page_up": Key.page_up, "page_down": Key.page_down,
|
||||||
|
"insert": Key.insert, "delete": Key.delete, "backspace": Key.backspace,
|
||||||
|
})
|
||||||
|
for i in range(1, 13):
|
||||||
|
_KEY_MAP[f"f{i}"] = getattr(Key, f"f{i}")
|
||||||
|
_pynput_loaded = True
|
||||||
|
|
||||||
_KEY_MAP = {
|
_MODIFIER_MAP = {}
|
||||||
"space": Key.space, "tab": Key.tab, "enter": Key.enter,
|
_KEY_MAP = {}
|
||||||
"esc": Key.esc, "escape": Key.esc,
|
|
||||||
"up": Key.up, "down": Key.down, "left": Key.left, "right": Key.right,
|
|
||||||
"home": Key.home, "end": Key.end, "page_up": Key.page_up, "page_down": Key.page_down,
|
|
||||||
"insert": Key.insert, "delete": Key.delete, "backspace": Key.backspace,
|
|
||||||
}
|
|
||||||
for i in range(1, 13):
|
|
||||||
_KEY_MAP[f"f{i}"] = getattr(Key, f"f{i}")
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_hotkey(hotkey_str):
|
def _parse_hotkey(hotkey_str):
|
||||||
"""Parse hotkey string into (modifier_sets, trigger_key).
|
"""Parse hotkey string into (modifier_sets, trigger_key).
|
||||||
Returns: (list of sets-of-pynput-keys for each modifier, pynput key for trigger)
|
Returns: (list of sets-of-pynput-keys for each modifier, pynput key for trigger)
|
||||||
"""
|
"""
|
||||||
|
_ensure_pynput()
|
||||||
parts = [p.strip().lower() for p in hotkey_str.split("+")]
|
parts = [p.strip().lower() for p in hotkey_str.split("+")]
|
||||||
modifiers = []
|
modifiers = []
|
||||||
for p in parts[:-1]:
|
for p in parts[:-1]:
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
import tkinter as tk
|
import tkinter as tk
|
||||||
|
from tkinter import filedialog
|
||||||
|
|
||||||
from whisper_app import config as cfg
|
from whisper_app import config as cfg
|
||||||
|
|
||||||
|
|
@ -177,6 +178,45 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
||||||
relief="flat", bd=6,
|
relief="flat", bd=6,
|
||||||
highlightbackground=BORDER, highlightthickness=1).pack(side="left")
|
highlightbackground=BORDER, highlightthickness=1).pack(side="left")
|
||||||
|
|
||||||
|
# ── PFADE ──
|
||||||
|
section("PFADE")
|
||||||
|
|
||||||
|
vocab_path_var = tk.StringVar(value=cfg.config.get("vocab_path", ""))
|
||||||
|
f_vp = row("Vocabulary-Datei", hint="leer = lokal im App-Ordner")
|
||||||
|
vp_entry = tk.Entry(f_vp, textvariable=vocab_path_var, font=FONT, width=30,
|
||||||
|
bg=BG3, fg=FG, insertbackground=AMBER,
|
||||||
|
relief="flat", bd=6,
|
||||||
|
highlightbackground=BORDER, highlightthickness=1)
|
||||||
|
vp_entry.pack(side="left")
|
||||||
|
|
||||||
|
def browse_vocab():
|
||||||
|
path = filedialog.askopenfilename(
|
||||||
|
parent=win, title="Vocabulary-Datei wählen",
|
||||||
|
filetypes=[("JSON", "*.json"), ("Alle", "*.*")])
|
||||||
|
if path:
|
||||||
|
vocab_path_var.set(path)
|
||||||
|
|
||||||
|
tk.Button(f_vp, text="...", command=browse_vocab,
|
||||||
|
bg=BG3, fg=FG, font=FONT_S, relief="flat",
|
||||||
|
padx=8, pady=3, cursor="hand2", bd=0).pack(side="left", padx=(6, 0))
|
||||||
|
|
||||||
|
model_dir_var = tk.StringVar(value=cfg.config.get("model_dir", ""))
|
||||||
|
f_md = row("Modell-Verzeichnis", hint="leer = Standard-Cache")
|
||||||
|
md_entry = tk.Entry(f_md, textvariable=model_dir_var, font=FONT, width=30,
|
||||||
|
bg=BG3, fg=FG, insertbackground=AMBER,
|
||||||
|
relief="flat", bd=6,
|
||||||
|
highlightbackground=BORDER, highlightthickness=1)
|
||||||
|
md_entry.pack(side="left")
|
||||||
|
|
||||||
|
def browse_model_dir():
|
||||||
|
path = filedialog.askdirectory(parent=win, title="Modell-Verzeichnis wählen")
|
||||||
|
if path:
|
||||||
|
model_dir_var.set(path)
|
||||||
|
|
||||||
|
tk.Button(f_md, text="...", command=browse_model_dir,
|
||||||
|
bg=BG3, fg=FG, font=FONT_S, relief="flat",
|
||||||
|
padx=8, pady=3, cursor="hand2", bd=0).pack(side="left", padx=(6, 0))
|
||||||
|
|
||||||
# ── Buttons ──
|
# ── Buttons ──
|
||||||
tk.Frame(win, bg=BORDER, height=1).pack(fill="x")
|
tk.Frame(win, bg=BORDER, height=1).pack(fill="x")
|
||||||
btn_bar = tk.Frame(win, bg=BG2, pady=16, padx=32)
|
btn_bar = tk.Frame(win, bg=BG2, pady=16, padx=32)
|
||||||
|
|
@ -190,6 +230,8 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
||||||
cfg.config["device"] = device_var.get()
|
cfg.config["device"] = device_var.get()
|
||||||
cfg.config["compute_type"] = cfg.COMPUTE_TYPES[ct_var.get()]
|
cfg.config["compute_type"] = cfg.COMPUTE_TYPES[ct_var.get()]
|
||||||
cfg.config["hotkey"] = hotkey_var.get()
|
cfg.config["hotkey"] = hotkey_var.get()
|
||||||
|
cfg.config["vocab_path"] = vocab_path_var.get()
|
||||||
|
cfg.config["model_dir"] = model_dir_var.get()
|
||||||
cfg.save_config()
|
cfg.save_config()
|
||||||
win.destroy()
|
win.destroy()
|
||||||
threading.Thread(target=on_reload, daemon=True).start()
|
threading.Thread(target=on_reload, daemon=True).start()
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,12 @@ from whisper_app import app, config, typer
|
||||||
|
|
||||||
def load_model() -> None:
|
def load_model() -> None:
|
||||||
app.log(f"Loading {config.config['model']} on {config.config['device']}...")
|
app.log(f"Loading {config.config['model']} on {config.config['device']}...")
|
||||||
|
model_dir = config.config.get("model_dir") or None
|
||||||
app.model = WhisperModel(
|
app.model = WhisperModel(
|
||||||
config.config["model"],
|
config.config["model"],
|
||||||
device=config.config["device"],
|
device=config.config["device"],
|
||||||
compute_type=config.config["compute_type"],
|
compute_type=config.config["compute_type"],
|
||||||
|
download_root=model_dir,
|
||||||
)
|
)
|
||||||
app.log("Model ready.")
|
app.log("Model ready.")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,16 @@ import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from pynput.keyboard import Controller as KeyboardController
|
|
||||||
|
def _pynput_type(text):
    """Type *text* through a freshly created pynput keyboard controller.

    pynput is imported inside the function body, so the import only runs
    when this function is called.
    """
    from pynput.keyboard import Controller as KeyboardController

    keyboard = KeyboardController()
    keyboard.type(text)
|
||||||
|
|
||||||
|
|
||||||
def type_text(text):
|
def type_text(text):
|
||||||
"""Type text into the active window, cross-platform."""
|
"""Type text into the active window, cross-platform."""
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
KeyboardController().type(text)
|
_pynput_type(text)
|
||||||
return
|
return
|
||||||
session = os.environ.get("XDG_SESSION_TYPE", "")
|
session = os.environ.get("XDG_SESSION_TYPE", "")
|
||||||
if session == "wayland" and shutil.which("wl-copy"):
|
if session == "wayland" and shutil.which("wl-copy"):
|
||||||
|
|
@ -19,4 +22,4 @@ def type_text(text):
|
||||||
elif shutil.which("xdotool"):
|
elif shutil.which("xdotool"):
|
||||||
subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], check=False)
|
subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], check=False)
|
||||||
else:
|
else:
|
||||||
KeyboardController().type(text)
|
_pynput_type(text)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue