diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..bccf852 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(git remote:*)", + "Bash(git pull:*)" + ] + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..e7db090 --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +# Whisper Dictation + +Local GPU speech-to-text dictation tool. Hold a hotkey to record, release to transcribe and type the result into the active window. Runs fully offline — no cloud, no API key. + +## Features + +- System tray icon with settings GUI (tkinter) +- Configurable hotkey, model, language, audio device +- Shared config via git (`config.json`, `vocabulary.json`) +- Machine-specific settings stored locally (audio device, GPU settings) +- Windows: GPU acceleration via CUDA; Linux: CPU + +## Requirements + +### Windows +- Python 3.13 +- NVIDIA GPU with CUDA 12 drivers +- [PortAudio](http://www.portaudio.com/) (bundled with most Python sounddevice wheels) + +### Linux +- Python 3.10+ +- PortAudio: `sudo apt install portaudio19-dev` + +## Installation + +### Windows + +```bat +install.bat +``` + +This creates a `.venv-windows` virtual environment, installs all dependencies and the CUDA 12 DLLs required by faster-whisper. + +### Linux + +```bash +chmod +x install.sh start.sh +./install.sh +``` + +Creates a `.venv-linux` virtual environment. GPU support on Linux requires a manually installed CUDA environment; by default runs on CPU. + +## Usage + +### Windows +```bat +start.bat +``` + +### Linux +```bash +./start.sh +``` + +The app starts in the system tray. Hold the hotkey (default: `Ctrl+Shift+Space`) to record, release to transcribe and type into the active window. 
+ +## Configuration + +`config.json` (shared, stored in the repo): + +| Key | Default | Description | +|-----|---------|-------------| +| `hotkey` | `ctrl+shift+space` | Recording trigger | +| `model` | `medium` | Whisper model size (`tiny`, `base`, `small`, `medium`, `large-v2`, `large-v3`) | +| `language` | `de` | Transcription language (`de`, `en`, `fr`, `es`, `it`, `null` = auto) | +| `sample_rate` | `16000` | Audio sample rate in Hz | + +Machine-specific settings (GPU device, compute type, audio device) are stored separately and not tracked by git: +- **Windows:** `%LOCALAPPDATA%\WhisperDictation\config_local.json` +- **Linux:** `~/.local/share/WhisperDictation/config_local.json` + +## Vocabulary + +Domain-specific terms can be added to `vocabulary.json`: `words` lists terms to recognize, and `replacements` lists `from`/`to` pairs that map a mis-transcribed form to the intended spelling. The vocabulary is passed to Whisper as an initial prompt to improve recognition of these terms. + +## Model Download + +On first start the selected Whisper model is downloaded automatically from Hugging Face (~1.5 GB for `medium`). Subsequent starts use the cached model.
diff --git a/vocabulary.json b/vocabulary.json index c457ac9..a6fcc5f 100644 --- a/vocabulary.json +++ b/vocabulary.json @@ -1,18 +1,59 @@ { - "words": [], + "words": [ + "test" + ], "replacements": [ - {"from": "KRA", "to": "KRAH"}, - {"from": "Atos", "to": "ATHOS"}, - {"from": "Resistec", "to": "RESISTEC"}, - {"from": "Resistek", "to": "RESISTEC"}, - {"from": "HES", "to": "HEES"}, - {"from": "Ackerschot", "to": "Ackerschott"}, - {"from": "Carrois", "to": "Kauer"}, - {"from": "Jouer fixe", "to": "Jour-Fixe"}, - {"from": "Docuware", "to": "DocuWare"}, - {"from": "Nates", "to": "Nejc"}, - {"from": "Bittzeit", "to": "BitSight"}, - {"from": "Kalmikow", "to": "Kalmykov"}, - {"from": "Leifert", "to": "Leifer"} + { + "from": "KRA", + "to": "KRAH" + }, + { + "from": "Atos", + "to": "ATHOS" + }, + { + "from": "Resistec", + "to": "RESISTEC" + }, + { + "from": "Resistek", + "to": "RESISTEC" + }, + { + "from": "HES", + "to": "HEES" + }, + { + "from": "Ackerschot", + "to": "Ackerschott" + }, + { + "from": "Carrois", + "to": "Kauer" + }, + { + "from": "Jouer fixe", + "to": "Jour-Fixe" + }, + { + "from": "Docuware", + "to": "DocuWare" + }, + { + "from": "Nates", + "to": "Nejc" + }, + { + "from": "Bittzeit", + "to": "BitSight" + }, + { + "from": "Kalmikow", + "to": "Kalmykov" + }, + { + "from": "Leifert", + "to": "Leifer" + } ] -} +} \ No newline at end of file