|
|
|
|
@ -0,0 +1,688 @@
|
|
|
|
|
"""
Whisper Dictation — local GPU speech-to-text with system tray and settings GUI.

Hold the hotkey to record; release it to transcribe the recording and type the
text into the active window.
"""
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import threading
|
|
|
|
|
import time
|
|
|
|
|
import tkinter as tk
|
|
|
|
|
from tkinter import ttk
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
import sounddevice as sd
|
|
|
|
|
import keyboard
|
|
|
|
|
import pystray
|
|
|
|
|
from PIL import Image, ImageDraw
|
|
|
|
|
from pynput.keyboard import Controller as KeyboardController
|
|
|
|
|
from faster_whisper import WhisperModel
|
|
|
|
|
|
|
|
|
|
# Shared data dir: script directory (= git repo root, synced via git pull).
# The WHISPER_DATA_DIR environment variable overrides it.
_script_dir = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.environ.get("WHISPER_DATA_DIR", _script_dir)
os.makedirs(DATA_DIR, exist_ok=True)

# Local config dir: machine-specific settings (audio device, device, compute_type).
#   Windows:          %LOCALAPPDATA%\WhisperDictation
#   other platforms:  ~/.local/share/WhisperDictation
# A non-empty WHISPER_LOCAL_DIR environment variable takes precedence over both.
_env_local = os.environ.get("WHISPER_LOCAL_DIR")
if _env_local:
    _local_dir = _env_local
elif os.name == "nt":
    # Falls back to the script directory when LOCALAPPDATA is not set.
    _local_dir = os.path.join(os.environ.get("LOCALAPPDATA", _script_dir), "WhisperDictation")
else:
    _local_dir = os.path.join(os.path.expanduser("~"), ".local", "share", "WhisperDictation")
os.makedirs(_local_dir, exist_ok=True)

CONFIG_FILE = os.path.join(DATA_DIR, "config.json")  # shared via git
CONFIG_LOCAL_FILE = os.path.join(_local_dir, "config_local.json")  # machine-specific, not in git
VOCAB_FILE = os.path.join(DATA_DIR, "vocabulary.json")  # shared via git

# Baseline settings; load_config() copies these and overlays the config files.
DEFAULT_CONFIG = {
    "hotkey": "ctrl+shift+space",
    "model": "medium",
    "device": "cuda",
    "compute_type": "float16",
    "language": "de",
    "audio_device": None,  # None is shown as "Standard" in the settings dialog
    "sample_rate": 16000,
}

# Choices offered by the settings dialog.
MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
# Display label -> value stored in config["language"]; "Auto" stores None.
LANGUAGES = {"Deutsch": "de", "English": "en", "Français": "fr", "Español": "es", "Italiano": "it", "Auto": None}
DEVICES = ["cuda", "cpu"]
# Display label -> value stored in config["compute_type"].
COMPUTE_TYPES = {"float16 (GPU)": "float16", "int8 (CPU/GPU)": "int8", "float32": "float32"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── State ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
class AppState:
    """String constants naming the three states the application switches between."""

    IDLE = "idle"  # not recording and not transcribing
    RECORDING = "recording"  # audio_callback collects incoming audio blocks
    TRANSCRIBING = "transcribing"  # set by stop_and_transcribe while it processes the audio
|
|
|
|
|
|
|
|
|
|
# Module-wide runtime state shared by the hotkey, audio, tray and Tk code.
state = AppState.IDLE  # current AppState value; changed through set_state()
audio_chunks = []  # audio blocks appended by audio_callback while recording
model = None  # WhisperModel instance, assigned by load_model()
typer = KeyboardController()  # pynput controller used to type the transcription
config = {}  # effective settings, rebuilt by load_config()
tray_icon = None  # pystray.Icon, created in main()
overlay_window = None  # recording-indicator Toplevel, created by create_overlay()
overlay_tk = None  # hidden Tk root, created in main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Config ────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
# Keys stored only in config_local.json (machine-specific). save_config() writes
# these to CONFIG_LOCAL_FILE and every other key to the shared CONFIG_FILE.
LOCAL_KEYS = {"audio_device", "device", "compute_type"}
|
|
|
|
|
|
|
|
|
|
def load_config():
    """Rebuild the global ``config`` from the defaults plus both config files.

    Layers are applied in this order, later ones winning: ``DEFAULT_CONFIG``,
    the shared ``CONFIG_FILE``, then the machine-specific ``CONFIG_LOCAL_FILE``.

    A file that does not exist is skipped silently. A file that cannot be read,
    is not valid JSON (for example after a git merge conflict in the shared
    file), or whose top-level value is not a JSON object is skipped with a
    message on stdout, so a damaged settings file cannot prevent startup.
    """
    global config
    merged = dict(DEFAULT_CONFIG)
    for path in (CONFIG_FILE, CONFIG_LOCAL_FILE):
        try:
            # Explicit UTF-8: the shared file travels between machines, so the
            # platform's default encoding must not influence how it is parsed.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            continue
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Ignoring unreadable config file {path}: {exc}", flush=True)
            continue
        if isinstance(data, dict):
            merged.update(data)
        else:
            print(f"Ignoring config file {path}: top-level value is not an object", flush=True)
    config = merged
|
|
|
|
|
|
|
|
|
|
def save_config():
    """Persist the global ``config`` as two JSON files.

    Keys listed in ``LOCAL_KEYS`` go to ``CONFIG_LOCAL_FILE``; every other key
    goes to ``CONFIG_FILE``. The shared file is written first.
    """
    shared_part = {}
    local_part = {}
    for key, value in config.items():
        bucket = local_part if key in LOCAL_KEYS else shared_part
        bucket[key] = value

    for path, payload in ((CONFIG_FILE, shared_part), (CONFIG_LOCAL_FILE, local_part)):
        with open(path, "w") as fh:
            json.dump(payload, fh, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Vocabulary ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
# "words" is a list of strings; "replacements" is a list of
# {"from": ..., "to": ...} dicts.
vocab = {"words": [], "replacements": []}


def load_vocab():
    """Load ``VOCAB_FILE`` into the global ``vocab``.

    The file is read as UTF-8 so that a vocabulary written on one platform can
    be read on another. If it is not valid UTF-8 it is re-read with the
    platform default encoding, which is how earlier versions wrote it.

    The result always contains the "words" and "replacements" keys, so code
    that indexes them directly does not fail on a hand-edited or partial file.
    A missing file, or a file whose top-level value is not an object, yields an
    empty vocabulary. Invalid JSON still raises, so a damaged vocabulary is not
    silently replaced and later overwritten.
    """
    global vocab
    loaded = None
    if os.path.exists(VOCAB_FILE):
        try:
            with open(VOCAB_FILE, encoding="utf-8") as f:
                loaded = json.load(f)
        except UnicodeDecodeError:
            # Legacy file written with the platform default encoding.
            with open(VOCAB_FILE) as f:
                loaded = json.load(f)
        if not isinstance(loaded, dict):
            print(f"Ignoring {VOCAB_FILE}: top-level value is not an object", flush=True)
            loaded = None
    if loaded is None:
        loaded = {}
    loaded.setdefault("words", [])
    loaded.setdefault("replacements", [])
    vocab = loaded


def save_vocab():
    """Write the global ``vocab`` to ``VOCAB_FILE`` as UTF-8 JSON."""
    # ensure_ascii=False emits non-ASCII characters verbatim, so the encoding
    # must be fixed instead of depending on the platform default.
    with open(VOCAB_FILE, "w", encoding="utf-8") as f:
        json.dump(vocab, f, indent=2, ensure_ascii=False)


def apply_vocab(text: str) -> str:
    """Return ``text`` with every configured replacement applied in list order.

    Malformed rules are skipped: entries that are not dicts, entries whose
    "from" is missing, empty or not a string, and entries whose "to" is not a
    string. A missing "to" is treated as the empty string.
    """
    for rule in vocab.get("replacements") or []:
        if not isinstance(rule, dict):
            continue
        src = rule.get("from")
        dst = rule.get("to", "")
        # An empty pattern must be rejected: "abc".replace("", "X") yields
        # "XaXbXcX", i.e. it inserts the replacement between every character.
        if not (isinstance(src, str) and src and isinstance(dst, str)):
            continue
        text = text.replace(src, dst)
    return text


def get_initial_prompt() -> str:
    """Return the vocabulary words joined by ", ", or "" when there are none."""
    words = vocab.get("words") or []
    return ", ".join(str(w) for w in words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Tray icon ─────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def make_icon(color):
    """Return a 64x64 transparent RGBA image with a filled circle in ``color``."""
    size = 64
    inset = 4
    picture = Image.new("RGBA", (size, size), (0, 0, 0, 0))
    bounds = [inset, inset, size - inset, size - inset]
    ImageDraw.Draw(picture).ellipse(bounds, fill=color)
    return picture
|
|
|
|
|
|
|
|
|
|
# Tray icon image for each application state; set_state() looks the new state up here.
ICONS = {
    AppState.IDLE: make_icon((40, 200, 80)),  # green
    AppState.RECORDING: make_icon((220, 50, 50)),  # red
    AppState.TRANSCRIBING: make_icon((220, 180, 30)),  # yellow
}
|
|
|
|
|
|
|
|
|
|
def set_state(new_state):
    """Switch the application to ``new_state`` and update tray icon and overlay.

    The tray icon is only touched once it exists. The overlay is requested to
    show for ``AppState.RECORDING`` and to hide for every other state.
    """
    global state
    state = new_state

    if tray_icon:
        tray_icon.icon = ICONS[new_state]

    update_overlay = show_overlay if new_state == AppState.RECORDING else hide_overlay
    update_overlay()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Overlay window ────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def show_overlay():
    """Schedule ``_show_overlay_main`` on the Tk root; no-op before the root exists."""
    if overlay_tk is not None:
        overlay_tk.after(0, _show_overlay_main)
|
|
|
|
|
|
|
|
|
|
def hide_overlay():
    """Schedule ``_hide_overlay_main`` on the Tk root; no-op before the root exists."""
    if overlay_tk is not None:
        overlay_tk.after(0, _hide_overlay_main)
|
|
|
|
|
|
|
|
|
|
def _show_overlay_main():
    """Show the overlay window, place it near the bottom-right corner and raise it."""
    overlay_window.deiconify()

    # Fixed 220x54 window, offset 240 px from the right and 100 px from the
    # bottom edge of the screen.
    left = overlay_tk.winfo_screenwidth() - 240
    top = overlay_tk.winfo_screenheight() - 100
    overlay_window.geometry(f"220x54+{left}+{top}")

    overlay_window.lift()
|
|
|
|
|
|
|
|
|
|
def _hide_overlay_main():
    """Hide the overlay window; scheduled on the Tk root by hide_overlay()."""
    overlay_window.withdraw()
|
|
|
|
|
|
|
|
|
|
def create_overlay(root):
    """Build the initially hidden recording indicator and store it in ``overlay_window``.

    The indicator is a borderless, always-on-top, slightly transparent
    Toplevel of ``root`` containing a red dot and a caption.
    """
    global overlay_window
    background = "#1a1a1a"

    popup = tk.Toplevel(root)
    popup.withdraw()  # stays hidden until _show_overlay_main() deiconifies it
    popup.overrideredirect(True)  # no title bar or window border
    popup.attributes("-topmost", True)
    popup.attributes("-alpha", 0.92)
    popup.configure(bg=background)

    body = tk.Frame(popup, bg=background, padx=12, pady=10)
    body.pack(fill="both", expand=True)

    # Red dot on the left.
    indicator = tk.Canvas(body, width=14, height=14, bg=background, highlightthickness=0)
    indicator.create_oval(2, 2, 12, 12, fill="#e03030", outline="")
    indicator.pack(side="left", padx=(0, 8))

    # Caption to the right of the dot.
    caption = tk.Label(
        body,
        text="Aufnahme läuft …",
        fg="white",
        bg=background,
        font=("Segoe UI", 11),
    )
    caption.pack(side="left")

    overlay_window = popup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Audio ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def audio_callback(indata, frames, time_info, status):
    """Input-stream callback: keep a copy of ``indata`` while recording.

    ``frames``, ``time_info`` and ``status`` are accepted but not used.
    """
    if state != AppState.RECORDING:
        return
    # Store a copy rather than the callback's own buffer object.
    audio_chunks.append(indata.copy())
|
|
|
|
|
|
|
|
|
|
def get_audio_stream():
    """Create (but do not start) a mono input stream from the current config.

    Uses ``config["audio_device"]`` (may be absent or None) and
    ``config["sample_rate"]``; incoming blocks are passed to ``audio_callback``.
    """
    selected_device = config.get("audio_device")
    stream_options = {
        "samplerate": config["sample_rate"],
        "channels": 1,
        "device": selected_device,
        "callback": audio_callback,
    }
    return sd.InputStream(**stream_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Recording & transcription ─────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def start_recording():
    """Hotkey handler: begin a new recording unless one is already running.

    Replaces ``audio_chunks`` with a fresh list before switching the state to
    RECORDING.
    """
    global audio_chunks
    if state != AppState.RECORDING:
        audio_chunks = []
        set_state(AppState.RECORDING)
        print("Recording...", flush=True)
|
|
|
|
|
|
|
|
|
|
def _transcribe_chunks(chunks):
    """Convert recorded audio blocks to post-processed text.

    Returns "" when there is nothing worth transcribing: no chunks, less than
    0.3 s of audio, or an RMS level below 0.0005.
    """
    if not chunks:
        return ""

    audio = np.concatenate(chunks, axis=0).flatten().astype(np.float32)
    duration = len(audio) / config["sample_rate"]
    rms = float(np.sqrt(np.mean(audio ** 2)))
    print(f"Audio: {duration:.1f}s RMS: {rms:.5f}", flush=True)

    if duration < 0.3 or rms < 0.0005:
        print("Too short or silent — skipped.", flush=True)
        return ""

    prompt = get_initial_prompt()
    segments, _ = model.transcribe(
        audio,
        language=config["language"] or None,
        beam_size=5,
        vad_filter=True,
        initial_prompt=prompt or None,
    )
    # Segment texts normally carry their own leading space, so joining the raw
    # texts with " " would produce double spaces. Strip each piece first and
    # drop pieces that are empty afterwards.
    pieces = (segment.text.strip() for segment in segments)
    text = apply_vocab(" ".join(piece for piece in pieces if piece))
    print(f"Result: {repr(text)}", flush=True)
    return text


def stop_and_transcribe():
    """Finish the current recording, transcribe it and type the result.

    Does nothing unless the state is RECORDING. Intended to run on a worker
    thread (see ``on_space_release``).

    Any exception raised while transcribing is reported on stdout instead of
    propagating, and the state is always returned to IDLE afterwards, so a
    failed transcription cannot leave the application stuck in TRANSCRIBING.
    The state is only reset if it is still TRANSCRIBING, so a recording that
    the user started in the meantime is not cut off.
    """
    if state != AppState.RECORDING:
        return
    set_state(AppState.TRANSCRIBING)
    chunks = list(audio_chunks)  # snapshot; a new recording replaces the list

    text = ""
    try:
        text = _transcribe_chunks(chunks)
    except Exception as exc:  # boundary of the worker thread: report, don't die
        print(f"Transcription failed: {exc!r}", flush=True)
    finally:
        if state == AppState.TRANSCRIBING:
            set_state(AppState.IDLE)

    if text:
        # Short pause before typing, as in the original implementation.
        time.sleep(0.15)
        typer.type(text)
|
|
|
|
|
|
|
|
|
|
def on_space_release(e):
    """Key-release handler: hand a running recording to a background thread.

    ``e`` is the keyboard event passed by the hook; it is not used. Nothing
    happens unless the state is RECORDING.
    """
    if state != AppState.RECORDING:
        return
    worker = threading.Thread(target=stop_and_transcribe, daemon=True)
    worker.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Model loading ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def load_model():
    """Create a ``WhisperModel`` from the current config and store it in ``model``.

    Reads ``config["model"]``, ``config["device"]`` and
    ``config["compute_type"]``; progress is reported on stdout.
    """
    global model
    print(f"Loading {config['model']} on {config['device']}...", flush=True)
    model_name = config["model"]
    model_options = {
        "device": config["device"],
        "compute_type": config["compute_type"],
    }
    model = WhisperModel(model_name, **model_options)
    print("Model ready.", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Settings window ───────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def open_settings():
    """Schedule the settings window on the Tk root; no-op before the root exists."""
    if overlay_tk is not None:
        overlay_tk.after(0, _open_settings_main)
|
|
|
|
|
|
|
|
|
|
def _open_settings_main():
    """Build and show the settings window as a Toplevel of ``overlay_tk``.

    The window offers the microphone, model, language, device, compute type
    and hotkey. "Speichern & Neuladen" writes the selections into ``config``,
    saves them, closes the window and reloads model and hotkey on a background
    thread; "Abbrechen" just closes the window.
    """
    # ── Palette: "Precision Audio" ──────────────────────────────────────────
    BG = "#18181f" # deep void
    BG2 = "#22222c" # panel
    BG3 = "#2c2c38" # elevated
    BORDER = "#38384a"
    FG = "#e8e8f0"
    FG2 = "#7878a0"
    AMBER = "#f5a623"
    AMBER2 = "#c8831a"
    GREEN = "#4ade80"  # NOTE(review): not referenced anywhere in this function
    FONT = ("Consolas", 11)
    FONT_UI = ("Segoe UI", 11)
    FONT_B = ("Segoe UI", 11, "bold")
    FONT_S = ("Segoe UI", 9)
    FONT_H = ("Segoe UI Semibold", 16)

    win = tk.Toplevel(overlay_tk)
    win.title("Whisper Dictation")
    win.configure(bg=BG)
    win.attributes("-topmost", True)
    win.resizable(False, False)

    # Center the fixed-size window on the screen
    W, H = 680, 660
    win.update_idletasks()
    sw, sh = win.winfo_screenwidth(), win.winfo_screenheight()
    win.geometry(f"{W}x{H}+{(sw-W)//2}+{(sh-H)//2}")

    # Global option for OptionMenu dropdowns (dark listbox)
    win.option_add("*Menu.background", BG3)
    win.option_add("*Menu.foreground", FG)
    win.option_add("*Menu.activeBackground", AMBER)
    win.option_add("*Menu.activeForeground", BG)
    win.option_add("*Menu.font", FONT_UI)

    # ── Header ──
    hdr = tk.Frame(win, bg=BG2, pady=20)
    hdr.pack(fill="x")
    # Amber accent bar
    tk.Frame(hdr, bg=AMBER, height=3).pack(fill="x")
    tk.Label(hdr, text="WHISPER DICTATION", font=FONT_H,
             bg=BG2, fg=FG, pady=12).pack()
    tk.Label(hdr, text="Lokale GPU-Transkription · offline · privat",
             font=FONT_S, bg=BG2, fg=FG2).pack()

    # ── Scrollable content ──
    # NOTE(review): no scrollbar or scrollregion is configured in this
    # function; the canvas only hosts the content frame.
    canvas = tk.Canvas(win, bg=BG, highlightthickness=0)
    canvas.pack(fill="both", expand=True)
    content = tk.Frame(canvas, bg=BG, padx=36, pady=16)
    canvas.create_window((0, 0), window=content, anchor="nw")

    def section(label):
        """Add a section heading followed by a horizontal rule to the content."""
        f = tk.Frame(content, bg=BG)
        f.pack(fill="x", pady=(18, 6))
        tk.Label(f, text=label, font=("Consolas", 9, "bold"),
                 bg=BG, fg=AMBER).pack(side="left")
        tk.Frame(f, bg=BORDER, height=1).pack(side="left", fill="x", expand=True, padx=(10, 0), pady=6)

    def dd(frame, var, values, width=14):
        """Create dark OptionMenu directly in frame as parent."""
        m = tk.OptionMenu(frame, var, *values)
        m.config(bg=BG3, fg=FG, activebackground=BG3, activeforeground=FG,
                 highlightbackground=BORDER, highlightthickness=1,
                 relief="flat", font=FONT_UI, anchor="w", bd=0, width=width)
        m["menu"].config(bg=BG3, fg=FG, activebackground=AMBER,
                         activeforeground=BG, relief="flat", bd=0)
        return m

    def row(label, hint=None):
        """Returns frame — add controls to frame after calling."""
        f = tk.Frame(content, bg=BG)
        f.pack(fill="x", pady=5)
        tk.Label(f, text=label, width=17, anchor="w", font=FONT_UI,
                 bg=BG, fg=FG2).pack(side="left")
        if hint:
            # Optional hint text, right-aligned in the row
            tk.Label(f, text=hint, font=FONT_S, bg=BG, fg=FG2).pack(side="right")
        return f

    # ── AUDIO ──
    section("AUDIO")
    # Only devices that report at least one input channel are offered.
    devices = [(i, d["name"]) for i, d in enumerate(sd.query_devices())
               if d["max_input_channels"] > 0]
    dev_names = ["Standard"] + [f"{i}: {n}" for i, n in devices]
    dev_var = tk.StringVar()
    cur_dev = config.get("audio_device")
    # Preselect the configured device; fall back to "Standard" when it is
    # unset or no longer among the listed devices.
    dev_var.set("Standard" if cur_dev is None else
                next((f"{i}: {n}" for i, n in devices if i == cur_dev), "Standard"))
    f = row("Mikrofon")
    dd(f, dev_var, dev_names, width=44).pack(side="left")

    # ── MODEL ──
    section("MODELL")
    model_hints = {
        "tiny": "~1 GB VRAM · sehr schnell",
        "base": "~1 GB VRAM",
        "small": "~2 GB VRAM",
        "medium": "~5 GB VRAM · empfohlen ✓",
        "large-v2": "~10 GB VRAM",
        "large-v3": "~10 GB VRAM · bestes Ergebnis",
    }
    model_var = tk.StringVar(value=config["model"])
    f_model = row("Modell")
    dd(f_model, model_var, MODELS, 14).pack(side="left")
    hint_lbl = tk.Label(f_model, text=model_hints.get(config["model"], ""),
                        font=FONT_S, bg=BG, fg=FG2)
    hint_lbl.pack(side="left", padx=(14, 0))
    # Keep the hint next to the dropdown in sync with the selected model.
    model_var.trace_add("write", lambda *_: hint_lbl.config(text=model_hints.get(model_var.get(), "")))

    # Reverse lookup: stored language value -> display label
    lang_display = {v: k for k, v in LANGUAGES.items()}
    lang_var = tk.StringVar(value=lang_display.get(config["language"], "Deutsch"))
    f = row("Sprache")
    dd(f, lang_var, list(LANGUAGES.keys()), 14).pack(side="left")

    # ── PERFORMANCE ──
    section("LEISTUNG")
    device_var = tk.StringVar(value=config["device"])
    f = row("Gerät (GPU/CPU)", hint="cuda = NVIDIA GPU empfohlen")
    dd(f, device_var, DEVICES, 8).pack(side="left")

    # Reverse lookup: stored compute type -> display label
    ct_display = {v: k for k, v in COMPUTE_TYPES.items()}
    ct_var = tk.StringVar(value=ct_display.get(config["compute_type"], "float16 (GPU)"))
    f = row("Compute Type")
    dd(f, ct_var, list(COMPUTE_TYPES.keys()), 18).pack(side="left")

    # ── CONTROLS ──
    section("STEUERUNG")
    hotkey_var = tk.StringVar(value=config["hotkey"])
    f_hk = row("Hotkey", hint="z.B. ctrl+shift+space")
    tk.Entry(f_hk, textvariable=hotkey_var, font=FONT, width=24,
             bg=BG3, fg=FG, insertbackground=AMBER,
             relief="flat", bd=6,
             highlightbackground=BORDER, highlightthickness=1).pack(side="left")

    # ── Buttons ──
    tk.Frame(win, bg=BORDER, height=1).pack(fill="x")
    btn_bar = tk.Frame(win, bg=BG2, pady=16, padx=32)
    btn_bar.pack(fill="x")

    def save():
        """Copy the dialog values into config, save, close, and reload in the background."""
        sel = dev_var.get()
        # Device entries look like "<index>: <name>"; store only the index.
        config["audio_device"] = None if sel == "Standard" else int(sel.split(":")[0])
        config["model"] = model_var.get()
        config["language"] = LANGUAGES[lang_var.get()]
        config["device"] = device_var.get()
        config["compute_type"] = COMPUTE_TYPES[ct_var.get()]
        config["hotkey"] = hotkey_var.get()
        save_config()
        win.destroy()
        # Reload on a separate thread rather than inside this Tk callback.
        threading.Thread(target=reload_model_and_hotkey, daemon=True).start()

    def btn_hover(b, c_in, c_out):
        """Give button b the background c_in while hovered and c_out otherwise."""
        b.bind("<Enter>", lambda _: b.config(bg=c_in))
        b.bind("<Leave>", lambda _: b.config(bg=c_out))

    save_btn = tk.Button(btn_bar, text="Speichern & Neuladen", command=save,
                         bg=AMBER, fg=BG, font=FONT_B,
                         relief="flat", padx=20, pady=9, cursor="hand2", bd=0)
    save_btn.pack(side="right")
    btn_hover(save_btn, AMBER2, AMBER)

    cancel_btn = tk.Button(btn_bar, text="Abbrechen", command=win.destroy,
                           bg=BG3, fg=FG2, font=FONT_UI,
                           relief="flat", padx=20, pady=9, cursor="hand2", bd=0)
    cancel_btn.pack(side="right", padx=(0, 10))
    btn_hover(cancel_btn, BORDER, BG3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def open_vocab():
    """Schedule the vocabulary window on the Tk root; no-op before the root exists."""
    if overlay_tk is not None:
        overlay_tk.after(0, _open_vocab_main)
|
|
|
|
|
|
|
|
|
|
def _open_vocab_main():
    """Build and show the vocabulary window as a Toplevel of ``overlay_tk``.

    The window has a form for adding either a plain word or a correction
    (from -> to), and two lists showing the stored words and corrections with
    a remove button each. Every change is saved immediately via save_vocab().
    """
    BG = "#18181f"
    BG2 = "#22222c"
    BG3 = "#2c2c38"
    BORDER = "#38384a"
    FG = "#e8e8f0"
    FG2 = "#7878a0"
    AMBER = "#f5a623"
    AMBER2 = "#c8831a"
    RED = "#f87171"
    FONT = ("Segoe UI", 11)
    FONT_B = ("Segoe UI", 11, "bold")
    FONT_S = ("Segoe UI", 9)
    FONT_H = ("Segoe UI Semibold", 14)
    FONT_M = ("Consolas", 10)

    win = tk.Toplevel(overlay_tk)
    win.title("Vokabular")
    win.configure(bg=BG)
    win.attributes("-topmost", True)
    win.resizable(False, False)
    # Center the fixed-size window on the screen
    W, H = 600, 620
    win.update_idletasks()
    sw, sh = win.winfo_screenwidth(), win.winfo_screenheight()
    win.geometry(f"{W}x{H}+{(sw-W)//2}+{(sh-H)//2}")
    win.option_add("*Menu.background", BG3)
    win.option_add("*Menu.foreground", FG)
    win.option_add("*Menu.activeBackground", AMBER)
    win.option_add("*Menu.activeForeground", BG)

    # ── Header ──
    hdr = tk.Frame(win, bg=BG2)
    hdr.pack(fill="x")
    tk.Frame(hdr, bg=AMBER, height=3).pack(fill="x")
    tk.Label(hdr, text="VOKABULAR & KORREKTUREN", font=FONT_H,
             bg=BG2, fg=FG, pady=14).pack()
    tk.Label(hdr, text="Wörter lernen · Ersetzungen definieren",
             font=FONT_S, bg=BG2, fg=FG2).pack(pady=(0, 10))

    content = tk.Frame(win, bg=BG, padx=28, pady=12)
    content.pack(fill="both", expand=True)

    # ── Add-word form ─────────────────────────────────────────────────────────
    # False: the form adds a plain word; True: it adds a from -> to correction.
    is_correction = tk.BooleanVar(value=False)

    form = tk.Frame(content, bg=BG3, padx=16, pady=14)
    form.pack(fill="x", pady=(0, 16))

    # Toggle row
    tog_row = tk.Frame(form, bg=BG3)
    tog_row.pack(fill="x", pady=(0, 10))
    tk.Label(tog_row, text="Korrektur (falsch → richtig)", font=FONT,
             bg=BG3, fg=FG).pack(side="left")

    def toggle_form(*_):
        """Swap between the single word entry and the from/arrow/to entries."""
        # The entry widgets are created further down; this only runs later,
        # when the checkbutton is clicked.
        if is_correction.get():
            entry_from.pack(side="left", padx=(0, 6))
            arrow_lbl.pack(side="left", padx=4)
            entry_to.pack(side="left")
            entry_word.pack_forget()
        else:
            entry_word.pack(side="left", fill="x", expand=True)
            entry_from.pack_forget()
            arrow_lbl.pack_forget()
            entry_to.pack_forget()

    tog_btn = tk.Checkbutton(tog_row, variable=is_correction, command=toggle_form,
                             bg=BG3, fg=FG2, activebackground=BG3,
                             selectcolor=AMBER, relief="flat", bd=0,
                             indicatoron=True)
    tog_btn.pack(side="right")

    # Input row
    inp_row = tk.Frame(form, bg=BG3)
    inp_row.pack(fill="x")

    # Options shared by all three entry widgets
    entry_style = dict(font=FONT_M, bg=BG, fg=FG, insertbackground=AMBER,
                       relief="flat", bd=6, highlightbackground=BORDER, highlightthickness=1)

    entry_word = tk.Entry(inp_row, width=32, **entry_style)
    entry_word.insert(0, "")
    entry_from = tk.Entry(inp_row, width=14, **entry_style)
    arrow_lbl = tk.Label(inp_row, text="→", font=("Segoe UI", 14), bg=BG3, fg=AMBER)
    entry_to = tk.Entry(inp_row, width=14, **entry_style)
    # Only the word entry is visible initially (is_correction starts as False).
    entry_word.pack(side="left", fill="x", expand=True)

    def add_entry():
        """Add the form's content to the vocabulary, then save and refresh the lists."""
        if is_correction.get():
            frm = entry_from.get().strip()
            to = entry_to.get().strip()
            # Both sides of a correction must be non-empty.
            if frm and to:
                vocab["replacements"].append({"from": frm, "to": to})
                entry_from.delete(0, tk.END)
                entry_to.delete(0, tk.END)
        else:
            w = entry_word.get().strip()
            # Skip empty input and words that are already stored.
            if w and w not in vocab["words"]:
                vocab["words"].append(w)
                entry_word.delete(0, tk.END)
        save_vocab()
        refresh_lists()

    # Pressing Return anywhere in the window submits the form.
    win.bind("<Return>", lambda _: add_entry())

    add_btn = tk.Button(inp_row, text="Hinzufügen", command=add_entry,
                        bg=AMBER, fg=BG, font=FONT_B,
                        relief="flat", padx=14, pady=5, cursor="hand2", bd=0)
    add_btn.pack(side="right", padx=(10, 0))
    add_btn.bind("<Enter>", lambda _: add_btn.config(bg=AMBER2))
    add_btn.bind("<Leave>", lambda _: add_btn.config(bg=AMBER))

    # ── Lists ─────────────────────────────────────────────────────────────────
    lists_frame = tk.Frame(content, bg=BG)
    lists_frame.pack(fill="both", expand=True)
    lists_frame.columnconfigure(0, weight=1)
    lists_frame.columnconfigure(1, weight=2)

    def section_label(parent, text):
        """Add a small amber column heading to parent."""
        tk.Label(parent, text=text, font=("Consolas", 9, "bold"),
                 bg=BG, fg=AMBER).pack(anchor="w", pady=(0, 6))

    # Words column
    col_w = tk.Frame(lists_frame, bg=BG)
    col_w.grid(row=0, column=0, sticky="nsew", padx=(0, 12))
    section_label(col_w, "WÖRTER")

    words_box = tk.Listbox(col_w, font=FONT_M, bg=BG3, fg=FG,
                           selectbackground=AMBER, selectforeground=BG,
                           relief="flat", bd=0, highlightthickness=0,
                           activestyle="none", height=10)
    words_box.pack(fill="both", expand=True)

    def del_word():
        """Remove the selected word, then save and refresh the lists."""
        sel = words_box.curselection()
        if sel:
            # The listbox row index is used as the index into vocab["words"].
            vocab["words"].pop(sel[0])
            save_vocab()
            refresh_lists()

    tk.Button(col_w, text="− Entfernen", command=del_word,
              bg=BG3, fg=RED, font=FONT_S, relief="flat",
              padx=8, pady=3, cursor="hand2", bd=0).pack(anchor="e", pady=(4, 0))

    # Replacements column
    col_r = tk.Frame(lists_frame, bg=BG)
    col_r.grid(row=0, column=1, sticky="nsew")
    section_label(col_r, "KORREKTUREN")

    repl_box = tk.Listbox(col_r, font=FONT_M, bg=BG3, fg=FG,
                          selectbackground=AMBER, selectforeground=BG,
                          relief="flat", bd=0, highlightthickness=0,
                          activestyle="none", height=10)
    repl_box.pack(fill="both", expand=True)

    def del_repl():
        """Remove the selected correction, then save and refresh the lists."""
        sel = repl_box.curselection()
        if sel:
            # The listbox row index is used as the index into vocab["replacements"].
            vocab["replacements"].pop(sel[0])
            save_vocab()
            refresh_lists()

    tk.Button(col_r, text="− Entfernen", command=del_repl,
              bg=BG3, fg=RED, font=FONT_S, relief="flat",
              padx=8, pady=3, cursor="hand2", bd=0).pack(anchor="e", pady=(4, 0))

    def refresh_lists():
        """Refill both listboxes from the current vocab."""
        words_box.delete(0, tk.END)
        for w in vocab.get("words", []):
            words_box.insert(tk.END, f" {w}")
        repl_box.delete(0, tk.END)
        for r in vocab.get("replacements", []):
            repl_box.insert(tk.END, f" {r['from']} → {r['to']}")

    # Initial fill
    refresh_lists()

    # ── Footer ──
    tk.Frame(win, bg=BORDER, height=1).pack(fill="x")
    tk.Label(win, text="Wörter fließen als Kontext in Whisper ein · Korrekturen werden nach der Transkription angewendet",
             font=FONT_S, bg=BG2, fg=FG2, pady=8).pack()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def reload_model_and_hotkey():
    """Apply changed settings: reload the model and re-register the hotkey.

    All existing keyboard hooks are removed first. After the model has been
    loaded, the configured combination starts a recording, and releasing its
    last key (the part after the final "+") ends it.
    """
    keyboard.unhook_all()
    load_model()

    combo = config["hotkey"]
    keyboard.add_hotkey(combo, start_recording, suppress=True)
    release_key = config["hotkey"].split("+")[-1]
    keyboard.on_release_key(release_key, on_space_release)

    print(f"Hotkey updated: {config['hotkey']}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def main():
    """Start the application and block in the Tk main loop until it is quit.

    Order of startup: load config, vocabulary and model; create the hidden Tk
    root and the overlay; start the audio input stream; register the hotkey;
    start the tray icon on a daemon thread. After the main loop returns, the
    audio stream is stopped.
    """
    global tray_icon, overlay_tk

    load_config()
    load_vocab()
    load_model()

    # Hidden Tk root that owns the overlay and the settings/vocabulary windows.
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    overlay_tk = hidden_root
    create_overlay(hidden_root)

    # Audio input stream.
    mic_stream = get_audio_stream()
    mic_stream.start()

    # Hotkey: the full combination starts recording; releasing its last key
    # (the part after the final "+") triggers transcription.
    release_key = config["hotkey"].split("+")[-1]
    keyboard.add_hotkey(config["hotkey"], start_recording, suppress=True)
    keyboard.on_release_key(release_key, on_space_release)

    # Tray icon with its menu.
    tray_menu = pystray.Menu(
        pystray.MenuItem("Einstellungen", lambda: open_settings()),
        pystray.MenuItem("Vokabular", lambda: open_vocab()),
        pystray.Menu.SEPARATOR,
        pystray.MenuItem("Beenden", lambda: quit_app(mic_stream)),
    )
    tray_icon = pystray.Icon("whisper", ICONS[AppState.IDLE], "Whisper Dictation", tray_menu)

    tray_thread = threading.Thread(target=tray_icon.run, daemon=True)
    tray_thread.start()

    print(f"Ready. Hotkey: {config['hotkey']}", flush=True)
    hidden_root.mainloop()

    mic_stream.stop()
|
|
|
|
|
|
|
|
|
|
def quit_app(stream):
    """Shut the application down; used by the tray menu's "Beenden" entry.

    Args:
        stream: the audio input stream created in main(); it is stopped here.
    """
    stream.stop()
    tray_icon.stop()
    # Ask the Tk root to leave its main loop via a scheduled callback.
    overlay_tk.after(0, overlay_tk.quit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()
|