Compare commits

...

3 Commits

Author SHA1 Message Date
Christian Kauer 6172cc0601 upd settings-window 2026-03-23 16:24:41 +01:00
Christian Kauer e06d7a555d fix timer setting 2026-03-23 16:06:40 +01:00
Christian Kauer 9a0d080fb8 added grammar check 2026-03-23 14:54:17 +01:00
9 changed files with 271 additions and 11 deletions

View File

@ -67,7 +67,11 @@
"Bash(/run/media/chk/Ventoy/projects/chrka/whisper-dictation/.venv-linux/bin/pip install:*)",
"Bash(.venv-linux/bin/python build.py)",
"Bash(.venv-linux/bin/pip list:*)",
"Bash(.venv-linux/bin/python -c \":*)"
"Bash(.venv-linux/bin/python -c \":*)",
"Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean)",
"Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean -y)",
"Bash(pactl --version)",
"Bash(pactl list:*)"
]
}
}

View File

@ -62,6 +62,10 @@
{
"from": "Kashi",
"to": "Cachy"
},
{
"from": "SHP",
"to": "SAP"
}
]
}

View File

@ -34,6 +34,7 @@ _hiddenimports = [
'ctranslate2',
'faster_whisper',
'sounddevice',
'language_tool_python',
]
if _is_windows:
_hiddenimports.append('pynput.keyboard._win32')

View File

@ -35,6 +35,10 @@ DEFAULT_CONFIG = {
"sample_rate": 16000,
"vocab_path": "",
"model_dir": "",
"grammar_check": True,
"paste_delay_ms": 300,
"media_duck": True,
"duck_percent": 20,
}
MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
@ -115,6 +119,23 @@ def apply_vocab(text: str) -> str:
return text
# Sample sentences keyed by language code. get_initial_prompt() looks up the
# entry for the configured language and puts it in front of the vocabulary.
_STYLE_HINTS = {
    "de": (
        "Hallo, wie geht es Ihnen? Ich arbeite an einem wichtigen Projekt. "
        "Die Ergebnisse der Analyse zeigen deutliche Verbesserungen."
    ),
    "en": (
        "Hello, how are you? I am working on an important project. "
        "The analysis results show clear improvements."
    ),
    "fr": (
        "Bonjour, comment allez-vous ? Je travaille sur un projet important. "
        "Les résultats de l'analyse montrent des améliorations nettes."
    ),
}
def get_initial_prompt() -> str:
    """Build the initial-prompt string from the style hint and vocabulary.

    The result is the ``_STYLE_HINTS`` sentence for the configured language
    (when one exists) followed by the vocabulary words joined with ", ".
    The two parts are separated by a single space.

    Returns:
        The assembled prompt, or "" when there is neither a style hint nor
        any vocabulary word.
    """
    parts = []
    hint = _STYLE_HINTS.get(config.get("language"))
    if hint:
        parts.append(hint)
    words = vocab.get("words", [])
    # The former early ``return ", ".join(words) ...`` was removed here: it
    # made the code below unreachable and discarded the style hint.
    if words:
        parts.append(", ".join(words))
    # " ".join([]) is already "", so no separate empty-case branch is needed.
    return " ".join(parts)

41
whisper_app/grammar.py Normal file
View File

@ -0,0 +1,41 @@
"""Optional grammar correction using LanguageTool."""
# LanguageTool instance created by init(); None while unavailable.
_tool = None
# Language code most recently passed to init().
_lang = None

# Short language code -> language tag handed to LanguageTool.
_LANG_MAP = dict(
    de="de-DE",
    en="en-US",
    fr="fr-FR",
    es="es",
    it="it",
)
def init(lang, log=print):
    """Create the module-level LanguageTool instance for *lang*.

    Returns immediately when a tool for the same language already exists.
    On any failure the tool is reset to None (which makes correct() a
    pass-through) and a message is sent to *log*.

    Args:
        lang: Short language code; mapped through ``_LANG_MAP`` when known,
            otherwise passed on as-is ("de-DE" if empty).
        log: Callable taking one message string.
    """
    global _tool, _lang
    if lang == _lang and _tool is not None:
        return  # already initialised for this language

    _lang = lang
    try:
        # Imported lazily so the package stays an optional dependency.
        import language_tool_python as ltp

        tag = _LANG_MAP.get(lang, lang or "de-DE")
        checker = ltp.LanguageTool(tag)
        _tool = checker
        log("Grammar checker ready.")
    except ImportError:
        _tool = None
        log("language_tool_python not installed — grammar check disabled.")
    except Exception as err:
        _tool = None
        log(f"Grammar checker init failed: {err}")
def correct(text):
    """Return *text* corrected by LanguageTool, or unchanged on any failure.

    The text is passed through untouched when no tool has been initialised
    or when the correction call raises.
    """
    checker = _tool
    if checker is not None:
        try:
            return checker.correct(text)
        except Exception:
            # Best effort: fall through and hand back the original text.
            pass
    return text

74
whisper_app/media_duck.py Normal file
View File

@ -0,0 +1,74 @@
"""Duck (lower) media volume during recording via PulseAudio/PipeWire."""
import re
import shutil
import subprocess
# Pre-duck volume ("NN%") of each sink input, keyed by sink-input index.
# Filled by duck() and consumed (then cleared) by unduck().
_saved_volumes: dict[int, str] = {}
def _pactl_available() -> bool:
    """Report whether a ``pactl`` executable can be found on PATH."""
    located = shutil.which("pactl")
    return bool(located)
def _get_sink_inputs() -> list[tuple[int, str]]:
    """Return ``(sink_input_index, volume_line)`` for every sink input.

    Runs ``pactl list sink-inputs`` and pairs each ``Sink Input #N`` header
    with the first following line that contains ``Volume:`` (stripped of
    surrounding whitespace).

    Returns:
        The list of pairs, or [] when pactl cannot be started or does not
        finish within the timeout.
    """
    import os  # local import: this module's import block does not include os

    # pactl translates its output. Force the untranslated "C" locale so the
    # English markers matched below are what it actually prints, regardless
    # of the user's desktop language.
    env = {**os.environ, "LC_ALL": "C"}
    try:
        out = subprocess.run(
            ["pactl", "list", "sink-inputs"],
            capture_output=True, text=True, errors="replace",
            timeout=3, env=env,
        ).stdout
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary as well as e.g. permission errors.
        return []

    results = []
    current_idx = None
    for line in out.splitlines():
        m = re.match(r"Sink Input #(\d+)", line)
        if m:
            current_idx = int(m.group(1))
            continue
        if current_idx is not None and "Volume:" in line:
            results.append((current_idx, line.strip()))
            # Only the first Volume: line after each header is wanted.
            current_idx = None
    return results
def _parse_percent(vol_line: str) -> int | None:
"""Extract first percentage value from a Volume: line."""
m = re.search(r"(\d+)%", vol_line)
return int(m.group(1)) if m else None
def duck(duck_percent: int = 20) -> None:
    """Lower all sink inputs to ``duck_percent`` % of their current volume.

    Each stream's pre-duck volume is recorded in ``_saved_volumes`` so that
    unduck() can restore it. Does nothing when ``pactl`` is not on PATH or
    when a previous duck() has not been undone yet.

    Args:
        duck_percent: Target level as a percentage of the current volume.
            Values outside 0-100 are clamped so that ducking never raises
            a stream's volume.
    """
    if _saved_volumes:
        # Already ducked and not yet restored. Sampling again would record
        # the lowered volumes as the "originals", and unduck() would then
        # restore the streams to the ducked level.
        return
    if not _pactl_available():
        return

    factor = min(100, max(0, duck_percent))
    for idx, vol_line in _get_sink_inputs():
        pct = _parse_percent(vol_line)
        if pct is None:
            continue
        # NOTE(review): only the first channel's percentage is kept;
        # presumably a stream with unequal channel volumes comes back with
        # all channels equal after unduck() — confirm against pactl.
        _saved_volumes[idx] = f"{pct}%"
        # Keep the 1 % floor, but never exceed the current volume: a stream
        # already at 0 % must stay at 0 % instead of being raised to 1 %.
        ducked = min(pct, max(1, int(pct * factor / 100)))
        try:
            subprocess.run(
                ["pactl", "set-sink-input-volume", str(idx), f"{ducked}%"],
                check=False, timeout=2,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # Best effort: a stream that cannot be ducked is left as it is.
            pass
def unduck() -> None:
    """Set every sink input recorded in ``_saved_volumes`` back to its saved volume.

    Does nothing (and keeps the saved volumes) when ``pactl`` is not on
    PATH; otherwise the saved volumes are cleared after the restore pass.
    """
    if _pactl_available():
        for sink_idx, saved in _saved_volumes.items():
            cmd = ["pactl", "set-sink-input-volume", str(sink_idx), saved]
            try:
                subprocess.run(cmd, check=False, timeout=2)
            except (subprocess.TimeoutExpired, FileNotFoundError):
                # Best effort: move on to the next stream.
                continue
        _saved_volumes.clear()

View File

@ -34,8 +34,8 @@ def _open_main(root: tk.Tk, on_reload) -> None:
win.title("Whisper Dictation")
win.configure(bg=BG)
win.attributes("-topmost", True)
win.resizable(False, False)
win.minsize(700, 0)
win.resizable(True, True)
win.minsize(700, 500)
# Global option for OptionMenu dropdowns (dark listbox)
win.option_add("*Menu.background", BG3)
@ -53,9 +53,43 @@ def _open_main(root: tk.Tk, on_reload) -> None:
tk.Label(hdr, text="Lokale GPU-Transkription · offline · privat",
font=FONT_S, bg=BG2, fg=FG2).pack()
# ── Content ──
content = tk.Frame(win, bg=BG, padx=36, pady=16)
content.pack(fill="both", expand=True)
# ── Scrollable content ──
outer = tk.Frame(win, bg=BG)
outer.pack(fill="both", expand=True)
canvas = tk.Canvas(outer, bg=BG, highlightthickness=0, bd=0)
scrollbar = tk.Scrollbar(outer, orient="vertical", command=canvas.yview,
bg=BG3, troughcolor=BG, highlightthickness=0, bd=0)
canvas.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side="right", fill="y")
canvas.pack(side="left", fill="both", expand=True)
content = tk.Frame(canvas, bg=BG, padx=36, pady=16)
content_id = canvas.create_window((0, 0), window=content, anchor="nw")
def _on_content_configure(event):
canvas.configure(scrollregion=canvas.bbox("all"))
content.bind("<Configure>", _on_content_configure)
def _on_canvas_configure(event):
canvas.itemconfigure(content_id, width=event.width)
canvas.bind("<Configure>", _on_canvas_configure)
def _on_mousewheel(event):
canvas.yview_scroll(-1 if event.delta > 0 else 1, "units")
def _on_button4(event):
canvas.yview_scroll(-3, "units")
def _on_button5(event):
canvas.yview_scroll(3, "units")
canvas.bind_all("<MouseWheel>", _on_mousewheel)
canvas.bind_all("<Button-4>", _on_button4)
canvas.bind_all("<Button-5>", _on_button5)
def _cleanup_binds():
try:
canvas.unbind_all("<MouseWheel>")
canvas.unbind_all("<Button-4>")
canvas.unbind_all("<Button-5>")
except tk.TclError:
pass
win.bind("<Destroy>", lambda _: _cleanup_binds())
def section(label):
f = tk.Frame(content, bg=BG)
@ -158,6 +192,44 @@ def _open_main(root: tk.Tk, on_reload) -> None:
f = row("Sprache")
dd(f, lang_var, list(cfg.LANGUAGES.keys()), 14).pack(side="left")
# ── TEXTVERARBEITUNG ──
section("TEXTVERARBEITUNG")
grammar_var = tk.BooleanVar(value=cfg.config.get("grammar_check", True))
f_gc = row("Grammatikkorrektur", hint="pip install language_tool_python")
tk.Checkbutton(f_gc, variable=grammar_var, text="Aktiviert",
bg=BG, fg=FG, selectcolor=BG3, activebackground=BG,
activeforeground=FG, font=FONT_UI,
highlightthickness=0, bd=0).pack(side="left")
paste_delay_var = tk.IntVar(value=cfg.config.get("paste_delay_ms", 300))
f_pd = row("Paste-Verzögerung", hint="ms — höher bei langsamen Apps (z.B. Teams)")
paste_delay_lbl = tk.Label(f_pd, text=f"{paste_delay_var.get()} ms", font=FONT,
bg=BG, fg=FG, width=7, anchor="w")
tk.Scale(f_pd, variable=paste_delay_var, from_=50, to=2000, orient="horizontal",
length=200, bg=BG, fg=FG, troughcolor=BG3, highlightthickness=0,
showvalue=False, bd=0, sliderrelief="flat",
command=lambda v: paste_delay_lbl.config(text=f"{int(float(v))} ms")
).pack(side="left")
paste_delay_lbl.pack(side="left", padx=(8, 0))
duck_var = tk.BooleanVar(value=cfg.config.get("media_duck", True))
f_dk = row("Medien leiser stellen", hint="bei Aufnahme via PulseAudio/PipeWire")
tk.Checkbutton(f_dk, variable=duck_var, text="Aktiviert",
bg=BG, fg=FG, selectcolor=BG3, activebackground=BG,
activeforeground=FG, font=FONT_UI,
highlightthickness=0, bd=0).pack(side="left")
duck_pct_var = tk.IntVar(value=cfg.config.get("duck_percent", 20))
f_dp = row("Ducking-Stärke", hint="% der Originallautstärke")
duck_pct_lbl = tk.Label(f_dp, text=f"{duck_pct_var.get()} %", font=FONT,
bg=BG, fg=FG, width=7, anchor="w")
tk.Scale(f_dp, variable=duck_pct_var, from_=0, to=100, orient="horizontal",
length=200, bg=BG, fg=FG, troughcolor=BG3, highlightthickness=0,
showvalue=False, bd=0, sliderrelief="flat",
command=lambda v: duck_pct_lbl.config(text=f"{int(float(v))} %")
).pack(side="left")
duck_pct_lbl.pack(side="left", padx=(8, 0))
# ── LEISTUNG ──
section("LEISTUNG")
device_var = tk.StringVar(value=cfg.config["device"])
@ -232,6 +304,10 @@ def _open_main(root: tk.Tk, on_reload) -> None:
cfg.config["hotkey"] = hotkey_var.get()
cfg.config["vocab_path"] = vocab_path_var.get()
cfg.config["model_dir"] = model_dir_var.get()
cfg.config["grammar_check"] = grammar_var.get()
cfg.config["paste_delay_ms"] = paste_delay_var.get()
cfg.config["media_duck"] = duck_var.get()
cfg.config["duck_percent"] = duck_pct_var.get()
cfg.save_config()
win.destroy()
threading.Thread(target=on_reload, daemon=True).start()
@ -260,9 +336,9 @@ def _open_main(root: tk.Tk, on_reload) -> None:
win.update_idletasks()
sw = win.winfo_screenwidth()
sh = win.winfo_screenheight()
w = win.winfo_reqwidth()
h = win.winfo_reqheight()
win.geometry(f"+{(sw-w)//2}+{(sh-h)//2}")
w = max(win.winfo_reqwidth(), 700)
h = min(win.winfo_reqheight(), sh - 100)
win.geometry(f"{w}x{h}+{(sw-w)//2}+{(sh-h)//2}")
def _add_installation_section(win, content, section, row, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) -> None:

View File

@ -3,7 +3,7 @@ import time
import numpy as np
from faster_whisper import WhisperModel
from whisper_app import app, config, typer
from whisper_app import app, config, grammar, media_duck, typer
def load_model() -> None:
@ -16,6 +16,8 @@ def load_model() -> None:
download_root=model_dir,
)
app.log("Model ready.")
if config.config.get("grammar_check"):
grammar.init(config.config.get("language") or "de", log=app.log)
def stop_and_transcribe() -> None:
@ -62,6 +64,8 @@ def _do_transcribe() -> None:
)
text = " ".join(s.text for s in segments).strip()
text = config.apply_vocab(text)
if config.config.get("grammar_check"):
text = grammar.correct(text)
app.log(f"Result: {repr(text)}")
if text:
@ -77,6 +81,9 @@ def set_state(new_state: app.AppState) -> None: # semi-public, used by main.py
if new_state == app.AppState.RECORDING:
from whisper_app import overlay
overlay.show()
if config.config.get("media_duck"):
media_duck.duck(config.config.get("duck_percent", 20))
else:
from whisper_app import overlay
overlay.hide()
media_duck.unduck()

View File

@ -3,12 +3,39 @@ import shutil
import subprocess
import time
from whisper_app import config
def _pynput_type(text):
    """Type *text* through a freshly created pynput keyboard controller."""
    # Imported lazily, inside the function, as in the original.
    from pynput import keyboard

    controller = keyboard.Controller()
    controller.type(text)
def _wl_paste():
    """Return the current clipboard contents as bytes via ``wl-paste``.

    Returns None when wl-paste is missing, times out, or exits non-zero.
    """
    cmd = ["wl-paste", "--no-newline"]
    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=2)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None
    return proc.stdout if proc.returncode == 0 else None
def _wl_copy_bytes(data):
    """Feed *data* (raw bytes) to ``wl-copy`` on stdin to set the clipboard.

    Failures (wl-copy missing or timing out) are ignored.
    """
    cmd = ["wl-copy"]
    try:
        subprocess.run(cmd, input=data, timeout=2, check=False)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        # Best effort: restoring the clipboard is not critical.
        return
def type_text(text):
"""Type text into the active window, cross-platform."""
if os.name == "nt":
@ -16,9 +43,14 @@ def type_text(text):
return
session = os.environ.get("XDG_SESSION_TYPE", "")
if session == "wayland" and shutil.which("wl-copy"):
delay = config.config.get("paste_delay_ms", 300) / 1000.0
old_clipboard = _wl_paste()
subprocess.run(["wl-copy", "--", text], check=False)
time.sleep(0.05)
subprocess.run(["xdotool", "key", "ctrl+v"], check=False)
time.sleep(delay)
if old_clipboard is not None:
_wl_copy_bytes(old_clipboard)
elif shutil.which("xdotool"):
subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], check=False)
else: