Compare commits
3 Commits
e802ab2b3c
...
6172cc0601
| Author | SHA1 | Date |
|---|---|---|
|
|
6172cc0601 | |
|
|
e06d7a555d | |
|
|
9a0d080fb8 |
|
|
@ -67,7 +67,11 @@
|
|||
"Bash(/run/media/chk/Ventoy/projects/chrka/whisper-dictation/.venv-linux/bin/pip install:*)",
|
||||
"Bash(.venv-linux/bin/python build.py)",
|
||||
"Bash(.venv-linux/bin/pip list:*)",
|
||||
"Bash(.venv-linux/bin/python -c \":*)"
|
||||
"Bash(.venv-linux/bin/python -c \":*)",
|
||||
"Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean)",
|
||||
"Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean -y)",
|
||||
"Bash(pactl --version)",
|
||||
"Bash(pactl list:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,6 +62,10 @@
|
|||
{
|
||||
"from": "Kashi",
|
||||
"to": "Cachy"
|
||||
},
|
||||
{
|
||||
"from": "SHP",
|
||||
"to": "SAP"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -34,6 +34,7 @@ _hiddenimports = [
|
|||
'ctranslate2',
|
||||
'faster_whisper',
|
||||
'sounddevice',
|
||||
'language_tool_python',
|
||||
]
|
||||
if _is_windows:
|
||||
_hiddenimports.append('pynput.keyboard._win32')
|
||||
|
|
|
|||
|
|
@ -35,6 +35,10 @@ DEFAULT_CONFIG = {
|
|||
"sample_rate": 16000,
|
||||
"vocab_path": "",
|
||||
"model_dir": "",
|
||||
"grammar_check": True,
|
||||
"paste_delay_ms": 300,
|
||||
"media_duck": True,
|
||||
"duck_percent": 20,
|
||||
}
|
||||
|
||||
MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
|
||||
|
|
@ -115,6 +119,23 @@ def apply_vocab(text: str) -> str:
|
|||
return text
|
||||
|
||||
|
||||
_STYLE_HINTS = {
|
||||
"de": "Hallo, wie geht es Ihnen? Ich arbeite an einem wichtigen Projekt. "
|
||||
"Die Ergebnisse der Analyse zeigen deutliche Verbesserungen.",
|
||||
"en": "Hello, how are you? I am working on an important project. "
|
||||
"The analysis results show clear improvements.",
|
||||
"fr": "Bonjour, comment allez-vous ? Je travaille sur un projet important. "
|
||||
"Les résultats de l'analyse montrent des améliorations nettes.",
|
||||
}
|
||||
|
||||
|
||||
def get_initial_prompt() -> str:
    """Build the initial-prompt string from the style hint and vocabulary.

    The result is the ``_STYLE_HINTS`` entry for the configured language
    (if there is one), followed by the vocabulary words joined with ", ".
    The two parts are separated by a single space.

    Returns:
        The combined prompt, or "" when neither a hint nor any vocabulary
        words are available.
    """
    parts = []
    lang = config.get("language")
    hint = _STYLE_HINTS.get(lang)
    if hint:
        parts.append(hint)
    words = vocab.get("words", [])
    # The previous early ``return ", ".join(words) ...`` sat here and made the
    # hint handling above dead code; hint and words are now always combined.
    if words:
        parts.append(", ".join(words))
    # Joining an empty list already yields "".
    return " ".join(parts)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
"""Optional grammar correction using LanguageTool."""
|
||||
|
||||
_tool = None
|
||||
_lang = None
|
||||
|
||||
_LANG_MAP = {
|
||||
"de": "de-DE",
|
||||
"en": "en-US",
|
||||
"fr": "fr-FR",
|
||||
"es": "es",
|
||||
"it": "it",
|
||||
}
|
||||
|
||||
|
||||
def init(lang, log=print):
    """Create the module-level LanguageTool instance for ``lang``.

    Does nothing when a tool for the same language already exists.  The
    language code is translated through ``_LANG_MAP``; unmapped codes are
    passed through unchanged and a falsy ``lang`` falls back to "de-DE".
    Any failure leaves the checker disabled (``_tool`` is None) and is
    reported through ``log`` instead of being raised.

    Args:
        lang: Short language code such as "de" or "en".
        log: Callable receiving a single status message string.
    """
    global _tool, _lang
    if _tool is not None and lang == _lang:
        return
    _lang = lang
    try:
        import language_tool_python

        target = _LANG_MAP.get(lang, lang or "de-DE")
        _tool = language_tool_python.LanguageTool(target)
        log("Grammar checker ready.")
    except Exception as exc:
        _tool = None
        if isinstance(exc, ImportError):
            log("language_tool_python not installed — grammar check disabled.")
        else:
            log(f"Grammar checker init failed: {exc}")
|
||||
|
||||
|
||||
def correct(text):
    """Return ``text`` after LanguageTool correction, best effort.

    The input is returned unchanged when it is empty, when no checker has
    been initialised (``_tool`` is None), or when the checker raises.

    Args:
        text: The text to correct.

    Returns:
        The corrected text, or the original ``text`` in the cases above.
    """
    # Empty input has nothing to correct; checking it first avoids a
    # pointless call into the checker.
    if not text or _tool is None:
        return text
    try:
        return _tool.correct(text)
    except Exception:
        # Deliberate best effort: a failing checker must never lose the text.
        return text
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
"""Duck (lower) media volume during recording via PulseAudio/PipeWire."""
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
_saved_volumes: dict[int, str] = {}
|
||||
|
||||
|
||||
def _pactl_available() -> bool:
|
||||
return shutil.which("pactl") is not None
|
||||
|
||||
|
||||
def _get_sink_inputs() -> list[tuple[int, str]]:
|
||||
"""Return list of (sink_input_index, current_volume_string)."""
|
||||
try:
|
||||
out = subprocess.run(
|
||||
["pactl", "list", "sink-inputs"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
).stdout
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError):
|
||||
return []
|
||||
|
||||
results = []
|
||||
current_idx = None
|
||||
for line in out.splitlines():
|
||||
m = re.match(r"Sink Input #(\d+)", line)
|
||||
if m:
|
||||
current_idx = int(m.group(1))
|
||||
continue
|
||||
if current_idx is not None and "Volume:" in line:
|
||||
results.append((current_idx, line.strip()))
|
||||
current_idx = None
|
||||
return results
|
||||
|
||||
|
||||
def _parse_percent(vol_line: str) -> int | None:
|
||||
"""Extract first percentage value from a Volume: line."""
|
||||
m = re.search(r"(\d+)%", vol_line)
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
|
||||
def duck(duck_percent: int = 20) -> None:
    """Lower all sink inputs to ``duck_percent`` of their current volume.

    The volume of each sink input before ducking is remembered in
    ``_saved_volumes`` so that ``unduck()`` can restore it.  Does nothing
    when ``pactl`` is not available.

    Args:
        duck_percent: Target volume as a percentage of the current volume.
            Values outside 0..100 are clamped, so ducking never raises
            the volume.
    """
    if not _pactl_available():
        return
    factor = min(100, max(0, duck_percent))
    for idx, vol_line in _get_sink_inputs():
        # Already ducked by an earlier call that has not been undone yet:
        # keep the saved original instead of overwriting it with the
        # already-lowered volume.
        if idx in _saved_volumes:
            continue
        pct = _parse_percent(vol_line)
        if pct is None:
            continue
        _saved_volumes[idx] = f"{pct}%"
        # Keep a 1 % floor, but never go above the current volume
        # (otherwise a 0 % stream would be raised to 1 %).
        ducked = min(pct, max(1, int(pct * factor / 100)))
        try:
            subprocess.run(
                ["pactl", "set-sink-input-volume", str(idx), f"{ducked}%"],
                check=False, timeout=2,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # Best effort: a failing pactl call must not break recording.
            pass
|
||||
|
||||
|
||||
def unduck() -> None:
    """Set every remembered sink input back to its saved volume.

    Does nothing when ``pactl`` is unavailable.  Otherwise each entry of
    ``_saved_volumes`` is written back (failures are ignored) and the
    table is emptied afterwards.
    """
    if _pactl_available():
        for sink_idx, saved in _saved_volumes.items():
            cmd = ["pactl", "set-sink-input-volume", str(sink_idx), saved]
            try:
                subprocess.run(cmd, check=False, timeout=2)
            except (subprocess.TimeoutExpired, FileNotFoundError):
                continue
        _saved_volumes.clear()
|
||||
|
|
@ -34,8 +34,8 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
|||
win.title("Whisper Dictation")
|
||||
win.configure(bg=BG)
|
||||
win.attributes("-topmost", True)
|
||||
win.resizable(False, False)
|
||||
win.minsize(700, 0)
|
||||
win.resizable(True, True)
|
||||
win.minsize(700, 500)
|
||||
|
||||
# Global option for OptionMenu dropdowns (dark listbox)
|
||||
win.option_add("*Menu.background", BG3)
|
||||
|
|
@ -53,9 +53,43 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
|||
tk.Label(hdr, text="Lokale GPU-Transkription · offline · privat",
|
||||
font=FONT_S, bg=BG2, fg=FG2).pack()
|
||||
|
||||
# ── Content ──
|
||||
content = tk.Frame(win, bg=BG, padx=36, pady=16)
|
||||
content.pack(fill="both", expand=True)
|
||||
# ── Scrollable content ──
|
||||
outer = tk.Frame(win, bg=BG)
|
||||
outer.pack(fill="both", expand=True)
|
||||
canvas = tk.Canvas(outer, bg=BG, highlightthickness=0, bd=0)
|
||||
scrollbar = tk.Scrollbar(outer, orient="vertical", command=canvas.yview,
|
||||
bg=BG3, troughcolor=BG, highlightthickness=0, bd=0)
|
||||
canvas.configure(yscrollcommand=scrollbar.set)
|
||||
scrollbar.pack(side="right", fill="y")
|
||||
canvas.pack(side="left", fill="both", expand=True)
|
||||
content = tk.Frame(canvas, bg=BG, padx=36, pady=16)
|
||||
content_id = canvas.create_window((0, 0), window=content, anchor="nw")
|
||||
|
||||
def _on_content_configure(event):
|
||||
canvas.configure(scrollregion=canvas.bbox("all"))
|
||||
content.bind("<Configure>", _on_content_configure)
|
||||
|
||||
def _on_canvas_configure(event):
|
||||
canvas.itemconfigure(content_id, width=event.width)
|
||||
canvas.bind("<Configure>", _on_canvas_configure)
|
||||
|
||||
def _on_mousewheel(event):
|
||||
canvas.yview_scroll(-1 if event.delta > 0 else 1, "units")
|
||||
def _on_button4(event):
|
||||
canvas.yview_scroll(-3, "units")
|
||||
def _on_button5(event):
|
||||
canvas.yview_scroll(3, "units")
|
||||
canvas.bind_all("<MouseWheel>", _on_mousewheel)
|
||||
canvas.bind_all("<Button-4>", _on_button4)
|
||||
canvas.bind_all("<Button-5>", _on_button5)
|
||||
def _cleanup_binds():
|
||||
try:
|
||||
canvas.unbind_all("<MouseWheel>")
|
||||
canvas.unbind_all("<Button-4>")
|
||||
canvas.unbind_all("<Button-5>")
|
||||
except tk.TclError:
|
||||
pass
|
||||
win.bind("<Destroy>", lambda _: _cleanup_binds())
|
||||
|
||||
def section(label):
|
||||
f = tk.Frame(content, bg=BG)
|
||||
|
|
@ -158,6 +192,44 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
|||
f = row("Sprache")
|
||||
dd(f, lang_var, list(cfg.LANGUAGES.keys()), 14).pack(side="left")
|
||||
|
||||
# ── TEXTVERARBEITUNG ──
|
||||
section("TEXTVERARBEITUNG")
|
||||
grammar_var = tk.BooleanVar(value=cfg.config.get("grammar_check", True))
|
||||
f_gc = row("Grammatikkorrektur", hint="pip install language_tool_python")
|
||||
tk.Checkbutton(f_gc, variable=grammar_var, text="Aktiviert",
|
||||
bg=BG, fg=FG, selectcolor=BG3, activebackground=BG,
|
||||
activeforeground=FG, font=FONT_UI,
|
||||
highlightthickness=0, bd=0).pack(side="left")
|
||||
|
||||
paste_delay_var = tk.IntVar(value=cfg.config.get("paste_delay_ms", 300))
|
||||
f_pd = row("Paste-Verzögerung", hint="ms — höher bei langsamen Apps (z.B. Teams)")
|
||||
paste_delay_lbl = tk.Label(f_pd, text=f"{paste_delay_var.get()} ms", font=FONT,
|
||||
bg=BG, fg=FG, width=7, anchor="w")
|
||||
tk.Scale(f_pd, variable=paste_delay_var, from_=50, to=2000, orient="horizontal",
|
||||
length=200, bg=BG, fg=FG, troughcolor=BG3, highlightthickness=0,
|
||||
showvalue=False, bd=0, sliderrelief="flat",
|
||||
command=lambda v: paste_delay_lbl.config(text=f"{int(float(v))} ms")
|
||||
).pack(side="left")
|
||||
paste_delay_lbl.pack(side="left", padx=(8, 0))
|
||||
|
||||
duck_var = tk.BooleanVar(value=cfg.config.get("media_duck", True))
|
||||
f_dk = row("Medien leiser stellen", hint="bei Aufnahme via PulseAudio/PipeWire")
|
||||
tk.Checkbutton(f_dk, variable=duck_var, text="Aktiviert",
|
||||
bg=BG, fg=FG, selectcolor=BG3, activebackground=BG,
|
||||
activeforeground=FG, font=FONT_UI,
|
||||
highlightthickness=0, bd=0).pack(side="left")
|
||||
|
||||
duck_pct_var = tk.IntVar(value=cfg.config.get("duck_percent", 20))
|
||||
f_dp = row("Ducking-Stärke", hint="% der Originallautstärke")
|
||||
duck_pct_lbl = tk.Label(f_dp, text=f"{duck_pct_var.get()} %", font=FONT,
|
||||
bg=BG, fg=FG, width=7, anchor="w")
|
||||
tk.Scale(f_dp, variable=duck_pct_var, from_=0, to=100, orient="horizontal",
|
||||
length=200, bg=BG, fg=FG, troughcolor=BG3, highlightthickness=0,
|
||||
showvalue=False, bd=0, sliderrelief="flat",
|
||||
command=lambda v: duck_pct_lbl.config(text=f"{int(float(v))} %")
|
||||
).pack(side="left")
|
||||
duck_pct_lbl.pack(side="left", padx=(8, 0))
|
||||
|
||||
# ── LEISTUNG ──
|
||||
section("LEISTUNG")
|
||||
device_var = tk.StringVar(value=cfg.config["device"])
|
||||
|
|
@ -232,6 +304,10 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
|||
cfg.config["hotkey"] = hotkey_var.get()
|
||||
cfg.config["vocab_path"] = vocab_path_var.get()
|
||||
cfg.config["model_dir"] = model_dir_var.get()
|
||||
cfg.config["grammar_check"] = grammar_var.get()
|
||||
cfg.config["paste_delay_ms"] = paste_delay_var.get()
|
||||
cfg.config["media_duck"] = duck_var.get()
|
||||
cfg.config["duck_percent"] = duck_pct_var.get()
|
||||
cfg.save_config()
|
||||
win.destroy()
|
||||
threading.Thread(target=on_reload, daemon=True).start()
|
||||
|
|
@ -260,9 +336,9 @@ def _open_main(root: tk.Tk, on_reload) -> None:
|
|||
win.update_idletasks()
|
||||
sw = win.winfo_screenwidth()
|
||||
sh = win.winfo_screenheight()
|
||||
w = win.winfo_reqwidth()
|
||||
h = win.winfo_reqheight()
|
||||
win.geometry(f"+{(sw-w)//2}+{(sh-h)//2}")
|
||||
w = max(win.winfo_reqwidth(), 700)
|
||||
h = min(win.winfo_reqheight(), sh - 100)
|
||||
win.geometry(f"{w}x{h}+{(sw-w)//2}+{(sh-h)//2}")
|
||||
|
||||
|
||||
def _add_installation_section(win, content, section, row, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) -> None:
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import time
|
|||
import numpy as np
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
from whisper_app import app, config, typer
|
||||
from whisper_app import app, config, grammar, media_duck, typer
|
||||
|
||||
|
||||
def load_model() -> None:
|
||||
|
|
@ -16,6 +16,8 @@ def load_model() -> None:
|
|||
download_root=model_dir,
|
||||
)
|
||||
app.log("Model ready.")
|
||||
if config.config.get("grammar_check"):
|
||||
grammar.init(config.config.get("language") or "de", log=app.log)
|
||||
|
||||
|
||||
def stop_and_transcribe() -> None:
|
||||
|
|
@ -62,6 +64,8 @@ def _do_transcribe() -> None:
|
|||
)
|
||||
text = " ".join(s.text for s in segments).strip()
|
||||
text = config.apply_vocab(text)
|
||||
if config.config.get("grammar_check"):
|
||||
text = grammar.correct(text)
|
||||
app.log(f"Result: {repr(text)}")
|
||||
|
||||
if text:
|
||||
|
|
@ -77,6 +81,9 @@ def set_state(new_state: app.AppState) -> None: # semi-public, used by main.py
|
|||
if new_state == app.AppState.RECORDING:
|
||||
from whisper_app import overlay
|
||||
overlay.show()
|
||||
if config.config.get("media_duck"):
|
||||
media_duck.duck(config.config.get("duck_percent", 20))
|
||||
else:
|
||||
from whisper_app import overlay
|
||||
overlay.hide()
|
||||
media_duck.unduck()
|
||||
|
|
|
|||
|
|
@ -3,12 +3,39 @@ import shutil
|
|||
import subprocess
|
||||
import time
|
||||
|
||||
from whisper_app import config
|
||||
|
||||
|
||||
def _pynput_type(text):
    """Type ``text`` through a freshly created pynput keyboard controller."""
    # Imported lazily, as in the original, so pynput is only loaded on use.
    from pynput import keyboard

    controller = keyboard.Controller()
    controller.type(text)
|
||||
|
||||
|
||||
def _wl_paste():
|
||||
"""Read current clipboard contents, returns None on failure."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["wl-paste", "--no-newline"],
|
||||
capture_output=True, timeout=2,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _wl_copy_bytes(data):
    """Feed raw bytes to ``wl-copy`` on stdin to put them on the clipboard.

    A missing ``wl-copy`` binary or a timeout is ignored.
    """
    cmd = ["wl-copy"]
    try:
        subprocess.run(cmd, input=data, check=False, timeout=2)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return
|
||||
|
||||
|
||||
def type_text(text):
|
||||
"""Type text into the active window, cross-platform."""
|
||||
if os.name == "nt":
|
||||
|
|
@ -16,9 +43,14 @@ def type_text(text):
|
|||
return
|
||||
session = os.environ.get("XDG_SESSION_TYPE", "")
|
||||
if session == "wayland" and shutil.which("wl-copy"):
|
||||
delay = config.config.get("paste_delay_ms", 300) / 1000.0
|
||||
old_clipboard = _wl_paste()
|
||||
subprocess.run(["wl-copy", "--", text], check=False)
|
||||
time.sleep(0.05)
|
||||
subprocess.run(["xdotool", "key", "ctrl+v"], check=False)
|
||||
time.sleep(delay)
|
||||
if old_clipboard is not None:
|
||||
_wl_copy_bytes(old_clipboard)
|
||||
elif shutil.which("xdotool"):
|
||||
subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], check=False)
|
||||
else:
|
||||
|
|
|
|||
Loading…
Reference in New Issue