diff --git a/.claude/settings.local.json b/.claude/settings.local.json index cd1b9c0..b7e39cc 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -69,7 +69,9 @@ "Bash(.venv-linux/bin/pip list:*)", "Bash(.venv-linux/bin/python -c \":*)", "Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean)", - "Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean -y)" + "Bash(.venv-linux/bin/pyinstaller whisper-dictation.spec --clean -y)", + "Bash(pactl --version)", + "Bash(pactl list:*)" ] } } diff --git a/whisper_app/config.py b/whisper_app/config.py index f4159ca..24cfe21 100644 --- a/whisper_app/config.py +++ b/whisper_app/config.py @@ -37,6 +37,8 @@ DEFAULT_CONFIG = { "model_dir": "", "grammar_check": True, "paste_delay_ms": 300, + "media_duck": True, + "duck_percent": 20, } MODELS = ["tiny", "base", "small", "medium", "large-v2", "large-v3"] diff --git a/whisper_app/media_duck.py b/whisper_app/media_duck.py new file mode 100644 index 0000000..b54bc9c --- /dev/null +++ b/whisper_app/media_duck.py @@ -0,0 +1,74 @@ +"""Duck (lower) media volume during recording via PulseAudio/PipeWire.""" + +import re +import shutil +import subprocess + +_saved_volumes: dict[int, str] = {} + + +def _pactl_available() -> bool: + return shutil.which("pactl") is not None + + +def _get_sink_inputs() -> list[tuple[int, str]]: + """Return list of (sink_input_index, current_volume_string).""" + try: + out = subprocess.run( + ["pactl", "list", "sink-inputs"], + capture_output=True, text=True, timeout=3, + ).stdout + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + results = [] + current_idx = None + for line in out.splitlines(): + m = re.match(r"Sink Input #(\d+)", line) + if m: + current_idx = int(m.group(1)) + continue + if current_idx is not None and "Volume:" in line: + results.append((current_idx, line.strip())) + current_idx = None + return results + + +def _parse_percent(vol_line: str) -> int | None: + """Extract first percentage value from a Volume: line.""" + m = re.search(r"(\d+)%", vol_line) + return int(m.group(1)) if m else None + + +def duck(duck_percent: int = 20) -> None: + """Lower all sink inputs to duck_percent of their current volume.""" + _saved_volumes.clear() + if not _pactl_available(): + return + for idx, vol_line in _get_sink_inputs(): + pct = _parse_percent(vol_line) + if pct is not None: + _saved_volumes[idx] = f"{pct}%" + ducked = max(1, int(pct * duck_percent / 100)) + try: + subprocess.run( + ["pactl", "set-sink-input-volume", str(idx), f"{ducked}%"], + check=False, timeout=2, + ) + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + +def unduck() -> None: + """Restore all sink inputs to their saved volumes.""" + if not _pactl_available(): + return + for idx, vol in _saved_volumes.items(): + try: + subprocess.run( + ["pactl", "set-sink-input-volume", str(idx), vol], + check=False, timeout=2, + ) + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + _saved_volumes.clear() diff --git a/whisper_app/settings_window.py b/whisper_app/settings_window.py index d536a24..65e2b99 100644 --- a/whisper_app/settings_window.py +++ b/whisper_app/settings_window.py @@ -34,8 +34,8 @@ def _open_main(root: tk.Tk, on_reload) -> None: win.title("Whisper Dictation") win.configure(bg=BG) win.attributes("-topmost", True) - win.resizable(False, False) - win.minsize(700, 0) + win.resizable(True, True) + win.minsize(700, 500) # Global option for OptionMenu dropdowns (dark listbox) win.option_add("*Menu.background", BG3) @@ -53,9 +53,43 @@ def _open_main(root: tk.Tk, on_reload) -> None: tk.Label(hdr, text="Lokale GPU-Transkription · offline · privat", font=FONT_S, bg=BG2, fg=FG2).pack() - # ── Content ── - content = tk.Frame(win, bg=BG, padx=36, pady=16) - content.pack(fill="both", expand=True) + # ── Scrollable content ── + outer = tk.Frame(win, bg=BG) + outer.pack(fill="both", expand=True) + canvas = tk.Canvas(outer, bg=BG, highlightthickness=0, bd=0) + scrollbar = tk.Scrollbar(outer, orient="vertical", command=canvas.yview, + bg=BG3, troughcolor=BG, highlightthickness=0, bd=0) + canvas.configure(yscrollcommand=scrollbar.set) + scrollbar.pack(side="right", fill="y") + canvas.pack(side="left", fill="both", expand=True) + content = tk.Frame(canvas, bg=BG, padx=36, pady=16) + content_id = canvas.create_window((0, 0), window=content, anchor="nw") + + def _on_content_configure(event): + canvas.configure(scrollregion=canvas.bbox("all")) + content.bind("", _on_content_configure) + + def _on_canvas_configure(event): + canvas.itemconfigure(content_id, width=event.width) + canvas.bind("", _on_canvas_configure) + + def _on_mousewheel(event): + canvas.yview_scroll(-1 if event.delta > 0 else 1, "units") + def _on_button4(event): + canvas.yview_scroll(-3, "units") + def _on_button5(event): + canvas.yview_scroll(3, "units") + canvas.bind_all("", _on_mousewheel) + canvas.bind_all("", _on_button4) + canvas.bind_all("", _on_button5) + def _cleanup_binds(): + try: + canvas.unbind_all("") + canvas.unbind_all("") + canvas.unbind_all("") + except tk.TclError: + pass + win.bind("", lambda _: _cleanup_binds()) def section(label): f = tk.Frame(content, bg=BG) @@ -178,6 +212,24 @@ def _open_main(root: tk.Tk, on_reload) -> None: ).pack(side="left") paste_delay_lbl.pack(side="left", padx=(8, 0)) + duck_var = tk.BooleanVar(value=cfg.config.get("media_duck", True)) + f_dk = row("Medien leiser stellen", hint="bei Aufnahme via PulseAudio/PipeWire") + tk.Checkbutton(f_dk, variable=duck_var, text="Aktiviert", + bg=BG, fg=FG, selectcolor=BG3, activebackground=BG, + activeforeground=FG, font=FONT_UI, + highlightthickness=0, bd=0).pack(side="left") + + duck_pct_var = tk.IntVar(value=cfg.config.get("duck_percent", 20)) + f_dp = row("Ducking-Stärke", hint="% der Originallautstärke") + duck_pct_lbl = tk.Label(f_dp, text=f"{duck_pct_var.get()} %", font=FONT, + bg=BG, fg=FG, width=7, anchor="w") + tk.Scale(f_dp, variable=duck_pct_var, from_=0, to=100, orient="horizontal", + length=200, bg=BG, fg=FG, troughcolor=BG3, highlightthickness=0, + showvalue=False, bd=0, sliderrelief="flat", + command=lambda v: duck_pct_lbl.config(text=f"{int(float(v))} %") + ).pack(side="left") + duck_pct_lbl.pack(side="left", padx=(8, 0)) + # ── LEISTUNG ── section("LEISTUNG") device_var = tk.StringVar(value=cfg.config["device"]) @@ -254,6 +306,8 @@ def _open_main(root: tk.Tk, on_reload) -> None: cfg.config["model_dir"] = model_dir_var.get() cfg.config["grammar_check"] = grammar_var.get() cfg.config["paste_delay_ms"] = paste_delay_var.get() + cfg.config["media_duck"] = duck_var.get() + cfg.config["duck_percent"] = duck_pct_var.get() cfg.save_config() win.destroy() threading.Thread(target=on_reload, daemon=True).start() @@ -282,9 +336,9 @@ def _open_main(root: tk.Tk, on_reload) -> None: win.update_idletasks() sw = win.winfo_screenwidth() sh = win.winfo_screenheight() - w = win.winfo_reqwidth() - h = win.winfo_reqheight() - win.geometry(f"+{(sw-w)//2}+{(sh-h)//2}") + w = max(win.winfo_reqwidth(), 700) + h = min(win.winfo_reqheight(), sh - 100) + win.geometry(f"{w}x{h}+{(sw-w)//2}+{(sh-h)//2}") def _add_installation_section(win, content, section, row, BG, BG3, BORDER, FG, FG2, AMBER, FONT_UI, FONT_S, FONT_B) -> None: diff --git a/whisper_app/transcriber.py b/whisper_app/transcriber.py index 26f101e..69ed968 100644 --- a/whisper_app/transcriber.py +++ b/whisper_app/transcriber.py @@ -3,7 +3,7 @@ import time import numpy as np from faster_whisper import WhisperModel -from whisper_app import app, config, grammar, typer +from whisper_app import app, config, grammar, media_duck, typer def load_model() -> None: @@ -81,6 +81,9 @@ def set_state(new_state: app.AppState) -> None: # semi-public, used by main.py if new_state == app.AppState.RECORDING: from whisper_app import overlay overlay.show() + if config.config.get("media_duck"): + media_duck.duck(config.config.get("duck_percent", 20)) else: from whisper_app import overlay overlay.hide() + media_duck.unduck()