diff --git a/config.json b/config.json index f8fdbcb..e1b0456 100644 --- a/config.json +++ b/config.json @@ -3,4 +3,4 @@ "model": "medium", "language": "de", "sample_rate": 16000 -} +} \ No newline at end of file diff --git a/dictate.py b/dictate.py index bc5bb10..96db8b1 100644 --- a/dictate.py +++ b/dictate.py @@ -231,11 +231,17 @@ def stop_and_transcribe(): rms = float(np.sqrt(np.mean(audio ** 2))) print(f"Audio: {duration:.1f}s RMS: {rms:.5f}", flush=True) - if duration < 0.3 or rms < 0.0005: + if duration < 0.3 or rms < 0.0001: print("Too short or silent — skipped.", flush=True) set_state(AppState.IDLE) return + # Normalize to target RMS so Whisper gets consistent signal level + target_rms = 0.05 + if rms > 0: + audio = audio * (target_rms / rms) + audio = np.clip(audio, -1.0, 1.0) + lang = config["language"] if config["language"] else None prompt = get_initial_prompt() segments, _ = model.transcribe( diff --git a/install.bat b/install.bat index 7af6d26..c0dbcf6 100644 --- a/install.bat +++ b/install.bat @@ -6,11 +6,11 @@ py -3.13 -m venv .venv-windows set "VENV=%~dp0.venv-windows" echo Installing dependencies... -"%VENV%\Scripts\pip" install --upgrade pip -"%VENV%\Scripts\pip" install -r requirements.txt +"%VENV%\Scripts\python.exe" -m pip install --upgrade pip +"%VENV%\Scripts\python.exe" -m pip install -r requirements.txt echo Installing CUDA 12 DLLs (required for GPU acceleration)... -"%VENV%\Scripts\pip" install -r requirements-cuda.txt +"%VENV%\Scripts\python.exe" -m pip install -r requirements-cuda.txt echo. echo Done. Run start.bat to launch.