# whisper-dictation/whisper_app/transcriber.py
#
# 87 lines
# 2.5 KiB
# Python

import time
import numpy as np
from faster_whisper import WhisperModel
from whisper_app import app, config, grammar, typer
def load_model() -> None:
    """Create the Whisper model and store it on ``app.model``.

    Model name, device and compute type are read from ``config.config``.
    An empty or missing ``"model_dir"`` setting is passed to ``WhisperModel``
    as ``download_root=None``. When the ``"grammar_check"`` setting is truthy,
    the grammar module is initialised afterwards with the configured language,
    falling back to ``"de"`` when no language is set.
    """
    settings = config.config
    app.log(f"Loading {settings['model']} on {settings['device']}...")

    app.model = WhisperModel(
        settings["model"],
        device=settings["device"],
        compute_type=settings["compute_type"],
        download_root=settings.get("model_dir") or None,
    )
    app.log("Model ready.")

    # Config is looked up again here rather than reusing the earlier binding,
    # mirroring the point in time at which the grammar settings are read.
    if not config.config.get("grammar_check"):
        return
    grammar_language = config.config.get("language") or "de"
    grammar.init(grammar_language, log=app.log)
def stop_and_transcribe() -> None:
    """Finish the current recording, transcribe it and return to idle.

    Does nothing unless ``app.state`` is ``AppState.RECORDING``. Otherwise the
    state is switched to ``TRANSCRIBING`` while ``_do_transcribe`` runs and is
    always reset to ``IDLE`` afterwards, whether or not transcription failed.
    """
    is_recording = app.state == app.AppState.RECORDING
    if not is_recording:
        return

    set_state(app.AppState.TRANSCRIBING)
    try:
        _do_transcribe()
    except Exception as exc:
        # Top-level boundary: a failed transcription is logged, not raised.
        app.log(f"Transcription error: {exc}")
    finally:
        set_state(app.AppState.IDLE)
# Recordings shorter than this many seconds are skipped.
_MIN_DURATION_S = 0.3
# Recordings whose RMS level is below this are treated as silent and skipped.
_MIN_RMS = 0.0001
# RMS level the audio is scaled to before it is handed to the model.
_TARGET_RMS = 0.05
# Pause in seconds between finishing transcription and typing the text.
# NOTE(review): presumably lets the hotkey be released first — confirm.
_PRE_TYPE_DELAY_S = 0.15


def _do_transcribe() -> None:
    """Transcribe the buffered recording and type the resulting text.

    Works on a snapshot of ``app.audio_chunks``. The recording is skipped
    (with a log message where applicable) when there are no chunks, the model
    is not loaded, or the audio is shorter than ``_MIN_DURATION_S`` or quieter
    than ``_MIN_RMS``. Otherwise the audio is scaled to ``_TARGET_RMS``,
    clipped to [-1, 1] and passed to ``app.model.transcribe``. The text then
    goes through ``config.apply_vocab`` and, when the ``"grammar_check"``
    setting is truthy, ``grammar.correct`` before being typed.

    Exceptions are not handled here; ``stop_and_transcribe`` logs them.
    """
    chunks = list(app.audio_chunks)
    if not chunks:
        return
    if app.model is None:
        app.log("Model not loaded yet — skipped.")
        return

    audio = np.concatenate(chunks, axis=0).flatten().astype(np.float32)
    duration = len(audio) / config.config["sample_rate"]
    rms = float(np.sqrt(np.mean(audio ** 2)))
    app.log(f"Audio: {duration:.1f}s RMS: {rms:.5f}")
    if duration < _MIN_DURATION_S or rms < _MIN_RMS:
        app.log("Too short or silent — skipped.")
        return

    # rms == 0 was rejected by the silence check above, so the division is safe.
    audio = np.clip(audio * (_TARGET_RMS / rms), -1.0, 1.0)

    # A missing or empty language setting means "let the model auto-detect",
    # consistent with how load_model reads the same key via .get().
    lang = config.config.get("language") or None
    prompt = config.get_initial_prompt()
    segments, _ = app.model.transcribe(
        audio,
        language=lang,
        beam_size=5,
        vad_filter=True,
        initial_prompt=prompt or None,
    )

    # Segment texts usually carry their own leading space; strip each one and
    # drop empty segments so the joined result has single spaces only.
    pieces = (segment.text.strip() for segment in segments)
    text = " ".join(piece for piece in pieces if piece)

    text = config.apply_vocab(text)
    if config.config.get("grammar_check"):
        text = grammar.correct(text)
    app.log(f"Result: {repr(text)}")

    if text:
        time.sleep(_PRE_TYPE_DELAY_S)
        typer.type_text(text)
def set_state(new_state: app.AppState) -> None:  # semi-public, used by main.py
    """Store the new application state and update the UI to match.

    Sets ``app.state``, refreshes the tray icon when ``app.tray_icon`` is
    truthy, and shows the overlay for ``AppState.RECORDING`` or hides it for
    every other state.
    """
    app.state = new_state

    # Imports are kept inside the function, as in the original.
    # NOTE(review): presumably this avoids an import cycle — confirm.
    if app.tray_icon:
        from whisper_app import tray

        tray.update_icon(new_state)

    from whisper_app import overlay

    recording = new_state == app.AppState.RECORDING
    update_overlay = overlay.show if recording else overlay.hide
    update_overlay()