43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Optional
|
|
|
|
|
|
@dataclass
class TranscriptionResult:
    """Outcome of a single transcription attempt.

    Instances are plain value objects: they compare by field values and
    carry no behaviour of their own.
    """

    # Whether any usable text was obtained.
    success: bool
    # The transcript itself; the empty string when nothing was obtained.
    text: str
    # Where the text came from: "whisper" | "premium" | "none".
    source: str
|
|
|
|
|
|
class Transcriber:
    """Transcribe audio with faster-whisper, falling back to supplied text.

    The built-in ``WhisperModel`` is imported and created lazily on first use
    and then reused.  Alternatively a ``model_factory`` can be injected (for
    example by tests); it must return an object whose
    ``transcribe(path, language=..., beam_size=...)`` yields a
    ``(segments, info)`` pair in which every segment has a ``text`` attribute.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, model_factory: Optional[Callable[[], object]] = None,
                 model_name: str = "large-v3", device: str = "cpu",
                 language: str = "de", beam_size: int = 5):
        """Store the configuration; no model is loaded here.

        Args:
            model_factory: Zero-argument callable returning a model object.
                When given, it is called on every transcription and its
                result is not cached by this class.
            model_name: Model name passed to ``WhisperModel`` when no factory
                is injected.
            device: Device passed to ``WhisperModel`` when no factory is
                injected.
            language: Language code forwarded to the model's ``transcribe``.
            beam_size: Beam size forwarded to the model's ``transcribe``.
        """
        self._factory = model_factory
        self._model_name = model_name
        self._device = device
        self._language = language
        self._beam_size = beam_size
        self._model = None

    def _get_model(self):
        """Return the model to use for the next transcription."""
        if self._factory is not None:
            # Deliberately not cached: the injected factory decides whether
            # to hand out a fresh or a shared object.
            return self._factory()
        if self._model is None:
            # Imported lazily so the module can be imported (and tested with
            # a factory) without faster-whisper being installed.
            from faster_whisper import WhisperModel
            self._model = WhisperModel(self._model_name, device=self._device)
        return self._model

    def transcribe(self, audio_path: Path, premium_text: Optional[str] = None) -> TranscriptionResult:
        """Transcribe ``audio_path``, preferring Whisper over ``premium_text``.

        Args:
            audio_path: Audio file handed to the model as a string path.
            premium_text: Optional externally supplied transcript, used only
                when Whisper fails or produces no text.

        Returns:
            A result with source ``"whisper"`` when the model produced text,
            ``"premium"`` when the non-blank ``premium_text`` was used
            instead, and an unsuccessful ``"none"`` result otherwise.  This
            method does not raise for model errors; they are logged and
            treated as "no Whisper text".
        """
        text = ""
        try:
            model = self._get_model()
            segments, _info = model.transcribe(
                str(audio_path), language=self._language, beam_size=self._beam_size)
            # faster-whisper yields segments lazily, so the actual decoding
            # (and any error it raises) happens while joining; keep it in
            # the try block.
            pieces = (segment.text.strip() for segment in segments)
            # Skip empty segments so the joined text has no doubled spaces.
            text = " ".join(piece for piece in pieces if piece)
        except Exception:
            # Best-effort by design: any load/decode failure falls through to
            # the fallback below, but is no longer swallowed silently.
            self._log.warning(
                "Whisper transcription of %s failed; using fallback",
                audio_path, exc_info=True)

        if text:
            return TranscriptionResult(True, text, "whisper")
        # Whitespace-only fallback text is as useless as no text at all.
        if premium_text and premium_text.strip():
            return TranscriptionResult(True, premium_text, "premium")
        return TranscriptionResult(False, "", "none")
|