# whisper-dictation/whisper_app/audio.py

import logging
import threading
from collections.abc import Callable

import numpy as np
import sounddevice as sd

from whisper_app import app, config

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)


def audio_callback(indata, frames, time_info, status):
    """Stream callback that stores incoming audio while the app is recording.

    Blocks delivered while ``app.state`` is anything other than
    ``app.AppState.RECORDING`` are discarded.

    Args:
        indata: Audio block handed over by the stream.
        frames: Number of frames in ``indata`` (unused).
        time_info: Timing information supplied by the stream (unused).
        status: Status flags for this block; truthy when something went
            wrong (e.g. an input overflow).
    """
    if app.state != app.AppState.RECORDING:
        return
    if status:
        # A set flag means audio may have been dropped from this recording;
        # surface it instead of silently producing a gappy capture.
        log.warning("Audio input status: %s", status)
    # Copy the block: the appended chunk must not alias the buffer passed in.
    app.audio_chunks.append(indata.copy())


def resolve_device(name: str | None) -> int | None:
    """Resolve a device name to its current PortAudio index, or None for default."""
    if not name:
        return None
    for i, d in enumerate(sd.query_devices()):
        if d["max_input_channels"] > 0 and d["name"] == name:
            return i
    log.warning("Audio device '%s' not found, using default", name)
    return None


def get_input_devices() -> list[tuple[int, str]]:
    """List the input-capable devices that belong to the default host API.

    Returns:
        ``(index, name)`` pairs, where ``index`` is the device's position in
        ``sd.query_devices()``. Devices without input channels, and devices
        on any other host API, are left out.
    """
    wanted_api = sd.query_hostapis(sd.default.hostapi)["name"]

    inputs: list[tuple[int, str]] = []
    for index, info in enumerate(sd.query_devices()):
        # Skip output-only devices before looking at their host API.
        if info["max_input_channels"] <= 0:
            continue
        api_name = sd.query_hostapis(info["hostapi"])["name"]
        if api_name == wanted_api:
            inputs.append((index, info["name"]))
    return inputs


def test_device(device_name: str | None, duration: float,
                on_level: Callable[[float], object],
                on_done: Callable[[bool], object]) -> None:
    """Record from device for *duration* seconds, calling on_level(float 0..1) periodically.

    The device name is resolved immediately; the recording itself runs on a
    daemon thread, so this function returns without waiting for it.

    Args:
        device_name: Name of the input device to test, or ``None`` for the
            default device.
        duration: Length of the test recording in seconds.
        on_level: Called from inside the stream callback for every 50 ms
            block with the block's peak amplitude scaled so that 0.1 maps
            to 1.0 (values are capped at 1.0).
        on_done: Called exactly once by the worker: with ``True`` if the
            loudest sample seen exceeded 0.001, with ``False`` if it did not
            or if opening/running the stream failed.
    """
    device = resolve_device(device_name)

    def _run():
        peak = 0.0

        def _cb(indata, frames, time_info, status):
            nonlocal peak
            level = float(np.abs(indata).max())
            peak = max(peak, level)
            on_level(min(level / 0.1, 1.0))  # normalize: 0.1 amplitude = 100%

        try:
            sr = config.config["sample_rate"]
            block = int(sr * 0.05)  # 50 ms blocks
            with sd.InputStream(samplerate=sr, channels=1, device=device,
                                callback=_cb, blocksize=block):
                sd.sleep(int(duration * 1000))
        except Exception as e:
            log.error("Mic test failed: %s", e)
            on_done(False)
        else:
            # Report success outside the try body: an exception raised by
            # on_done itself must not trigger a second on_done(False) call.
            on_done(peak > 0.001)

    threading.Thread(target=_run, daemon=True).start()


def get_audio_stream():
    """Create the recording input stream for the configured audio device.

    Reads ``audio_device`` (optional) and ``sample_rate`` from the config and
    builds a mono ``sd.InputStream`` that feeds ``audio_callback``. If the
    configured device cannot be opened, the default device is tried instead.
    The stream is not started here.

    Returns:
        The constructed ``sd.InputStream``.

    Raises:
        sd.PortAudioError: If the default device cannot be opened either.
    """
    device = resolve_device(config.config.get("audio_device"))
    sr = config.config["sample_rate"]
    try:
        return sd.InputStream(
            samplerate=sr, channels=1, device=device, callback=audio_callback,
        )
    except sd.PortAudioError:
        if device is None:
            # The failed attempt already used the default device; repeating
            # the identical call would only log a misleading fallback.
            raise
        log.warning("Audio device %s failed, falling back to default", device)
        return sd.InputStream(
            samplerate=sr, channels=1, device=None, callback=audio_callback,
        )