Fix audio type conversion (#259)

* Fix conversion between audio dtypes
* Run Pytest in CI
* Add pytest tests path in pyproject.toml
* Fix usages
* Use other PR's test format (more or less)
* Support legacy arguments
* Fix pyproject.toml and test location
* Omit `test` arg in CI, given by pyproject.toml

---------

Co-authored-by: Freddy Boulton <alfonsoboulton@gmail.com>
2026-02-05 18:09:23 +08:00 · 2025-04-09 16:00:23 +02:00
parent fdf6bea1c6
commit 58bccddd93
9 changed files with 128 additions and 43 deletions
--- a/backend/fastrtc/pause_detection/silero.py
+++ b/backend/fastrtc/pause_detection/silero.py
@@ -8,7 +8,7 @@ import numpy as np
 from huggingface_hub import hf_hub_download
 from numpy.typing import NDArray

-from ..utils import AudioChunk
+from ..utils import AudioChunk, audio_to_float32
 from .protocol import PauseDetectionModel

 logger = logging.getLogger(__name__)
@@ -274,8 +274,7 @@ class SileroVADModel:
        sampling_rate, audio_ = audio
        logger.debug("VAD audio shape input: %s", audio_.shape)
        try:
-            if audio_.dtype != np.float32:
-                audio_ = audio_.astype(np.float32) / 32768.0
+            audio_ = audio_to_float32(audio_)
            sr = 16000
            if sr != sampling_rate:
                try: