Fix audio type conversion (#259)

* Fix conversion between audio dtypes

* Run Pytest in CI

* Add pytest tests path in pyproject.toml

* Fix usages

* Use other PR's test format (more or less)

* Support legacy arguments

* Fix pyproject.toml and test location

* Omit `test` arg in CI, given by pyproject.toml

---------

Co-authored-by: Freddy Boulton <alfonsoboulton@gmail.com>
This commit is contained in:
Václav Volhejn
2025-04-09 16:00:23 +02:00
committed by GitHub
parent fdf6bea1c6
commit 58bccddd93
9 changed files with 128 additions and 43 deletions

View File

@@ -8,7 +8,7 @@ import numpy as np
from huggingface_hub import hf_hub_download
from numpy.typing import NDArray
-from ..utils import AudioChunk
+from ..utils import AudioChunk, audio_to_float32
from .protocol import PauseDetectionModel
logger = logging.getLogger(__name__)
@@ -274,8 +274,7 @@ class SileroVADModel:
sampling_rate, audio_ = audio
logger.debug("VAD audio shape input: %s", audio_.shape)
try:
-if audio_.dtype != np.float32:
-audio_ = audio_.astype(np.float32) / 32768.0
+audio_ = audio_to_float32(audio_)
sr = 16000
if sr != sampling_rate:
try: