Fix audio type conversion (#259)

* Fix conversion between audio dtypes * Run Pytest in CI * Add pytest tests path in pyproject.toml * Fix usages * Use other PR's test format (more or less) * Support legacy arguments * Fix pyproject.toml and test location * Omit `test` arg in CI, given by pyproject.toml --------- Co-authored-by: Freddy Boulton <alfonsoboulton@gmail.com>
2026-02-05 18:09:23 +08:00 · 2025-04-09 16:00:23 +02:00
parent fdf6bea1c6
commit 58bccddd93
9 changed files with 128 additions and 43 deletions
--- a/backend/fastrtc/utils.py
+++ b/backend/fastrtc/utils.py
@@ -7,6 +7,7 @@ import json
 import logging
 import tempfile
 import traceback
+import warnings
 from collections.abc import Callable, Coroutine
 from contextvars import ContextVar
 from dataclasses import dataclass
@@ -211,7 +212,7 @@ async def player_worker_decode(
                first_sample_rate = sample_rate

            if format == "s16":
-                audio_array = audio_to_float32((sample_rate, audio_array))
+                audio_array = audio_to_float32(audio_array)

            if first_sample_rate != sample_rate:
                audio_array = librosa.resample(
@@ -319,17 +320,15 @@ def audio_to_file(audio: tuple[int, NDArray[np.int16 | np.float32]]) -> str:


 def audio_to_float32(
-    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    audio: NDArray[np.int16 | np.float32] | tuple[int, NDArray[np.int16 | np.float32]],
 ) -> NDArray[np.float32]:
    """
    Convert an audio tuple containing sample rate (int16) and numpy array data to float32.

    Parameters
    ----------
-    audio : tuple[int, np.ndarray]
-        A tuple containing:
-            - sample_rate (int): The audio sample rate in Hz
-            - data (np.ndarray): The audio data as a numpy array
+    audio : np.ndarray
+        The audio data as a numpy array

    Returns
    -------
@@ -338,26 +337,39 @@ def audio_to_float32(

    Example
    -------
-    >>> sample_rate = 44100
    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
-    >>> audio_tuple = (sample_rate, audio_data)
-    >>> audio_float32 = audio_to_float32(audio_tuple)
+    >>> audio_float32 = audio_to_float32(audio_data)
    """
-    return audio[1].astype(np.float32) / 32768.0
+    if isinstance(audio, tuple):
+        warnings.warn(
+            UserWarning(
+                "Passing a (sr, audio) tuple to audio_to_float32() is deprecated "
+                "and will be removed in a future release. Pass only the audio array."
+            ),
+            stacklevel=2,  # So that the warning points to the user's code
+        )
+        _sr, audio = audio
+
+    if audio.dtype == np.int16:
+        # Divide by 32768.0 so that the values are in the range [-1.0, 1.0).
+        # 1.0 can actually never be reached because the int16 range is [-32768, 32767].
+        return audio.astype(np.float32) / 32768.0
+    elif audio.dtype == np.float32:
+        return audio  # type: ignore
+    else:
+        raise TypeError(f"Unsupported audio data type: {audio.dtype}")


 def audio_to_int16(
-    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    audio: NDArray[np.int16 | np.float32] | tuple[int, NDArray[np.int16 | np.float32]],
 ) -> NDArray[np.int16]:
    """
    Convert an audio tuple containing sample rate and numpy array data to int16.

    Parameters
    ----------
-    audio : tuple[int, np.ndarray]
-        A tuple containing:
-            - sample_rate (int): The audio sample rate in Hz
-            - data (np.ndarray): The audio data as a numpy array
+    audio : np.ndarray
+        The audio data as a numpy array

    Returns
    -------
@@ -366,18 +378,27 @@ def audio_to_int16(

    Example
    -------
-    >>> sample_rate = 44100
    >>> audio_data = np.array([0.1, -0.2, 0.3], dtype=np.float32)  # Example audio samples
-    >>> audio_tuple = (sample_rate, audio_data)
-    >>> audio_int16 = audio_to_int16(audio_tuple)
+    >>> audio_int16 = audio_to_int16(audio_data)
    """
-    if audio[1].dtype == np.int16:
-        return audio[1]  # type: ignore
-    elif audio[1].dtype == np.float32:
-        # Convert float32 to int16 by scaling to the int16 range
-        return (audio[1] * 32767.0).astype(np.int16)
+    if isinstance(audio, tuple):
+        warnings.warn(
+            UserWarning(
+                "Passing a (sr, audio) tuple to audio_to_float32() is deprecated "
+                "and will be removed in a future release. Pass only the audio array."
+            ),
+            stacklevel=2,  # So that the warning points to the user's code
+        )
+        _sr, audio = audio
+
+    if audio.dtype == np.int16:
+        return audio  # type: ignore
+    elif audio.dtype == np.float32:
+        # Convert float32 to int16 by scaling to the int16 range.
+        # Multiply by 32767 and not 32768 so that int16 doesn't overflow.
+        return (audio * 32767.0).astype(np.int16)
    else:
-        raise TypeError(f"Unsupported audio data type: {audio[1].dtype}")
+        raise TypeError(f"Unsupported audio data type: {audio.dtype}")


 def aggregate_bytes_to_16bit(chunks_iterator):