Ignore output_frame_size parameter (#210)

2026-02-05 18:09:23 +08:00 · 2025-04-01 20:10:27 +02:00
parent 1f0462371e
commit 06885d06c4
9 changed files with 31 additions and 24 deletions
--- a/backend/fastrtc/reply_on_pause.py
+++ b/backend/fastrtc/reply_on_pause.py
@@ -74,7 +74,7 @@ class ReplyOnPause(StreamHandler):
        can_interrupt: bool = True,
        expected_layout: Literal["mono", "stereo"] = "mono",
        output_sample_rate: int = 24000,
-        output_frame_size: int = 480,
+        output_frame_size: int | None = None,  # Deprecated
        input_sample_rate: int = 48000,
        model: PauseDetectionModel | None = None,
    ):
@@ -86,8 +86,6 @@ class ReplyOnPause(StreamHandler):
        )
        self.can_interrupt = can_interrupt
        self.expected_layout: Literal["mono", "stereo"] = expected_layout
-        self.output_sample_rate = output_sample_rate
-        self.output_frame_size = output_frame_size
        self.model = model or get_silero_model()
        self.fn = fn
        self.is_async = inspect.isasyncgenfunction(fn)
--- a/backend/fastrtc/reply_on_stopwords.py
+++ b/backend/fastrtc/reply_on_stopwords.py
@@ -39,7 +39,7 @@ class ReplyOnStopWords(ReplyOnPause):
        can_interrupt: bool = True,
        expected_layout: Literal["mono", "stereo"] = "mono",
        output_sample_rate: int = 24000,
-        output_frame_size: int = 480,
+        output_frame_size: int | None = None,  # Deprecated
        input_sample_rate: int = 48000,
        model: PauseDetectionModel | None = None,
    ):
--- a/backend/fastrtc/tracks.py
+++ b/backend/fastrtc/tracks.py
@@ -10,6 +10,7 @@ import logging
 import threading
 import time
 import traceback
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 from dataclasses import dataclass
@@ -239,13 +240,12 @@ class StreamHandlerBase(ABC):
        self,
        expected_layout: Literal["mono", "stereo"] = "mono",
        output_sample_rate: int = 24000,
-        output_frame_size: int = 960,
+        output_frame_size: int | None = None,
        input_sample_rate: int = 48000,
        fps: int = 30,
    ) -> None:
        self.expected_layout = expected_layout
        self.output_sample_rate = output_sample_rate
-        self.output_frame_size = output_frame_size
        self.input_sample_rate = input_sample_rate
        self.fps = fps
        self.latest_args: list[Any] = []
@@ -257,6 +257,30 @@ class StreamHandlerBase(ABC):
        self._phone_mode = False
        self._clear_queue: Callable | None = None

+        sample_rate_to_frame_size_coef = 50
+        if output_sample_rate % sample_rate_to_frame_size_coef != 0:
+            raise ValueError(
+                "output_sample_rate must be a multiple of "
+                f"{sample_rate_to_frame_size_coef}, got {output_sample_rate}"
+            )
+
+        actual_output_frame_size = output_sample_rate // sample_rate_to_frame_size_coef
+        if (
+            output_frame_size is not None
+            and output_frame_size != actual_output_frame_size
+        ):
+            warnings.warn(
+                "The output_frame_size parameter is deprecated and will be removed "
+                "in a future release. The value passed in will be ignored. "
+                f"The actual output frame size is {actual_output_frame_size}, "
+                f"corresponding to {1 / sample_rate_to_frame_size_coef:.2f}s "
+                f"at {output_sample_rate=}Hz.",
+                # DeprecationWarning is filtered out by default, so use UserWarning
+                UserWarning,
+                stacklevel=2,  # So that the warning points to the user's code
+            )
+        self.output_frame_size = actual_output_frame_size
+
    @property
    def clear_queue(self) -> Callable:
        return cast(Callable, self._clear_queue)
--- a/demo/gemini_audio_video/app.py
+++ b/demo/gemini_audio_video/app.py
@@ -44,7 +44,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
        super().__init__(
            "mono",
            output_sample_rate=24000,
-            output_frame_size=480,
            input_sample_rate=16000,
        )
        self.audio_queue = asyncio.Queue()
--- a/demo/gemini_conversation/app.py
+++ b/demo/gemini_conversation/app.py
@@ -42,7 +42,6 @@ class GeminiHandler(AsyncStreamHandler):
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
-            output_frame_size=480,
            input_sample_rate=24000,
        )
        self.input_queue: asyncio.Queue = asyncio.Queue()
--- a/demo/talk_to_azure_openai/app.py
+++ b/demo/talk_to_azure_openai/app.py
@@ -38,7 +38,6 @@ class AzureAudioHandler(AsyncStreamHandler):
        super().__init__(
            expected_layout="mono",
            output_sample_rate=SAMPLE_RATE,
-            output_frame_size=480,
            input_sample_rate=SAMPLE_RATE,
        )
        self.ws = None
--- a/demo/talk_to_gemini/app.py
+++ b/demo/talk_to_gemini/app.py
@@ -43,12 +43,10 @@ class GeminiHandler(AsyncStreamHandler):
        self,
        expected_layout: Literal["mono"] = "mono",
        output_sample_rate: int = 24000,
-        output_frame_size: int = 480,
    ) -> None:
        super().__init__(
            expected_layout,
            output_sample_rate,
-            output_frame_size,
            input_sample_rate=16000,
        )
        self.input_queue: asyncio.Queue = asyncio.Queue()
@@ -59,7 +57,6 @@ class GeminiHandler(AsyncStreamHandler):
        return GeminiHandler(
            expected_layout="mono",
            output_sample_rate=self.output_sample_rate,
-            output_frame_size=self.output_frame_size,
        )

    async def start_up(self):
--- a/demo/talk_to_openai/app.py
+++ b/demo/talk_to_openai/app.py
@@ -33,7 +33,6 @@ class OpenAIHandler(AsyncStreamHandler):
        super().__init__(
            expected_layout="mono",
            output_sample_rate=SAMPLE_RATE,
-            output_frame_size=480,
            input_sample_rate=SAMPLE_RATE,
        )
        self.connection = None
--- a/docs/advanced-configuration.md
+++ b/docs/advanced-configuration.md
@@ -92,27 +92,19 @@ stream = Stream(

 ## Stream Handler Output Audio

-You can configure the output audio chunk size of `ReplyOnPause` (and any `StreamHandler`) 
-with the `output_sample_rate` and `output_frame_size` parameters.
-
-The following code (which uses the default values of these parameters), states that each output chunk will be a frame of 960 samples at a frame rate of `24,000` hz. So it will correspond to `0.04` seconds.
+You can configure the output sampling rate of `ReplyOnPause` (and any `StreamHandler`) 
+with the `output_sample_rate` parameter. For example:

 ```python
 from fastrtc import ReplyOnPause, Stream

 stream = Stream(
-    handler=ReplyOnPause(..., output_sample_rate=24000, output_frame_size=960),
+    handler=ReplyOnPause(..., output_sample_rate=16000),
    modality="audio",
    mode="send-receive"
 )
 ```

-!!! tip
-
-    In general it is best to leave these settings untouched. In some cases,
-    lowering the output_frame_size can yield smoother audio playback.
-
-
 ## Audio Icon

 You can display an icon of your choice instead of the default wave animation for audio streaming.