Ignore output_frame_size parameter (#210)

This commit is contained in:
Václav Volhejn
2025-04-01 20:10:27 +02:00
committed by GitHub
parent 1f0462371e
commit 06885d06c4
9 changed files with 31 additions and 24 deletions

View File

@@ -74,7 +74,7 @@ class ReplyOnPause(StreamHandler):
can_interrupt: bool = True, can_interrupt: bool = True,
expected_layout: Literal["mono", "stereo"] = "mono", expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000, output_sample_rate: int = 24000,
output_frame_size: int = 480, output_frame_size: int | None = None, # Deprecated
input_sample_rate: int = 48000, input_sample_rate: int = 48000,
model: PauseDetectionModel | None = None, model: PauseDetectionModel | None = None,
): ):
@@ -86,8 +86,6 @@ class ReplyOnPause(StreamHandler):
) )
self.can_interrupt = can_interrupt self.can_interrupt = can_interrupt
self.expected_layout: Literal["mono", "stereo"] = expected_layout self.expected_layout: Literal["mono", "stereo"] = expected_layout
self.output_sample_rate = output_sample_rate
self.output_frame_size = output_frame_size
self.model = model or get_silero_model() self.model = model or get_silero_model()
self.fn = fn self.fn = fn
self.is_async = inspect.isasyncgenfunction(fn) self.is_async = inspect.isasyncgenfunction(fn)

View File

@@ -39,7 +39,7 @@ class ReplyOnStopWords(ReplyOnPause):
can_interrupt: bool = True, can_interrupt: bool = True,
expected_layout: Literal["mono", "stereo"] = "mono", expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000, output_sample_rate: int = 24000,
output_frame_size: int = 480, output_frame_size: int | None = None, # Deprecated
input_sample_rate: int = 48000, input_sample_rate: int = 48000,
model: PauseDetectionModel | None = None, model: PauseDetectionModel | None = None,
): ):

View File

@@ -10,6 +10,7 @@ import logging
import threading import threading
import time import time
import traceback import traceback
import warnings
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from collections.abc import Callable from collections.abc import Callable
from dataclasses import dataclass from dataclasses import dataclass
@@ -239,13 +240,12 @@ class StreamHandlerBase(ABC):
self, self,
expected_layout: Literal["mono", "stereo"] = "mono", expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000, output_sample_rate: int = 24000,
output_frame_size: int = 960, output_frame_size: int | None = None,
input_sample_rate: int = 48000, input_sample_rate: int = 48000,
fps: int = 30, fps: int = 30,
) -> None: ) -> None:
self.expected_layout = expected_layout self.expected_layout = expected_layout
self.output_sample_rate = output_sample_rate self.output_sample_rate = output_sample_rate
self.output_frame_size = output_frame_size
self.input_sample_rate = input_sample_rate self.input_sample_rate = input_sample_rate
self.fps = fps self.fps = fps
self.latest_args: list[Any] = [] self.latest_args: list[Any] = []
@@ -257,6 +257,30 @@ class StreamHandlerBase(ABC):
self._phone_mode = False self._phone_mode = False
self._clear_queue: Callable | None = None self._clear_queue: Callable | None = None
sample_rate_to_frame_size_coef = 50
if output_sample_rate % sample_rate_to_frame_size_coef != 0:
raise ValueError(
"output_sample_rate must be a multiple of "
f"{sample_rate_to_frame_size_coef}, got {output_sample_rate}"
)
actual_output_frame_size = output_sample_rate // sample_rate_to_frame_size_coef
if (
output_frame_size is not None
and output_frame_size != actual_output_frame_size
):
warnings.warn(
"The output_frame_size parameter is deprecated and will be removed "
"in a future release. The value passed in will be ignored. "
f"The actual output frame size is {actual_output_frame_size}, "
f"corresponding to {1 / sample_rate_to_frame_size_coef:.2f}s "
f"at {output_sample_rate=}Hz.",
# DeprecationWarning is filtered out by default, so use UserWarning
UserWarning,
stacklevel=2, # So that the warning points to the user's code
)
self.output_frame_size = actual_output_frame_size
@property @property
def clear_queue(self) -> Callable: def clear_queue(self) -> Callable:
return cast(Callable, self._clear_queue) return cast(Callable, self._clear_queue)

View File

@@ -44,7 +44,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
super().__init__( super().__init__(
"mono", "mono",
output_sample_rate=24000, output_sample_rate=24000,
output_frame_size=480,
input_sample_rate=16000, input_sample_rate=16000,
) )
self.audio_queue = asyncio.Queue() self.audio_queue = asyncio.Queue()

View File

@@ -42,7 +42,6 @@ class GeminiHandler(AsyncStreamHandler):
super().__init__( super().__init__(
expected_layout="mono", expected_layout="mono",
output_sample_rate=24000, output_sample_rate=24000,
output_frame_size=480,
input_sample_rate=24000, input_sample_rate=24000,
) )
self.input_queue: asyncio.Queue = asyncio.Queue() self.input_queue: asyncio.Queue = asyncio.Queue()

View File

@@ -38,7 +38,6 @@ class AzureAudioHandler(AsyncStreamHandler):
super().__init__( super().__init__(
expected_layout="mono", expected_layout="mono",
output_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE,
output_frame_size=480,
input_sample_rate=SAMPLE_RATE, input_sample_rate=SAMPLE_RATE,
) )
self.ws = None self.ws = None

View File

@@ -43,12 +43,10 @@ class GeminiHandler(AsyncStreamHandler):
self, self,
expected_layout: Literal["mono"] = "mono", expected_layout: Literal["mono"] = "mono",
output_sample_rate: int = 24000, output_sample_rate: int = 24000,
output_frame_size: int = 480,
) -> None: ) -> None:
super().__init__( super().__init__(
expected_layout, expected_layout,
output_sample_rate, output_sample_rate,
output_frame_size,
input_sample_rate=16000, input_sample_rate=16000,
) )
self.input_queue: asyncio.Queue = asyncio.Queue() self.input_queue: asyncio.Queue = asyncio.Queue()
@@ -59,7 +57,6 @@ class GeminiHandler(AsyncStreamHandler):
return GeminiHandler( return GeminiHandler(
expected_layout="mono", expected_layout="mono",
output_sample_rate=self.output_sample_rate, output_sample_rate=self.output_sample_rate,
output_frame_size=self.output_frame_size,
) )
async def start_up(self): async def start_up(self):

View File

@@ -33,7 +33,6 @@ class OpenAIHandler(AsyncStreamHandler):
super().__init__( super().__init__(
expected_layout="mono", expected_layout="mono",
output_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE,
output_frame_size=480,
input_sample_rate=SAMPLE_RATE, input_sample_rate=SAMPLE_RATE,
) )
self.connection = None self.connection = None

View File

@@ -92,27 +92,19 @@ stream = Stream(
## Stream Handler Output Audio ## Stream Handler Output Audio
You can configure the output audio chunk size of `ReplyOnPause` (and any `StreamHandler`) You can configure the output sampling rate of `ReplyOnPause` (and any `StreamHandler`)
with the `output_sample_rate` and `output_frame_size` parameters. with the `output_sample_rate` parameter. For example:
The following code (which uses the default values of these parameters) states that each output chunk will be a frame of 960 samples at a sampling rate of `24,000` Hz, so each chunk corresponds to `0.04` seconds of audio.
```python ```python
from fastrtc import ReplyOnPause, Stream from fastrtc import ReplyOnPause, Stream
stream = Stream( stream = Stream(
handler=ReplyOnPause(..., output_sample_rate=24000, output_frame_size=960), handler=ReplyOnPause(..., output_sample_rate=16000),
modality="audio", modality="audio",
mode="send-receive" mode="send-receive"
) )
``` ```
!!! tip
In general it is best to leave these settings untouched. In some cases,
lowering the output_frame_size can yield smoother audio playback.
## Audio Icon ## Audio Icon
You can display an icon of your choice instead of the default wave animation for audio streaming. You can display an icon of your choice instead of the default wave animation for audio streaming.