Ignore output_frame_size parameter (#210)

This commit is contained in:
Václav Volhejn
2025-04-01 20:10:27 +02:00
committed by GitHub
parent 1f0462371e
commit 06885d06c4
9 changed files with 31 additions and 24 deletions

View File

@@ -74,7 +74,7 @@ class ReplyOnPause(StreamHandler):
can_interrupt: bool = True,
expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000,
output_frame_size: int = 480,
output_frame_size: int | None = None, # Deprecated
input_sample_rate: int = 48000,
model: PauseDetectionModel | None = None,
):
@@ -86,8 +86,6 @@ class ReplyOnPause(StreamHandler):
)
self.can_interrupt = can_interrupt
self.expected_layout: Literal["mono", "stereo"] = expected_layout
self.output_sample_rate = output_sample_rate
self.output_frame_size = output_frame_size
self.model = model or get_silero_model()
self.fn = fn
self.is_async = inspect.isasyncgenfunction(fn)

View File

@@ -39,7 +39,7 @@ class ReplyOnStopWords(ReplyOnPause):
can_interrupt: bool = True,
expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000,
output_frame_size: int = 480,
output_frame_size: int | None = None, # Deprecated
input_sample_rate: int = 48000,
model: PauseDetectionModel | None = None,
):

View File

@@ -10,6 +10,7 @@ import logging
import threading
import time
import traceback
import warnings
from abc import ABC, abstractmethod
from collections.abc import Callable
from dataclasses import dataclass
@@ -239,13 +240,12 @@ class StreamHandlerBase(ABC):
self,
expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000,
output_frame_size: int = 960,
output_frame_size: int | None = None,
input_sample_rate: int = 48000,
fps: int = 30,
) -> None:
self.expected_layout = expected_layout
self.output_sample_rate = output_sample_rate
self.output_frame_size = output_frame_size
self.input_sample_rate = input_sample_rate
self.fps = fps
self.latest_args: list[Any] = []
@@ -257,6 +257,30 @@ class StreamHandlerBase(ABC):
self._phone_mode = False
self._clear_queue: Callable | None = None
sample_rate_to_frame_size_coef = 50
if output_sample_rate % sample_rate_to_frame_size_coef != 0:
raise ValueError(
"output_sample_rate must be a multiple of "
f"{sample_rate_to_frame_size_coef}, got {output_sample_rate}"
)
actual_output_frame_size = output_sample_rate // sample_rate_to_frame_size_coef
if (
output_frame_size is not None
and output_frame_size != actual_output_frame_size
):
warnings.warn(
"The output_frame_size parameter is deprecated and will be removed "
"in a future release. The value passed in will be ignored. "
f"The actual output frame size is {actual_output_frame_size}, "
f"corresponding to {1 / sample_rate_to_frame_size_coef:.2f}s "
f"at {output_sample_rate=}Hz.",
# DeprecationWarning is filtered out by default, so use UserWarning
UserWarning,
stacklevel=2, # So that the warning points to the user's code
)
self.output_frame_size = actual_output_frame_size
@property
def clear_queue(self) -> Callable:
return cast(Callable, self._clear_queue)

View File

@@ -44,7 +44,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
super().__init__(
"mono",
output_sample_rate=24000,
output_frame_size=480,
input_sample_rate=16000,
)
self.audio_queue = asyncio.Queue()

View File

@@ -42,7 +42,6 @@ class GeminiHandler(AsyncStreamHandler):
super().__init__(
expected_layout="mono",
output_sample_rate=24000,
output_frame_size=480,
input_sample_rate=24000,
)
self.input_queue: asyncio.Queue = asyncio.Queue()

View File

@@ -38,7 +38,6 @@ class AzureAudioHandler(AsyncStreamHandler):
super().__init__(
expected_layout="mono",
output_sample_rate=SAMPLE_RATE,
output_frame_size=480,
input_sample_rate=SAMPLE_RATE,
)
self.ws = None

View File

@@ -43,12 +43,10 @@ class GeminiHandler(AsyncStreamHandler):
self,
expected_layout: Literal["mono"] = "mono",
output_sample_rate: int = 24000,
output_frame_size: int = 480,
) -> None:
super().__init__(
expected_layout,
output_sample_rate,
output_frame_size,
input_sample_rate=16000,
)
self.input_queue: asyncio.Queue = asyncio.Queue()
@@ -59,7 +57,6 @@ class GeminiHandler(AsyncStreamHandler):
return GeminiHandler(
expected_layout="mono",
output_sample_rate=self.output_sample_rate,
output_frame_size=self.output_frame_size,
)
async def start_up(self):

View File

@@ -33,7 +33,6 @@ class OpenAIHandler(AsyncStreamHandler):
super().__init__(
expected_layout="mono",
output_sample_rate=SAMPLE_RATE,
output_frame_size=480,
input_sample_rate=SAMPLE_RATE,
)
self.connection = None

View File

@@ -92,27 +92,19 @@ stream = Stream(
## Stream Handler Output Audio
You can configure the output audio chunk size of `ReplyOnPause` (and any `StreamHandler`)
with the `output_sample_rate` and `output_frame_size` parameters.
The following code (which uses the default values of these parameters), states that each output chunk will be a frame of 960 samples at a frame rate of `24,000` hz. So it will correspond to `0.04` seconds.
You can configure the output sampling rate of `ReplyOnPause` (and any `StreamHandler`)
with the `output_sample_rate` parameter. For example:
```python
from fastrtc import ReplyOnPause, Stream
stream = Stream(
handler=ReplyOnPause(..., output_sample_rate=24000, output_frame_size=960),
handler=ReplyOnPause(..., output_sample_rate=16000),
modality="audio",
mode="send-receive"
)
```
!!! tip
In general it is best to leave these settings untouched. In some cases,
lowering the output_frame_size can yield smoother audio playback.
## Audio Icon
You can display an icon of your choice instead of the default wave animation for audio streaming.