mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 01:49:23 +08:00
Ignore output_frame_size parameter (#210)
This commit is contained in:
@@ -74,7 +74,7 @@ class ReplyOnPause(StreamHandler):
|
||||
can_interrupt: bool = True,
|
||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||
output_sample_rate: int = 24000,
|
||||
output_frame_size: int = 480,
|
||||
output_frame_size: int | None = None, # Deprecated
|
||||
input_sample_rate: int = 48000,
|
||||
model: PauseDetectionModel | None = None,
|
||||
):
|
||||
@@ -86,8 +86,6 @@ class ReplyOnPause(StreamHandler):
|
||||
)
|
||||
self.can_interrupt = can_interrupt
|
||||
self.expected_layout: Literal["mono", "stereo"] = expected_layout
|
||||
self.output_sample_rate = output_sample_rate
|
||||
self.output_frame_size = output_frame_size
|
||||
self.model = model or get_silero_model()
|
||||
self.fn = fn
|
||||
self.is_async = inspect.isasyncgenfunction(fn)
|
||||
|
||||
@@ -39,7 +39,7 @@ class ReplyOnStopWords(ReplyOnPause):
|
||||
can_interrupt: bool = True,
|
||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||
output_sample_rate: int = 24000,
|
||||
output_frame_size: int = 480,
|
||||
output_frame_size: int | None = None, # Deprecated
|
||||
input_sample_rate: int = 48000,
|
||||
model: PauseDetectionModel | None = None,
|
||||
):
|
||||
|
||||
@@ -10,6 +10,7 @@ import logging
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import warnings
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
@@ -239,13 +240,12 @@ class StreamHandlerBase(ABC):
|
||||
self,
|
||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||
output_sample_rate: int = 24000,
|
||||
output_frame_size: int = 960,
|
||||
output_frame_size: int | None = None,
|
||||
input_sample_rate: int = 48000,
|
||||
fps: int = 30,
|
||||
) -> None:
|
||||
self.expected_layout = expected_layout
|
||||
self.output_sample_rate = output_sample_rate
|
||||
self.output_frame_size = output_frame_size
|
||||
self.input_sample_rate = input_sample_rate
|
||||
self.fps = fps
|
||||
self.latest_args: list[Any] = []
|
||||
@@ -257,6 +257,30 @@ class StreamHandlerBase(ABC):
|
||||
self._phone_mode = False
|
||||
self._clear_queue: Callable | None = None
|
||||
|
||||
sample_rate_to_frame_size_coef = 50
|
||||
if output_sample_rate % sample_rate_to_frame_size_coef != 0:
|
||||
raise ValueError(
|
||||
"output_sample_rate must be a multiple of "
|
||||
f"{sample_rate_to_frame_size_coef}, got {output_sample_rate}"
|
||||
)
|
||||
|
||||
actual_output_frame_size = output_sample_rate // sample_rate_to_frame_size_coef
|
||||
if (
|
||||
output_frame_size is not None
|
||||
and output_frame_size != actual_output_frame_size
|
||||
):
|
||||
warnings.warn(
|
||||
"The output_frame_size parameter is deprecated and will be removed "
|
||||
"in a future release. The value passed in will be ignored. "
|
||||
f"The actual output frame size is {actual_output_frame_size}, "
|
||||
f"corresponding to {1 / sample_rate_to_frame_size_coef:.2f}s "
|
||||
f"at {output_sample_rate=}Hz.",
|
||||
# DeprecationWarning is filtered out by default, so use UserWarning
|
||||
UserWarning,
|
||||
stacklevel=2, # So that the warning points to the user's code
|
||||
)
|
||||
self.output_frame_size = actual_output_frame_size
|
||||
|
||||
@property
|
||||
def clear_queue(self) -> Callable:
|
||||
return cast(Callable, self._clear_queue)
|
||||
|
||||
@@ -44,7 +44,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
||||
super().__init__(
|
||||
"mono",
|
||||
output_sample_rate=24000,
|
||||
output_frame_size=480,
|
||||
input_sample_rate=16000,
|
||||
)
|
||||
self.audio_queue = asyncio.Queue()
|
||||
|
||||
@@ -42,7 +42,6 @@ class GeminiHandler(AsyncStreamHandler):
|
||||
super().__init__(
|
||||
expected_layout="mono",
|
||||
output_sample_rate=24000,
|
||||
output_frame_size=480,
|
||||
input_sample_rate=24000,
|
||||
)
|
||||
self.input_queue: asyncio.Queue = asyncio.Queue()
|
||||
|
||||
@@ -38,7 +38,6 @@ class AzureAudioHandler(AsyncStreamHandler):
|
||||
super().__init__(
|
||||
expected_layout="mono",
|
||||
output_sample_rate=SAMPLE_RATE,
|
||||
output_frame_size=480,
|
||||
input_sample_rate=SAMPLE_RATE,
|
||||
)
|
||||
self.ws = None
|
||||
|
||||
@@ -43,12 +43,10 @@ class GeminiHandler(AsyncStreamHandler):
|
||||
self,
|
||||
expected_layout: Literal["mono"] = "mono",
|
||||
output_sample_rate: int = 24000,
|
||||
output_frame_size: int = 480,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
expected_layout,
|
||||
output_sample_rate,
|
||||
output_frame_size,
|
||||
input_sample_rate=16000,
|
||||
)
|
||||
self.input_queue: asyncio.Queue = asyncio.Queue()
|
||||
@@ -59,7 +57,6 @@ class GeminiHandler(AsyncStreamHandler):
|
||||
return GeminiHandler(
|
||||
expected_layout="mono",
|
||||
output_sample_rate=self.output_sample_rate,
|
||||
output_frame_size=self.output_frame_size,
|
||||
)
|
||||
|
||||
async def start_up(self):
|
||||
|
||||
@@ -33,7 +33,6 @@ class OpenAIHandler(AsyncStreamHandler):
|
||||
super().__init__(
|
||||
expected_layout="mono",
|
||||
output_sample_rate=SAMPLE_RATE,
|
||||
output_frame_size=480,
|
||||
input_sample_rate=SAMPLE_RATE,
|
||||
)
|
||||
self.connection = None
|
||||
|
||||
@@ -92,27 +92,19 @@ stream = Stream(
|
||||
|
||||
## Stream Handler Output Audio
|
||||
|
||||
You can configure the output audio chunk size of `ReplyOnPause` (and any `StreamHandler`)
|
||||
with the `output_sample_rate` and `output_frame_size` parameters.
|
||||
|
||||
The following code (which uses the default values of these parameters) states that each output chunk will be a frame of 960 samples at a sample rate of `24,000` Hz, so each chunk corresponds to `0.04` seconds of audio.
|
||||
You can configure the output sampling rate of `ReplyOnPause` (and any `StreamHandler`)
|
||||
with the `output_sample_rate` parameter. For example:
|
||||
|
||||
```python
|
||||
from fastrtc import ReplyOnPause, Stream
|
||||
|
||||
stream = Stream(
|
||||
handler=ReplyOnPause(..., output_sample_rate=24000, output_frame_size=960),
|
||||
handler=ReplyOnPause(..., output_sample_rate=16000),
|
||||
modality="audio",
|
||||
mode="send-receive"
|
||||
)
|
||||
```
|
||||
|
||||
!!! tip
|
||||
|
||||
In general it is best to leave these settings untouched. In some cases,
|
||||
lowering the output_frame_size can yield smoother audio playback.
|
||||
|
||||
|
||||
## Audio Icon
|
||||
|
||||
You can display an icon of your choice instead of the default wave animation for audio streaming.
|
||||
|
||||
Reference in New Issue
Block a user