mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
Ignore output_frame_size parameter (#210)
This commit is contained in:
@@ -74,7 +74,7 @@ class ReplyOnPause(StreamHandler):
|
|||||||
can_interrupt: bool = True,
|
can_interrupt: bool = True,
|
||||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||||
output_sample_rate: int = 24000,
|
output_sample_rate: int = 24000,
|
||||||
output_frame_size: int = 480,
|
output_frame_size: int | None = None, # Deprecated
|
||||||
input_sample_rate: int = 48000,
|
input_sample_rate: int = 48000,
|
||||||
model: PauseDetectionModel | None = None,
|
model: PauseDetectionModel | None = None,
|
||||||
):
|
):
|
||||||
@@ -86,8 +86,6 @@ class ReplyOnPause(StreamHandler):
|
|||||||
)
|
)
|
||||||
self.can_interrupt = can_interrupt
|
self.can_interrupt = can_interrupt
|
||||||
self.expected_layout: Literal["mono", "stereo"] = expected_layout
|
self.expected_layout: Literal["mono", "stereo"] = expected_layout
|
||||||
self.output_sample_rate = output_sample_rate
|
|
||||||
self.output_frame_size = output_frame_size
|
|
||||||
self.model = model or get_silero_model()
|
self.model = model or get_silero_model()
|
||||||
self.fn = fn
|
self.fn = fn
|
||||||
self.is_async = inspect.isasyncgenfunction(fn)
|
self.is_async = inspect.isasyncgenfunction(fn)
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class ReplyOnStopWords(ReplyOnPause):
|
|||||||
can_interrupt: bool = True,
|
can_interrupt: bool = True,
|
||||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||||
output_sample_rate: int = 24000,
|
output_sample_rate: int = 24000,
|
||||||
output_frame_size: int = 480,
|
output_frame_size: int | None = None, # Deprecated
|
||||||
input_sample_rate: int = 48000,
|
input_sample_rate: int = 48000,
|
||||||
model: PauseDetectionModel | None = None,
|
model: PauseDetectionModel | None = None,
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import logging
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
import warnings
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -239,13 +240,12 @@ class StreamHandlerBase(ABC):
|
|||||||
self,
|
self,
|
||||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||||
output_sample_rate: int = 24000,
|
output_sample_rate: int = 24000,
|
||||||
output_frame_size: int = 960,
|
output_frame_size: int | None = None,
|
||||||
input_sample_rate: int = 48000,
|
input_sample_rate: int = 48000,
|
||||||
fps: int = 30,
|
fps: int = 30,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.expected_layout = expected_layout
|
self.expected_layout = expected_layout
|
||||||
self.output_sample_rate = output_sample_rate
|
self.output_sample_rate = output_sample_rate
|
||||||
self.output_frame_size = output_frame_size
|
|
||||||
self.input_sample_rate = input_sample_rate
|
self.input_sample_rate = input_sample_rate
|
||||||
self.fps = fps
|
self.fps = fps
|
||||||
self.latest_args: list[Any] = []
|
self.latest_args: list[Any] = []
|
||||||
@@ -257,6 +257,30 @@ class StreamHandlerBase(ABC):
|
|||||||
self._phone_mode = False
|
self._phone_mode = False
|
||||||
self._clear_queue: Callable | None = None
|
self._clear_queue: Callable | None = None
|
||||||
|
|
||||||
|
sample_rate_to_frame_size_coef = 50
|
||||||
|
if output_sample_rate % sample_rate_to_frame_size_coef != 0:
|
||||||
|
raise ValueError(
|
||||||
|
"output_sample_rate must be a multiple of "
|
||||||
|
f"{sample_rate_to_frame_size_coef}, got {output_sample_rate}"
|
||||||
|
)
|
||||||
|
|
||||||
|
actual_output_frame_size = output_sample_rate // sample_rate_to_frame_size_coef
|
||||||
|
if (
|
||||||
|
output_frame_size is not None
|
||||||
|
and output_frame_size != actual_output_frame_size
|
||||||
|
):
|
||||||
|
warnings.warn(
|
||||||
|
"The output_frame_size parameter is deprecated and will be removed "
|
||||||
|
"in a future release. The value passed in will be ignored. "
|
||||||
|
f"The actual output frame size is {actual_output_frame_size}, "
|
||||||
|
f"corresponding to {1 / sample_rate_to_frame_size_coef:.2f}s "
|
||||||
|
f"at {output_sample_rate=}Hz.",
|
||||||
|
# DeprecationWarning is filtered out by default, so use UserWarning
|
||||||
|
UserWarning,
|
||||||
|
stacklevel=2, # So that the warning points to the user's code
|
||||||
|
)
|
||||||
|
self.output_frame_size = actual_output_frame_size
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def clear_queue(self) -> Callable:
|
def clear_queue(self) -> Callable:
|
||||||
return cast(Callable, self._clear_queue)
|
return cast(Callable, self._clear_queue)
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
|||||||
super().__init__(
|
super().__init__(
|
||||||
"mono",
|
"mono",
|
||||||
output_sample_rate=24000,
|
output_sample_rate=24000,
|
||||||
output_frame_size=480,
|
|
||||||
input_sample_rate=16000,
|
input_sample_rate=16000,
|
||||||
)
|
)
|
||||||
self.audio_queue = asyncio.Queue()
|
self.audio_queue = asyncio.Queue()
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ class GeminiHandler(AsyncStreamHandler):
|
|||||||
super().__init__(
|
super().__init__(
|
||||||
expected_layout="mono",
|
expected_layout="mono",
|
||||||
output_sample_rate=24000,
|
output_sample_rate=24000,
|
||||||
output_frame_size=480,
|
|
||||||
input_sample_rate=24000,
|
input_sample_rate=24000,
|
||||||
)
|
)
|
||||||
self.input_queue: asyncio.Queue = asyncio.Queue()
|
self.input_queue: asyncio.Queue = asyncio.Queue()
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ class AzureAudioHandler(AsyncStreamHandler):
|
|||||||
super().__init__(
|
super().__init__(
|
||||||
expected_layout="mono",
|
expected_layout="mono",
|
||||||
output_sample_rate=SAMPLE_RATE,
|
output_sample_rate=SAMPLE_RATE,
|
||||||
output_frame_size=480,
|
|
||||||
input_sample_rate=SAMPLE_RATE,
|
input_sample_rate=SAMPLE_RATE,
|
||||||
)
|
)
|
||||||
self.ws = None
|
self.ws = None
|
||||||
|
|||||||
@@ -43,12 +43,10 @@ class GeminiHandler(AsyncStreamHandler):
|
|||||||
self,
|
self,
|
||||||
expected_layout: Literal["mono"] = "mono",
|
expected_layout: Literal["mono"] = "mono",
|
||||||
output_sample_rate: int = 24000,
|
output_sample_rate: int = 24000,
|
||||||
output_frame_size: int = 480,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
expected_layout,
|
expected_layout,
|
||||||
output_sample_rate,
|
output_sample_rate,
|
||||||
output_frame_size,
|
|
||||||
input_sample_rate=16000,
|
input_sample_rate=16000,
|
||||||
)
|
)
|
||||||
self.input_queue: asyncio.Queue = asyncio.Queue()
|
self.input_queue: asyncio.Queue = asyncio.Queue()
|
||||||
@@ -59,7 +57,6 @@ class GeminiHandler(AsyncStreamHandler):
|
|||||||
return GeminiHandler(
|
return GeminiHandler(
|
||||||
expected_layout="mono",
|
expected_layout="mono",
|
||||||
output_sample_rate=self.output_sample_rate,
|
output_sample_rate=self.output_sample_rate,
|
||||||
output_frame_size=self.output_frame_size,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def start_up(self):
|
async def start_up(self):
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ class OpenAIHandler(AsyncStreamHandler):
|
|||||||
super().__init__(
|
super().__init__(
|
||||||
expected_layout="mono",
|
expected_layout="mono",
|
||||||
output_sample_rate=SAMPLE_RATE,
|
output_sample_rate=SAMPLE_RATE,
|
||||||
output_frame_size=480,
|
|
||||||
input_sample_rate=SAMPLE_RATE,
|
input_sample_rate=SAMPLE_RATE,
|
||||||
)
|
)
|
||||||
self.connection = None
|
self.connection = None
|
||||||
|
|||||||
@@ -92,27 +92,19 @@ stream = Stream(
|
|||||||
|
|
||||||
## Stream Handler Output Audio
|
## Stream Handler Output Audio
|
||||||
|
|
||||||
You can configure the output audio chunk size of `ReplyOnPause` (and any `StreamHandler`)
|
You can configure the output sampling rate of `ReplyOnPause` (and any `StreamHandler`)
|
||||||
with the `output_sample_rate` and `output_frame_size` parameters.
|
with the `output_sample_rate` parameter. For example:
|
||||||
|
|
||||||
The following code (which uses the default values of these parameters) states that each output chunk will be a frame of 960 samples at a sample rate of `24,000` Hz, so each chunk corresponds to `0.04` seconds of audio.
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from fastrtc import ReplyOnPause, Stream
|
from fastrtc import ReplyOnPause, Stream
|
||||||
|
|
||||||
stream = Stream(
|
stream = Stream(
|
||||||
handler=ReplyOnPause(..., output_sample_rate=24000, output_frame_size=960),
|
handler=ReplyOnPause(..., output_sample_rate=16000),
|
||||||
modality="audio",
|
modality="audio",
|
||||||
mode="send-receive"
|
mode="send-receive"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
!!! tip
|
|
||||||
|
|
||||||
In general it is best to leave these settings untouched. In some cases,
|
|
||||||
lowering the output_frame_size can yield smoother audio playback.
|
|
||||||
|
|
||||||
|
|
||||||
## Audio Icon
|
## Audio Icon
|
||||||
|
|
||||||
You can display an icon of your choice instead of the default wave animation for audio streaming.
|
You can display an icon of your choice instead of the default wave animation for audio streaming.
|
||||||
|
|||||||
Reference in New Issue
Block a user