mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 01:49:23 +08:00
@@ -4,7 +4,7 @@ from .credentials import (
|
||||
get_twilio_turn_credentials,
|
||||
)
|
||||
from .reply_on_pause import AlgoOptions, ReplyOnPause, SileroVadOptions
|
||||
from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file
|
||||
from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file, audio_to_float32
|
||||
from .webrtc import StreamHandler, WebRTC
|
||||
|
||||
__all__ = [
|
||||
@@ -12,6 +12,7 @@ __all__ = [
|
||||
"AdditionalOutputs",
|
||||
"audio_to_bytes",
|
||||
"audio_to_file",
|
||||
"audio_to_float32",
|
||||
"get_hf_turn_credentials",
|
||||
"get_twilio_turn_credentials",
|
||||
"get_turn_credentials",
|
||||
|
||||
@@ -83,8 +83,14 @@ class ReplyOnPause(StreamHandler):
|
||||
expected_layout: Literal["mono", "stereo"] = "mono",
|
||||
output_sample_rate: int = 24000,
|
||||
output_frame_size: int = 480,
|
||||
input_sample_rate: int = 48000,
|
||||
):
|
||||
super().__init__(expected_layout, output_sample_rate, output_frame_size)
|
||||
super().__init__(
|
||||
expected_layout,
|
||||
output_sample_rate,
|
||||
output_frame_size,
|
||||
input_sample_rate=input_sample_rate,
|
||||
)
|
||||
self.expected_layout: Literal["mono", "stereo"] = expected_layout
|
||||
self.output_sample_rate = output_sample_rate
|
||||
self.output_frame_size = output_frame_size
|
||||
|
||||
@@ -187,3 +187,29 @@ def audio_to_file(audio: tuple[int, np.ndarray]) -> str:
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
|
||||
f.write(bytes_)
|
||||
return f.name
|
||||
|
||||
|
||||
def audio_to_float32(audio: tuple[int, np.ndarray]) -> np.ndarray:
    """
    Convert the sample data of an audio tuple to a float32 numpy array.

    Integer samples (typically int16 PCM) are divided by 32768.0, which maps
    the int16 range onto [-1.0, 1.0). Floating-point samples are treated as
    already normalized and are only cast to float32, so they are not scaled
    down a second time.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        A tuple containing:
        - sample_rate (int): The audio sample rate in Hz (not used here)
        - data (np.ndarray): The audio samples, usually with dtype int16

    Returns
    -------
    np.ndarray
        The audio data as a numpy array with dtype float32

    Example
    -------
    >>> sample_rate = 44100
    >>> audio_data = np.array([0, 16384, -32768], dtype=np.int16)
    >>> audio_tuple = (sample_rate, audio_data)
    >>> audio_to_float32(audio_tuple).tolist()
    [0.0, 0.5, -1.0]
    """
    samples = audio[1]
    # Float input is assumed to be normalized already; dividing it by 32768
    # would shrink the signal to near silence.
    if np.issubdtype(samples.dtype, np.floating):
        return samples.astype(np.float32)
    # 32768 == 2**15, the magnitude of the most negative int16 value.
    return samples.astype(np.float32) / 32768.0
|
||||
|
||||
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "gradio_webrtc"
|
||||
version = "0.0.19"
|
||||
version = "0.0.20"
|
||||
description = "Stream images in realtime with webrtc"
|
||||
readme = "README.md"
|
||||
license = "apache-2.0"
|
||||
|
||||
Reference in New Issue
Block a user