Audio to float32 util (#32)

* add util

* version bump
This commit is contained in:
Freddy Boulton
2024-12-06 17:31:12 -05:00
committed by GitHub
parent 80283f6631
commit 903f1f70bd
4 changed files with 36 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ from .credentials import (
get_twilio_turn_credentials,
)
from .reply_on_pause import AlgoOptions, ReplyOnPause, SileroVadOptions
from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file
from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file, audio_to_float32
from .webrtc import StreamHandler, WebRTC
__all__ = [
@@ -12,6 +12,7 @@ __all__ = [
"AdditionalOutputs",
"audio_to_bytes",
"audio_to_file",
"audio_to_float32",
"get_hf_turn_credentials",
"get_twilio_turn_credentials",
"get_turn_credentials",

View File

@@ -83,8 +83,14 @@ class ReplyOnPause(StreamHandler):
expected_layout: Literal["mono", "stereo"] = "mono",
output_sample_rate: int = 24000,
output_frame_size: int = 480,
input_sample_rate: int = 48000,
):
super().__init__(expected_layout, output_sample_rate, output_frame_size)
super().__init__(
expected_layout,
output_sample_rate,
output_frame_size,
input_sample_rate=input_sample_rate,
)
self.expected_layout: Literal["mono", "stereo"] = expected_layout
self.output_sample_rate = output_sample_rate
self.output_frame_size = output_frame_size

View File

@@ -187,3 +187,29 @@ def audio_to_file(audio: tuple[int, np.ndarray]) -> str:
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
f.write(bytes_)
return f.name
def audio_to_float32(audio: tuple[int, np.ndarray]) -> np.ndarray:
    """
    Convert the sample data of an audio tuple to float32.

    Samples with a non-floating dtype (e.g. int16 PCM) are cast to float32
    and divided by 32768.0, which maps the int16 range onto [-1.0, 1.0).
    Samples that already have a floating-point dtype are only cast to
    float32 and are not rescaled.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        A tuple containing:
        - sample_rate (int): The audio sample rate in Hz (not used by the
          conversion)
        - data (np.ndarray): The audio samples as a numpy array

    Returns
    -------
    np.ndarray
        A new numpy array holding the audio data with dtype float32

    Example
    -------
    >>> sample_rate = 44100
    >>> audio_data = np.array([0, 16384, -32768], dtype=np.int16)
    >>> audio_tuple = (sample_rate, audio_data)
    >>> audio_to_float32(audio_tuple).tolist()
    [0.0, 0.5, -1.0]
    """
    samples = audio[1]
    if np.issubdtype(samples.dtype, np.floating):
        # Floating-point input is presumably already on a float scale;
        # dividing it by 32768 would shrink it to near silence.
        return samples.astype(np.float32)
    # 32768 == 2**15, the magnitude of the most negative int16 value.
    return samples.astype(np.float32) / 32768.0

View File

@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
[project]
name = "gradio_webrtc"
version = "0.0.19"
version = "0.0.20"
description = "Stream images in realtime with webrtc"
readme = "README.md"
license = "apache-2.0"