From 903f1f70bd586f638ad3b5a3940c7a8ec70ad1f5 Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Fri, 6 Dec 2024 17:31:12 -0500 Subject: [PATCH] Audio to float32 util (#32) * add util * version bump --- backend/gradio_webrtc/__init__.py | 3 ++- backend/gradio_webrtc/reply_on_pause.py | 8 +++++++- backend/gradio_webrtc/utils.py | 26 +++++++++++++++++++++++++ pyproject.toml | 2 +- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/backend/gradio_webrtc/__init__.py b/backend/gradio_webrtc/__init__.py index 08a4395..59be422 100644 --- a/backend/gradio_webrtc/__init__.py +++ b/backend/gradio_webrtc/__init__.py @@ -4,7 +4,7 @@ from .credentials import ( get_twilio_turn_credentials, ) from .reply_on_pause import AlgoOptions, ReplyOnPause, SileroVadOptions -from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file +from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file, audio_to_float32 from .webrtc import StreamHandler, WebRTC __all__ = [ @@ -12,6 +12,7 @@ __all__ = [ "AdditionalOutputs", "audio_to_bytes", "audio_to_file", + "audio_to_float32", "get_hf_turn_credentials", "get_twilio_turn_credentials", "get_turn_credentials", diff --git a/backend/gradio_webrtc/reply_on_pause.py b/backend/gradio_webrtc/reply_on_pause.py index bb25171..372efb4 100644 --- a/backend/gradio_webrtc/reply_on_pause.py +++ b/backend/gradio_webrtc/reply_on_pause.py @@ -83,8 +83,14 @@ class ReplyOnPause(StreamHandler): expected_layout: Literal["mono", "stereo"] = "mono", output_sample_rate: int = 24000, output_frame_size: int = 480, + input_sample_rate: int = 48000, ): - super().__init__(expected_layout, output_sample_rate, output_frame_size) + super().__init__( + expected_layout, + output_sample_rate, + output_frame_size, + input_sample_rate=input_sample_rate, + ) self.expected_layout: Literal["mono", "stereo"] = expected_layout self.output_sample_rate = output_sample_rate self.output_frame_size = output_frame_size diff --git a/backend/gradio_webrtc/utils.py 
b/backend/gradio_webrtc/utils.py index ba0bdab..6389281 100644 --- a/backend/gradio_webrtc/utils.py +++ b/backend/gradio_webrtc/utils.py @@ -187,3 +187,29 @@ def audio_to_file(audio: tuple[int, np.ndarray]) -> str: with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: f.write(bytes_) return f.name + + +def audio_to_float32(audio: tuple[int, np.ndarray]) -> np.ndarray: + """ + Convert an audio tuple containing sample rate and numpy array data (int16) to float32. + + Parameters + ---------- + audio : tuple[int, np.ndarray] + A tuple containing: + - sample_rate (int): The audio sample rate in Hz + - data (np.ndarray): The audio data as an int16 numpy array + + Returns + ------- + np.ndarray + The audio data as a numpy array with dtype float32, scaled by 1/32768 + + Example + ------- + >>> sample_rate = 44100 + >>> audio_data = np.array([1000, -2000, 3000], dtype=np.int16) # Example int16 audio samples + >>> audio_tuple = (sample_rate, audio_data) + >>> audio_float32 = audio_to_float32(audio_tuple) + """ + return audio[1].astype(np.float32) / 32768.0 diff --git a/pyproject.toml b/pyproject.toml index d7ad1fb..81192ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "hatchling.build" [project] name = "gradio_webrtc" -version = "0.0.19" +version = "0.0.20" description = "Stream images in realtime with webrtc" readme = "README.md" license = "apache-2.0"