Add some utils fns, add moshi to cookbook, fix querySelector, support async functions in ReplyOnPause (#29)

* add * add code
2026-02-05 18:09:23 +08:00 · 2024-12-04 15:14:19 -05:00
parent c85c117576
commit 868e0bfa64
9 changed files with 158 additions and 10 deletions
--- a/backend/gradio_webrtc/init.py
+++ b/backend/gradio_webrtc/init.py
@@ -4,12 +4,14 @@ from .credentials import (
    get_twilio_turn_credentials,
 )
 from .reply_on_pause import AlgoOptions, ReplyOnPause, SileroVadOptions
-from .utils import AdditionalOutputs
+from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file
 from .webrtc import StreamHandler, WebRTC

 __all__ = [
    "AlgoOptions",
    "AdditionalOutputs",
+    "audio_to_bytes",
+    "audio_to_file",
    "get_hf_turn_credentials",
    "get_twilio_turn_credentials",
    "get_turn_credentials",
--- a/backend/gradio_webrtc/reply_on_pause.py
+++ b/backend/gradio_webrtc/reply_on_pause.py
@@ -70,6 +70,10 @@ ReplyFnGenerator = Union[
 ]


+async def iterate(generator: Generator) -> Any:
+    return next(generator)
+
+
 class ReplyOnPause(StreamHandler):
    def __init__(
        self,
@@ -86,6 +90,7 @@ class ReplyOnPause(StreamHandler):
        self.output_frame_size = output_frame_size
        self.model = get_vad_model()
        self.fn = fn
+        self.is_async = inspect.isasyncgenfunction(fn)
        self.event = Event()
        self.state = AppState()
        self.generator = None
@@ -172,6 +177,9 @@ class ReplyOnPause(StreamHandler):
            self.channel.send("tick")
            logger.debug("Sent tick")

+    async def async_iterate(self, generator) -> Any:
+        return await anext(generator)
+
    def emit(self):
        if not self.event.is_set():
            return None
@@ -190,6 +198,11 @@ class ReplyOnPause(StreamHandler):
                logger.debug("Latest args: %s", self.latest_args)
            self.state.responding = True
            try:
-                return next(self.generator)
-            except StopIteration:
+                if self.is_async:
+                    return asyncio.run_coroutine_threadsafe(
+                        self.async_iterate(self.generator), self.loop
+                    ).result()
+                else:
+                    return next(self.generator)
+            except (StopIteration, StopAsyncIteration):
                self.reset()
--- a/backend/gradio_webrtc/utils.py
+++ b/backend/gradio_webrtc/utils.py
@@ -1,10 +1,13 @@
 import asyncio
 import fractions
+import io
 import logging
+import tempfile
 from typing import Any, Callable, Protocol, cast

 import av
 import numpy as np
+from pydub import AudioSegment

 logger = logging.getLogger(__name__)

@@ -120,3 +123,67 @@ async def player_worker_decode(
            logger.debug("traceback %s", exec)
            logger.error("Error processing frame: %s", str(e))
            continue
+
+
+def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
+    """
+    Convert an audio tuple containing sample rate and numpy array data into bytes.
+
+    Parameters
+    ----------
+    audio : tuple[int, np.ndarray]
+        A tuple containing:
+            - sample_rate (int): The audio sample rate in Hz
+            - data (np.ndarray): The audio data as a numpy array
+
+    Returns
+    -------
+    bytes
+        The audio data encoded as bytes, suitable for transmission or storage
+
+    Example
+    -------
+    >>> sample_rate = 44100
+    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
+    >>> audio_tuple = (sample_rate, audio_data)
+    >>> audio_bytes = audio_to_bytes(audio_tuple)
+    """
+    audio_buffer = io.BytesIO()
+    segment = AudioSegment(
+        audio[1].tobytes(),
+        frame_rate=audio[0],
+        sample_width=audio[1].dtype.itemsize,
+        channels=1,
+    )
+    segment.export(audio_buffer, format="mp3")
+    return audio_buffer.getvalue()
+
+
+def audio_to_file(audio: tuple[int, np.ndarray]) -> str:
+    """
+    Save an audio tuple containing sample rate and numpy array data to a file.
+
+    Parameters
+    ----------
+    audio : tuple[int, np.ndarray]
+        A tuple containing:
+            - sample_rate (int): The audio sample rate in Hz
+            - data (np.ndarray): The audio data as a numpy array
+
+    Returns
+    -------
+    str
+        The path to the saved audio file
+
+    Example
+    -------
+    >>> sample_rate = 44100
+    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
+    >>> audio_tuple = (sample_rate, audio_data)
+    >>> file_path = audio_to_file(audio_tuple)
+    >>> print(f"Audio saved to: {file_path}")
+    """
+    bytes_ = audio_to_bytes(audio)
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+        f.write(bytes_)
+    return f.name