Add some utils fns, add moshi to cookbook, fix querySelector, support async functions in ReplyOnPause (#29)

* add

* add code
This commit is contained in:
Freddy Boulton
2024-12-04 15:14:19 -05:00
committed by GitHub
parent c85c117576
commit 868e0bfa64
9 changed files with 158 additions and 10 deletions

View File

@@ -4,12 +4,14 @@ from .credentials import (
get_twilio_turn_credentials,
)
from .reply_on_pause import AlgoOptions, ReplyOnPause, SileroVadOptions
from .utils import AdditionalOutputs
from .utils import AdditionalOutputs, audio_to_bytes, audio_to_file
from .webrtc import StreamHandler, WebRTC
__all__ = [
"AlgoOptions",
"AdditionalOutputs",
"audio_to_bytes",
"audio_to_file",
"get_hf_turn_credentials",
"get_twilio_turn_credentials",
"get_turn_credentials",

View File

@@ -70,6 +70,10 @@ ReplyFnGenerator = Union[
]
async def iterate(generator: Generator) -> Any:
return next(generator)
class ReplyOnPause(StreamHandler):
def __init__(
self,
@@ -86,6 +90,7 @@ class ReplyOnPause(StreamHandler):
self.output_frame_size = output_frame_size
self.model = get_vad_model()
self.fn = fn
self.is_async = inspect.isasyncgenfunction(fn)
self.event = Event()
self.state = AppState()
self.generator = None
@@ -172,6 +177,9 @@ class ReplyOnPause(StreamHandler):
self.channel.send("tick")
logger.debug("Sent tick")
async def async_iterate(self, generator) -> Any:
return await anext(generator)
def emit(self):
if not self.event.is_set():
return None
@@ -190,6 +198,11 @@ class ReplyOnPause(StreamHandler):
logger.debug("Latest args: %s", self.latest_args)
self.state.responding = True
try:
return next(self.generator)
except StopIteration:
if self.is_async:
return asyncio.run_coroutine_threadsafe(
self.async_iterate(self.generator), self.loop
).result()
else:
return next(self.generator)
except (StopIteration, StopAsyncIteration):
self.reset()

View File

@@ -1,10 +1,13 @@
import asyncio
import fractions
import io
import logging
import tempfile
from typing import Any, Callable, Protocol, cast
import av
import numpy as np
from pydub import AudioSegment
logger = logging.getLogger(__name__)
@@ -120,3 +123,67 @@ async def player_worker_decode(
logger.debug("traceback %s", exec)
logger.error("Error processing frame: %s", str(e))
continue
def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
"""
Convert an audio tuple containing sample rate and numpy array data into bytes.
Parameters
----------
audio : tuple[int, np.ndarray]
A tuple containing:
- sample_rate (int): The audio sample rate in Hz
- data (np.ndarray): The audio data as a numpy array
Returns
-------
bytes
The audio data encoded as bytes, suitable for transmission or storage
Example
-------
>>> sample_rate = 44100
>>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples
>>> audio_tuple = (sample_rate, audio_data)
>>> audio_bytes = audio_to_bytes(audio_tuple)
"""
audio_buffer = io.BytesIO()
segment = AudioSegment(
audio[1].tobytes(),
frame_rate=audio[0],
sample_width=audio[1].dtype.itemsize,
channels=1,
)
segment.export(audio_buffer, format="mp3")
return audio_buffer.getvalue()
def audio_to_file(audio: tuple[int, np.ndarray]) -> str:
"""
Save an audio tuple containing sample rate and numpy array data to a file.
Parameters
----------
audio : tuple[int, np.ndarray]
A tuple containing:
- sample_rate (int): The audio sample rate in Hz
- data (np.ndarray): The audio data as a numpy array
Returns
-------
str
The path to the saved audio file
Example
-------
>>> sample_rate = 44100
>>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples
>>> audio_tuple = (sample_rate, audio_data)
>>> file_path = audio_to_file(audio_tuple)
>>> print(f"Audio saved to: {file_path}")
"""
bytes_ = audio_to_bytes(audio)
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
f.write(bytes_)
return f.name