"""Utilities for streaming audio frames and log messages over WebRTC.

Provides helpers to relay warnings/errors to the client UI through the
active data channel, split handler outputs from additional metadata,
decode/resample audio for playback, and convert between audio formats.
"""

import asyncio
import fractions
import io
import json
import logging
import tempfile
from contextvars import ContextVar
from typing import Any, Callable, Protocol, TypedDict, cast

import av
import numpy as np
from pydub import AudioSegment

logger = logging.getLogger(__name__)

# Duration of one audio frame in seconds (20 ms).
AUDIO_PTIME = 0.020


class AudioChunk(TypedDict):
    # Sample offsets delimiting a chunk within a larger audio stream.
    start: int
    end: int


class AdditionalOutputs:
    """Container for extra values a handler returns alongside its main output."""

    def __init__(self, *args) -> None:
        self.args = args


class DataChannel(Protocol):
    """Structural type for any object that can send a string message."""

    def send(self, message: str) -> None: ...


# The data channel bound to the currently executing handler, if any.
current_channel: ContextVar[DataChannel | None] = ContextVar(
    "current_channel", default=None
)


def _send_log(message: str, type: str) -> None:
    """Send a JSON log payload of the given type over the current channel.

    Silently does nothing when no channel is bound to the current context.
    Works from both async and sync callers: if an event loop is running in
    this thread the send is scheduled on it, otherwise it runs synchronously.
    """

    async def _send(channel: DataChannel) -> None:
        channel.send(
            json.dumps(
                {
                    "type": type,
                    "message": message,
                }
            )
        )

    if channel := current_channel.get():
        # NOTE: was a bare debug print(); demoted to logger.debug.
        logger.debug("channel %s", channel)
        try:
            loop = asyncio.get_running_loop()
            asyncio.run_coroutine_threadsafe(_send(channel), loop)
        except RuntimeError:
            # No running loop in this thread - execute the send directly.
            asyncio.run(_send(channel))


def Warning(  # noqa: N802
    message: str = "Warning issued.",
):
    """
    Send a warning message that is displayed in the UI of the application.

    Parameters
    ----------
    message : str
        The warning message to send

    Returns
    -------
    None
    """
    _send_log(message, "warning")


class WebRTCError(Exception):
    """Exception that also relays its message to the client UI as an error."""

    def __init__(self, message: str) -> None:
        super().__init__(message)
        _send_log(message, "error")


def split_output(data: tuple | Any) -> tuple[Any, AdditionalOutputs | None]:
    """Split a handler's return value into (data, AdditionalOutputs | None).

    Accepts a bare ``AdditionalOutputs``, a ``(data, AdditionalOutputs)``
    pair, a bare audio tuple ``(sample_rate, array[, layout])``, or any
    other value (returned unchanged with ``None`` outputs).

    Raises
    ------
    ValueError
        If a tuple is neither a bare audio tuple nor a valid
        ``(data, AdditionalOutputs)`` pair.
    """
    if isinstance(data, AdditionalOutputs):
        return None, data
    if isinstance(data, tuple):
        # Handle the bare audio case: (sample_rate, ndarray[, layout]).
        if 2 <= len(data) <= 3 and isinstance(data[1], np.ndarray):
            return data, None
        if not len(data) == 2:
            raise ValueError(
                "The tuple must have exactly two elements: the data and an instance of AdditionalOutputs."
            )
        if not isinstance(data[-1], AdditionalOutputs):
            raise ValueError(
                "The last element of the tuple must be an instance of AdditionalOutputs."
            )
        return data[0], cast(AdditionalOutputs, data[1])
    return data, None


async def player_worker_decode(
    next_frame: Callable,
    queue: asyncio.Queue,
    thread_quit: asyncio.Event,
    channel: Callable[[], DataChannel | None] | None,
    set_additional_outputs: Callable | None,
    quit_on_none: bool = False,
    sample_rate: int = 48000,
    frame_size: int = int(48000 * AUDIO_PTIME),
):
    """Pull audio from ``next_frame``, resample to s16 stereo, and enqueue.

    Runs until ``thread_quit`` is set. Each iteration awaits ``next_frame``
    (60 s timeout), forwards any ``AdditionalOutputs`` over the data channel,
    converts the ``(sample_rate, array[, layout])`` tuple to an
    ``av.AudioFrame``, resamples it, stamps monotonically increasing PTS
    values, and puts the resulting frames on ``queue``. Errors in a single
    cycle are logged and the loop continues.

    Parameters
    ----------
    next_frame : Callable
        Async callable producing the next handler output.
    queue : asyncio.Queue
        Destination queue for resampled ``av.AudioFrame`` objects.
    thread_quit : asyncio.Event
        Set to stop the worker loop.
    channel : Callable[[], DataChannel | None] | None
        Zero-arg callable returning the current data channel (or None).
    set_additional_outputs : Callable | None
        Callback invoked with any ``AdditionalOutputs`` received.
    quit_on_none : bool
        When True, a ``None`` frame enqueues ``None`` and ends the loop.
    sample_rate : int
        Output sample rate in Hz.
    frame_size : int
        Samples per resampled output frame.
    """
    audio_samples = 0
    audio_time_base = fractions.Fraction(1, sample_rate)
    audio_resampler = av.AudioResampler(  # type: ignore
        format="s16",
        layout="stereo",
        rate=sample_rate,
        frame_size=frame_size,
    )

    while not thread_quit.is_set():
        try:
            # Get next frame
            frame, outputs = split_output(
                await asyncio.wait_for(next_frame(), timeout=60)
            )
            if (
                isinstance(outputs, AdditionalOutputs)
                and set_additional_outputs
                and channel
                and channel()
            ):
                set_additional_outputs(outputs)
                cast(DataChannel, channel()).send("change")

            if frame is None:
                if quit_on_none:
                    await queue.put(None)
                    break
                continue

            if len(frame) == 2:
                sample_rate, audio_array = frame
                layout = "mono"
            elif len(frame) == 3:
                sample_rate, audio_array, layout = frame
            else:
                # Explicit error instead of the UnboundLocalError the
                # original raised below; caught and logged like any other
                # per-cycle failure.
                raise ValueError(
                    f"Audio frame must be a tuple of length 2 or 3, got {len(frame)}"
                )

            logger.debug(
                "received array with shape %s sample rate %s layout %s",
                audio_array.shape,
                sample_rate,
                layout,
            )
            # Renamed from `format` to avoid shadowing the builtin.
            audio_format = "s16" if audio_array.dtype == "int16" else "fltp"

            # Convert to an av frame and resample; runs in the same
            # timeout context as the fetch above.
            av_frame = av.AudioFrame.from_ndarray(  # type: ignore
                audio_array, format=audio_format, layout=layout
            )
            av_frame.sample_rate = sample_rate

            for processed_frame in audio_resampler.resample(av_frame):
                processed_frame.pts = audio_samples
                processed_frame.time_base = audio_time_base
                audio_samples += processed_frame.samples
                await queue.put(processed_frame)
            logger.debug("Queue size utils.py: %s", queue.qsize())
        except (TimeoutError, asyncio.TimeoutError):
            logger.warning(
                "Timeout in frame processing cycle after %s seconds - resetting", 60
            )
            continue
        except Exception as e:
            import traceback

            # Renamed from `exec` to avoid shadowing the builtin.
            tb = traceback.format_exc()
            logger.debug("traceback %s", tb)
            logger.error("Error processing frame: %s", str(e))
            continue


def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
    """
    Convert an audio tuple containing sample rate and numpy array data into bytes.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        A tuple containing:
            - sample_rate (int): The audio sample rate in Hz
            - data (np.ndarray): The audio data as a numpy array

    Returns
    -------
    bytes
        The audio data encoded as MP3 bytes, suitable for transmission or storage

    Example
    -------
    >>> sample_rate = 44100
    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
    >>> audio_tuple = (sample_rate, audio_data)
    >>> audio_bytes = audio_to_bytes(audio_tuple)
    """
    audio_buffer = io.BytesIO()
    segment = AudioSegment(
        audio[1].tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )
    segment.export(audio_buffer, format="mp3")
    return audio_buffer.getvalue()


def audio_to_file(audio: tuple[int, np.ndarray]) -> str:
    """
    Save an audio tuple containing sample rate and numpy array data to a file.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        A tuple containing:
            - sample_rate (int): The audio sample rate in Hz
            - data (np.ndarray): The audio data as a numpy array

    Returns
    -------
    str
        The path to the saved MP3 file (a NamedTemporaryFile kept on disk)

    Example
    -------
    >>> sample_rate = 44100
    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
    >>> audio_tuple = (sample_rate, audio_data)
    >>> file_path = audio_to_file(audio_tuple)
    >>> print(f"Audio saved to: {file_path}")
    """
    bytes_ = audio_to_bytes(audio)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(bytes_)
    return f.name


def audio_to_float32(audio: tuple[int, np.ndarray]) -> np.ndarray:
    """
    Convert an audio tuple containing sample rate (int16) and numpy array data to float32.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        A tuple containing:
            - sample_rate (int): The audio sample rate in Hz
            - data (np.ndarray): The audio data as a numpy array (int16 expected)

    Returns
    -------
    np.ndarray
        The audio data as a numpy array with dtype float32, scaled to [-1, 1)

    Example
    -------
    >>> sample_rate = 44100
    >>> audio_data = np.array([0.1, -0.2, 0.3])  # Example audio samples
    >>> audio_tuple = (sample_rate, audio_data)
    >>> audio_float32 = audio_to_float32(audio_tuple)
    """
    return audio[1].astype(np.float32) / 32768.0


def aggregate_bytes_to_16bit(chunks_iterator):
    """Yield int16 sample arrays of shape (1, n) from a byte-chunk iterator.

    Odd trailing bytes are carried over to the next chunk so samples are
    never split across yields. Any final unpaired byte is dropped.
    """
    leftover = b""  # Store incomplete bytes between chunks

    for chunk in chunks_iterator:
        # Combine with any leftover bytes from previous chunk
        current_bytes = leftover + chunk

        # Calculate complete samples
        n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
        bytes_to_process = n_complete_samples * 2

        # Split into complete samples and leftover
        to_process = current_bytes[:bytes_to_process]
        leftover = current_bytes[bytes_to_process:]

        if to_process:  # Only yield if we have complete samples
            audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
            yield audio_array


async def async_aggregate_bytes_to_16bit(chunks_iterator):
    """Async variant of :func:`aggregate_bytes_to_16bit` for async iterators."""
    leftover = b""  # Store incomplete bytes between chunks

    async for chunk in chunks_iterator:
        # Combine with any leftover bytes from previous chunk
        current_bytes = leftover + chunk

        # Calculate complete samples
        n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
        bytes_to_process = n_complete_samples * 2

        # Split into complete samples and leftover
        to_process = current_bytes[:bytes_to_process]
        leftover = current_bytes[bytes_to_process:]

        if to_process:  # Only yield if we have complete samples
            audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
            yield audio_array