This commit is contained in:
freddyaboulton
2024-10-23 16:14:10 -07:00
parent e87c4d49e8
commit 0b3ae91415
4 changed files with 35 additions and 36 deletions

View File

@@ -176,7 +176,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples). * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
* You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono". * You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop. * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
## Deployment ## Deployment

View File

@@ -30,45 +30,44 @@ async def player_worker_decode(
while not thread_quit.is_set(): while not thread_quit.is_set():
try: try:
async with asyncio.timeout(5): # Get next frame
# Get next frame frame = await asyncio.wait_for(next_frame(), timeout=5)
frame = await next_frame()
if frame is None: if frame is None:
if quit_on_none: if quit_on_none:
await queue.put(None) await queue.put(None)
break break
continue continue
if len(frame) == 2: if len(frame) == 2:
sample_rate, audio_array = frame sample_rate, audio_array = frame
layout = "mono" layout = "mono"
elif len(frame) == 3: elif len(frame) == 3:
sample_rate, audio_array, layout = frame sample_rate, audio_array, layout = frame
logger.debug( logger.debug(
"received array with shape %s sample rate %s layout %s", "received array with shape %s sample rate %s layout %s",
audio_array.shape, audio_array.shape,
sample_rate, sample_rate,
layout, layout,
) )
format = "s16" if audio_array.dtype == "int16" else "fltp" format = "s16" if audio_array.dtype == "int16" else "fltp"
# Convert to audio frame and resample # Convert to audio frame and resample
# This runs in the same timeout context # This runs in the same timeout context
frame = av.AudioFrame.from_ndarray( frame = av.AudioFrame.from_ndarray(
audio_array, format=format, layout=layout audio_array, format=format, layout=layout
) )
frame.sample_rate = sample_rate frame.sample_rate = sample_rate
for processed_frame in audio_resampler.resample(frame): for processed_frame in audio_resampler.resample(frame):
processed_frame.pts = audio_samples processed_frame.pts = audio_samples
processed_frame.time_base = audio_time_base processed_frame.time_base = audio_time_base
audio_samples += processed_frame.samples audio_samples += processed_frame.samples
await queue.put(processed_frame) await queue.put(processed_frame)
logger.debug("Queue size utils.py: %s", queue.qsize()) logger.debug("Queue size utils.py: %s", queue.qsize())
except TimeoutError: except (TimeoutError, asyncio.TimeoutError):
logger.warning( logger.warning(
"Timeout in frame processing cycle after %s seconds - resetting", 5 "Timeout in frame processing cycle after %s seconds - resetting", 5
) )

View File

@@ -214,7 +214,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples). * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
* You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono". * You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop. * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
## Deployment ## Deployment

View File

@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "gradio_webrtc" name = "gradio_webrtc"
version = "0.0.6a3" version = "0.0.6"
description = "Stream images in realtime with webrtc" description = "Stream images in realtime with webrtc"
readme = "README.md" readme = "README.md"
license = "apache-2.0" license = "apache-2.0"