freddyaboulton
2024-10-23 16:14:10 -07:00
parent e87c4d49e8
commit 0b3ae91415
4 changed files with 35 additions and 36 deletions


@@ -176,7 +176,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of `(frame_rate, audio_samples)`, where `audio_samples` is a numpy array of shape `(num_channels, num_samples)`.
* You can also specify the audio layout ("mono" or "stereo") in the `emit` method by returning it as the third element of the tuple. If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return `None` (see the sketch after this list).
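
To make the frame contract concrete, here is a minimal sketch of an `emit` implementation that follows these rules; the `MyAudioHandler` class and its `frame_ready` helper are hypothetical illustrations, not part of the library:

```python
import numpy as np

class MyAudioHandler:
    """Hypothetical handler illustrating the emit contract above."""

    def frame_ready(self) -> bool:
        # Stand-in readiness check; a real handler would poll its audio source.
        return True

    def emit(self):
        # emit SHOULD NOT block: return None immediately if no frame is ready.
        if not self.frame_ready():
            return None
        sample_rate = 24_000
        # audio_samples has shape (num_channels, num_samples); one channel here.
        audio_samples = np.zeros((1, 480), dtype=np.int16)
        # A two-element tuple implies the default "mono" layout.
        return (sample_rate, audio_samples)
```
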
## Deployment


@@ -30,45 +30,44 @@ async def player_worker_decode(
     while not thread_quit.is_set():
         try:
-            async with asyncio.timeout(5):
-                # Get next frame
-                frame = await next_frame()
+            # Get next frame
+            frame = await asyncio.wait_for(next_frame(), timeout=5)
 
-                if frame is None:
-                    if quit_on_none:
-                        await queue.put(None)
-                        break
-                    continue
+            if frame is None:
+                if quit_on_none:
+                    await queue.put(None)
+                    break
+                continue
 
-                if len(frame) == 2:
-                    sample_rate, audio_array = frame
-                    layout = "mono"
-                elif len(frame) == 3:
-                    sample_rate, audio_array, layout = frame
+            if len(frame) == 2:
+                sample_rate, audio_array = frame
+                layout = "mono"
+            elif len(frame) == 3:
+                sample_rate, audio_array, layout = frame
 
-                logger.debug(
-                    "received array with shape %s sample rate %s layout %s",
-                    audio_array.shape,
-                    sample_rate,
-                    layout,
-                )
-                format = "s16" if audio_array.dtype == "int16" else "fltp"
+            logger.debug(
+                "received array with shape %s sample rate %s layout %s",
+                audio_array.shape,
+                sample_rate,
+                layout,
+            )
+            format = "s16" if audio_array.dtype == "int16" else "fltp"
 
-                # Convert to audio frame and resample
-                # This runs in the same timeout context
-                frame = av.AudioFrame.from_ndarray(
-                    audio_array, format=format, layout=layout
-                )
-                frame.sample_rate = sample_rate
+            # Convert to audio frame and resample
+            # This runs in the same timeout context
+            frame = av.AudioFrame.from_ndarray(
+                audio_array, format=format, layout=layout
+            )
+            frame.sample_rate = sample_rate
 
-                for processed_frame in audio_resampler.resample(frame):
-                    processed_frame.pts = audio_samples
-                    processed_frame.time_base = audio_time_base
-                    audio_samples += processed_frame.samples
-                    await queue.put(processed_frame)
-                    logger.debug("Queue size utils.py: %s", queue.qsize())
-        except TimeoutError:
+            for processed_frame in audio_resampler.resample(frame):
+                processed_frame.pts = audio_samples
+                processed_frame.time_base = audio_time_base
+                audio_samples += processed_frame.samples
+                await queue.put(processed_frame)
+                logger.debug("Queue size utils.py: %s", queue.qsize())
+        except (TimeoutError, asyncio.TimeoutError):
             logger.warning(
                 "Timeout in frame processing cycle after %s seconds - resetting", 5
             )
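
For context on the pattern above: `asyncio.timeout` exists only on Python 3.11+, while `asyncio.wait_for` works on earlier versions but raises `asyncio.TimeoutError`, which before 3.11 is a distinct class from the builtin `TimeoutError` (the two became aliases in 3.11), so catching both covers all interpreters. A minimal sketch of the portable pattern; the `fetch_with_timeout` wrapper is a hypothetical illustration, not part of the library:

```python
import asyncio

async def fetch_with_timeout(next_frame, seconds: float = 5.0):
    """Await next_frame() with a timeout, portable across Python 3.8+."""
    try:
        return await asyncio.wait_for(next_frame(), timeout=seconds)
    except (TimeoutError, asyncio.TimeoutError):
        # Before 3.11, wait_for raises asyncio.TimeoutError, which does not
        # inherit from the builtin TimeoutError; on 3.11+ they are the same class.
        return None
```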


@@ -214,7 +214,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of `(frame_rate, audio_samples)`, where `audio_samples` is a numpy array of shape `(num_channels, num_samples)`.
* You can also specify the audio layout ("mono" or "stereo") in the `emit` method by returning it as the third element of the tuple (see the stereo sketch after this list). If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return `None`.
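
As a companion to the layout note above, a hedged sketch of a stereo return value; the free-standing `emit` function is a hypothetical illustration:

```python
import numpy as np

def emit():
    # Hypothetical emit returning stereo audio.
    sample_rate = 48_000
    # Two channels: audio_samples has shape (num_channels, num_samples).
    audio_samples = np.zeros((2, 960), dtype=np.int16)
    # A third tuple element overrides the default "mono" layout.
    return (sample_rate, audio_samples, "stereo")
```
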
## Deployment


@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
 [project]
 name = "gradio_webrtc"
-version = "0.0.6a3"
+version = "0.0.6"
 description = "Stream images in realtime with webrtc"
 readme = "README.md"
 license = "apache-2.0"