This commit is contained in:
freddyaboulton
2024-10-23 16:14:10 -07:00
parent e87c4d49e8
commit 0b3ae91415
4 changed files with 35 additions and 36 deletions

View File

@@ -176,7 +176,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples). * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
* You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono". * You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop. * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
## Deployment ## Deployment

View File

@@ -30,45 +30,44 @@ async def player_worker_decode(
while not thread_quit.is_set(): while not thread_quit.is_set():
try: try:
async with asyncio.timeout(5): # Get next frame
# Get next frame frame = await asyncio.wait_for(next_frame(), timeout=5)
frame = await next_frame()
if frame is None: if frame is None:
if quit_on_none: if quit_on_none:
await queue.put(None) await queue.put(None)
break break
continue continue
if len(frame) == 2: if len(frame) == 2:
sample_rate, audio_array = frame sample_rate, audio_array = frame
layout = "mono" layout = "mono"
elif len(frame) == 3: elif len(frame) == 3:
sample_rate, audio_array, layout = frame sample_rate, audio_array, layout = frame
logger.debug( logger.debug(
"received array with shape %s sample rate %s layout %s", "received array with shape %s sample rate %s layout %s",
audio_array.shape, audio_array.shape,
sample_rate, sample_rate,
layout, layout,
) )
format = "s16" if audio_array.dtype == "int16" else "fltp" format = "s16" if audio_array.dtype == "int16" else "fltp"
# Convert to audio frame and resample # Convert to audio frame and resample
# This runs in the same timeout context # This runs in the same timeout context
frame = av.AudioFrame.from_ndarray( frame = av.AudioFrame.from_ndarray(
audio_array, format=format, layout=layout audio_array, format=format, layout=layout
) )
frame.sample_rate = sample_rate frame.sample_rate = sample_rate
for processed_frame in audio_resampler.resample(frame): for processed_frame in audio_resampler.resample(frame):
processed_frame.pts = audio_samples processed_frame.pts = audio_samples
processed_frame.time_base = audio_time_base processed_frame.time_base = audio_time_base
audio_samples += processed_frame.samples audio_samples += processed_frame.samples
await queue.put(processed_frame) await queue.put(processed_frame)
logger.debug("Queue size utils.py: %s", queue.qsize()) logger.debug("Queue size utils.py: %s", queue.qsize())
except TimeoutError: except (TimeoutError, asyncio.TimeoutError):
logger.warning( logger.warning(
"Timeout in frame processing cycle after %s seconds - resetting", 5 "Timeout in frame processing cycle after %s seconds - resetting", 5
) )

View File

@@ -214,7 +214,7 @@ if __name__ == "__main__":
* An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples). * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
* You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono". * You can also specify the audio layout ("mono" or "stereo") in the emit method by returning it as the third element of the tuple. If not specified, the default is "mono".
* The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop. * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
## Deployment ## Deployment

View File

@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "gradio_webrtc" name = "gradio_webrtc"
version = "0.0.6a3" version = "0.0.6"
description = "Stream images in realtime with webrtc" description = "Stream images in realtime with webrtc"
readme = "README.md" readme = "README.md"
license = "apache-2.0" license = "apache-2.0"