From 0b3ae914159d01f25d9eb2c69123e13b4fb85284 Mon Sep 17 00:00:00 2001
From: freddyaboulton <alfonsoboulton@gmail.com>
Date: Wed, 23 Oct 2024 16:14:10 -0700
Subject: [PATCH] Use asyncio.wait_for for frame timeout, document non-blocking emit, bump to 0.0.6

---
 README.md                      |  2 +-
 backend/gradio_webrtc/utils.py | 65 +++++++++++++++++-----------------
 demo/app.py                    |  2 +-
 pyproject.toml                 |  2 +-
 4 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index f3dea4c..1ef99ad 100644
--- a/README.md
+++ b/README.md
@@ -176,7 +176,7 @@ if __name__ == "__main__":
 * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
 * You can also specify the audio layout ("mono" or "stereo") in the emit method by retuning it as the third element of the tuple. If not specified, the default is "mono".
 * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
-
+* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
 
 ## Deployment
 
diff --git a/backend/gradio_webrtc/utils.py b/backend/gradio_webrtc/utils.py
index 16b75c3..be360a3 100644
--- a/backend/gradio_webrtc/utils.py
+++ b/backend/gradio_webrtc/utils.py
@@ -30,45 +30,44 @@ async def player_worker_decode(
 
     while not thread_quit.is_set():
         try:
-            async with asyncio.timeout(5):
-                # Get next frame
-                frame = await next_frame()
+            # Get next frame
+            frame = await asyncio.wait_for(next_frame(), timeout=5)
 
-                if frame is None:
-                    if quit_on_none:
-                        await queue.put(None)
-                        break
-                    continue
+            if frame is None:
+                if quit_on_none:
+                    await queue.put(None)
+                    break
+                continue
 
-                if len(frame) == 2:
-                    sample_rate, audio_array = frame
-                    layout = "mono"
-                elif len(frame) == 3:
-                    sample_rate, audio_array, layout = frame
+            if len(frame) == 2:
+                sample_rate, audio_array = frame
+                layout = "mono"
+            elif len(frame) == 3:
+                sample_rate, audio_array, layout = frame
 
-                logger.debug(
-                    "received array with shape %s sample rate %s layout %s",
-                    audio_array.shape,
-                    sample_rate,
-                    layout,
-                )
-                format = "s16" if audio_array.dtype == "int16" else "fltp"
+            logger.debug(
+                "received array with shape %s sample rate %s layout %s",
+                audio_array.shape,
+                sample_rate,
+                layout,
+            )
+            format = "s16" if audio_array.dtype == "int16" else "fltp"
 
-                # Convert to audio frame and resample
-                # This runs in the same timeout context
-                frame = av.AudioFrame.from_ndarray(
-                    audio_array, format=format, layout=layout
-                )
-                frame.sample_rate = sample_rate
+            # Convert to audio frame and resample
+            # NOTE: the 5s timeout above only bounds next_frame(), not this step
+            frame = av.AudioFrame.from_ndarray(
+                audio_array, format=format, layout=layout
+            )
+            frame.sample_rate = sample_rate
 
-                for processed_frame in audio_resampler.resample(frame):
-                    processed_frame.pts = audio_samples
-                    processed_frame.time_base = audio_time_base
-                    audio_samples += processed_frame.samples
-                    await queue.put(processed_frame)
-                    logger.debug("Queue size utils.py: %s", queue.qsize())
+            for processed_frame in audio_resampler.resample(frame):
+                processed_frame.pts = audio_samples
+                processed_frame.time_base = audio_time_base
+                audio_samples += processed_frame.samples
+                await queue.put(processed_frame)
+                logger.debug("Queue size utils.py: %s", queue.qsize())
 
-        except TimeoutError:
+        except (TimeoutError, asyncio.TimeoutError):
             logger.warning(
                 "Timeout in frame processing cycle after %s seconds - resetting", 5
             )
diff --git a/demo/app.py b/demo/app.py
index a34c83f..6dfb4e4 100644
--- a/demo/app.py
+++ b/demo/app.py
@@ -214,7 +214,7 @@ if __name__ == "__main__":
 * An audio frame is represented as a tuple of (frame_rate, audio_samples) where `audio_samples` is a numpy array of shape (num_channels, num_samples).
 * You can also specify the audio layout ("mono" or "stereo") in the emit method by retuning it as the third element of the tuple. If not specified, the default is "mono".
 * The `time_limit` parameter is the maximum time in seconds the conversation will run. If the time limit is reached, the audio stream will stop.
-
+* The `emit` method SHOULD NOT block. If a frame is not ready to be sent, the method should return None.
 
 ## Deployment
 
diff --git a/pyproject.toml b/pyproject.toml
index 3aaed1e..f75bd17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "gradio_webrtc"
-version = "0.0.6a3"
+version = "0.0.6"
 description = "Stream images in realtime with webrtc"
 readme = "README.md"
 license = "apache-2.0"