final touches

2026-02-05 18:09:23 +08:00 · 2024-10-23 15:52:48 -07:00
parent 1688502e99
commit e87c4d49e8
3 changed files with 75 additions and 62 deletions
--- a/backend/gradio_webrtc/utils.py
+++ b/backend/gradio_webrtc/utils.py
@@ -1,7 +1,6 @@
 import asyncio
 import fractions
 import logging
-import threading
 from typing import Callable

 import av
@@ -12,11 +11,10 @@ logger = logging.getLogger(__name__)
 AUDIO_PTIME = 0.020


-def player_worker_decode(
-    loop,
+async def player_worker_decode(
    next_frame: Callable,
    queue: asyncio.Queue,
-    thread_quit: threading.Event,
+    thread_quit: asyncio.Event,
    quit_on_none: bool = False,
    sample_rate: int = 48000,
    frame_size: int = int(48000 * AUDIO_PTIME),
@@ -31,32 +29,54 @@ def player_worker_decode(
    )

    while not thread_quit.is_set():
-        frame = next_frame()
-        if frame is None:
-            if quit_on_none:
-                asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+        try:
+            async with asyncio.timeout(5):
+                # Get next frame
+                frame = await next_frame()
+
+                if frame is None:
+                    if quit_on_none:
+                        await queue.put(None)
+                        break
+                    continue
+
+                if len(frame) == 2:
+                    sample_rate, audio_array = frame
+                    layout = "mono"
+                elif len(frame) == 3:
+                    sample_rate, audio_array, layout = frame
+
+                logger.debug(
+                    "received array with shape %s sample rate %s layout %s",
+                    audio_array.shape,
+                    sample_rate,
+                    layout,
+                )
+                format = "s16" if audio_array.dtype == "int16" else "fltp"
+
+                # Convert to audio frame and resample
+                # This runs in the same timeout context
+                frame = av.AudioFrame.from_ndarray(
+                    audio_array, format=format, layout=layout
+                )
+                frame.sample_rate = sample_rate
+
+                for processed_frame in audio_resampler.resample(frame):
+                    processed_frame.pts = audio_samples
+                    processed_frame.time_base = audio_time_base
+                    audio_samples += processed_frame.samples
+                    await queue.put(processed_frame)
+                    logger.debug("Queue size utils.py: %s", queue.qsize())
+
+        except TimeoutError:
+            logger.warning(
+                "Timeout in frame processing cycle after %s seconds - resetting", 5
+            )
            continue
+        except Exception as e:
+            import traceback

-        if len(frame) == 2:
-            sample_rate, audio_array = frame
-            layout = "mono"
-        elif len(frame) == 3:
-            sample_rate, audio_array, layout = frame
-
-        logger.debug(
-            "received array with shape %s sample rate %s layout %s",
-            audio_array.shape,
-            sample_rate,
-            layout,
-        )
-        format = "s16" if audio_array.dtype == "int16" else "fltp"
-
-        frame = av.AudioFrame.from_ndarray(audio_array, format=format, layout=layout)  # type: ignore
-        frame.sample_rate = sample_rate
-        for frame in audio_resampler.resample(frame):
-            # fix timestamps
-            frame.pts = audio_samples
-            frame.time_base = audio_time_base
-            audio_samples += frame.samples
-            asyncio.run_coroutine_threadsafe(queue.put(frame), loop)
-            logger.debug("Queue size utils.py: %s", queue.qsize())
+            exec = traceback.format_exc()
+            logger.debug("traceback %s", exec)
+            logger.error("Error processing frame: %s", str(e))
+            continue