working prototype

2026-02-05 18:09:23 +08:00 · 2024-10-17 15:34:57 -07:00
parent 35c2e313d2
commit cff6073df0
18 changed files with 1240 additions and 496 deletions
--- a/demo/app.py
+++ b/demo/app.py
@@ -1,72 +1,63 @@
+import logging
+
+# Keep third-party libraries quiet: the root logger only lets WARNING and above through.
+logging.basicConfig(level=logging.WARNING)
+
+# Dedicated console handler for the library logger; DEBUG so the handler filters nothing.
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.DEBUG)
+
+# Prefix every message with the logger name and severity.
+formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
+console_handler.setFormatter(formatter)
+
+# Turn on verbose logging for gradio_webrtc only.
+logger = logging.getLogger("gradio_webrtc")
+logger.setLevel(logging.DEBUG)
+logger.addHandler(console_handler)
+# Propagated records reach ancestor handlers regardless of the ancestor logger's
+# level, so without this every gradio_webrtc message would also be printed by the
+# root handler installed by basicConfig() (i.e. twice, in two different formats).
+logger.propagate = False
+
+
 import gradio as gr
-import cv2
-from huggingface_hub import hf_hub_download
-from gradio_webrtc import WebRTC
-from twilio.rest import Client
-import os
-from inference import YOLOv10
-
-model_file = hf_hub_download(
-    repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
-)
-
-model = YOLOv10(model_file)
-
-account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
-auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
-
-if account_sid and auth_token:
-    client = Client(account_sid, auth_token)
-
-    token = client.tokens.create()
-
-    rtc_configuration = {
-        "iceServers": token.ice_servers,
-        "iceTransportPolicy": "relay",
-    }
-else:
-    rtc_configuration = None
+import numpy as np
+from gradio_webrtc import WebRTC, StreamHandler
+from queue import Queue
+import time


-def detection(image, conf_threshold=0.3):
-    image = cv2.resize(image, (model.input_width, model.input_height))
-    new_image = model.detect_objects(image, conf_threshold)
-    return cv2.resize(new_image, (500, 500))
+class EchoHandler(StreamHandler):
+    """Stream handler that sends every received frame straight back.
+
+    Frames handed to ``receive`` are stored in a FIFO queue and returned,
+    in arrival order, by ``emit``.
+    """
+
+    def __init__(self) -> None:
+        # Give the base class a chance to initialise its own state.
+        super().__init__()
+        self.queue = Queue()
+
+    def receive(self, frame: tuple[int, np.ndarray] | np.ndarray) -> None:
+        """Store an incoming frame until a later ``emit`` call retrieves it."""
+        self.queue.put(frame)
+
+    def emit(self) -> tuple[int, np.ndarray] | np.ndarray:
+        """Return the oldest stored frame, blocking until one is available."""
+        return self.queue.get()


 css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
                      .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""


-with gr.Blocks(css=css) as demo:
+# NOTE(review): the `css` string defined above is no longer passed to gr.Blocks(),
+# so the .my-group / .my-column rules named in elem_classes below are not loaded
+# here — confirm this is intentional.
+with gr.Blocks() as demo:
    gr.HTML(
        """
    <h1 style='text-align: center'>
-    YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
+    Audio Streaming (Powered by WebRTC ⚡️)
    </h1>
    """
    )
-    gr.HTML(
-        """
-        <h3 style='text-align: center'>
-        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
-        </h3>
-        """
-    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
-            image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
-            conf_threshold = gr.Slider(
-                label="Confidence Threshold",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.05,
-                value=0.30,
+            # Audio component used for both directions of the stream ("send-receive").
+            # NOTE(review): rtc_configuration=None passes no ICE/TURN server settings;
+            # presumably fine for local runs — confirm for deployments behind NAT.
+            audio = WebRTC(
+                label="Stream",
+                rtc_configuration=None,
+                mode="send-receive",
+                modality="audio",
            )

-        image.stream(
-            fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10
-        )
+        # Wire an EchoHandler instance to the component; `audio` is both the input
+        # and the output of the stream.
+        # NOTE(review): time_limit=15 is presumably a per-stream limit in seconds —
+        # confirm against the gradio_webrtc documentation.
+        audio.stream(fn=EchoHandler(), inputs=[audio], outputs=[audio], time_limit=15)
+

 if __name__ == "__main__":
    demo.launch()