From d52a21449acaf33b4a0ae8104caa45f5111f61e9 Mon Sep 17 00:00:00 2001
From: freddyaboulton
Date: Fri, 11 Oct 2024 14:57:20 -0700
Subject: [PATCH] fix bugs + release version 0.0.4

---
 backend/gradio_webrtc/webrtc.py |  6 ++-
 demo/app_orig.py                | 72 +++++++++++++++++++++++++++++++++
 demo/audio_out_2.py             | 57 ++++++++++++++++++++++++
 demo/space.py                   |  1 -
 demo/video_out_stream.py        | 52 ++++++++++++++++++++++++
 frontend/shared/Webcam.svelte   |  1 +
 pyproject.toml                  |  2 +-
 7 files changed, 188 insertions(+), 3 deletions(-)
 create mode 100644 demo/app_orig.py
 create mode 100644 demo/audio_out_2.py
 create mode 100644 demo/video_out_stream.py

diff --git a/backend/gradio_webrtc/webrtc.py b/backend/gradio_webrtc/webrtc.py
index 00a49bf..a18edbd 100644
--- a/backend/gradio_webrtc/webrtc.py
+++ b/backend/gradio_webrtc/webrtc.py
@@ -369,6 +369,10 @@ class WebRTC(Component):
     ):
         from gradio.blocks import Block
 
+        if inputs is None:
+            inputs = []
+        if outputs is None:
+            outputs = []
         if isinstance(inputs, Block):
             inputs = [inputs]
         if isinstance(outputs, Block):
@@ -404,7 +408,7 @@ class WebRTC(Component):
                 js=js,
             )
         elif self.mode == "receive":
-            if self in cast(list[Block], inputs):
+            if isinstance(inputs, list) and self in cast(list[Block], inputs):
                 raise ValueError(
                     "In the receive mode stream event, the WebRTC component cannot be an input."
                 )
diff --git a/demo/app_orig.py b/demo/app_orig.py
new file mode 100644
index 0000000..3489fab
--- /dev/null
+++ b/demo/app_orig.py
@@ -0,0 +1,72 @@
+import gradio as gr
+import cv2
+from huggingface_hub import hf_hub_download
+from gradio_webrtc import WebRTC
+from twilio.rest import Client
+import os
+from inference import YOLOv10
+
+model_file = hf_hub_download(
+    repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
+)
+
+model = YOLOv10(model_file)
+
+account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+
+if account_sid and auth_token:
+    client = Client(account_sid, auth_token)
+
+    token = client.tokens.create()
+
+    rtc_configuration = {
+        "iceServers": token.ice_servers,
+        "iceTransportPolicy": "relay",
+    }
+else:
+    rtc_configuration = None
+
+
+def detection(image, conf_threshold=0.3):
+    image = cv2.resize(image, (model.input_width, model.input_height))
+    new_image = model.detect_objects(image, conf_threshold)
+    return cv2.resize(new_image, (500, 500))
+
+
+css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
+         .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
+
+
+with gr.Blocks(css=css) as demo:
+    gr.HTML(
+        """
+    <h1 style='text-align: center'>
+    YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
+    </h1>
+    """
+    )
+    gr.HTML(
+        """
+        <h3 style='text-align: center'>
+        arXiv | github
+        </h3>
+ """ + ) + with gr.Column(elem_classes=["my-column"]): + with gr.Group(elem_classes=["my-group"]): + image = WebRTC(label="Stream", rtc_configuration=rtc_configuration) + conf_threshold = gr.Slider( + label="Confidence Threshold", + minimum=0.0, + maximum=1.0, + step=0.05, + value=0.30, + ) + + image.stream( + fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10 + ) + +if __name__ == "__main__": + demo.launch() diff --git a/demo/audio_out_2.py b/demo/audio_out_2.py new file mode 100644 index 0000000..ef43b6b --- /dev/null +++ b/demo/audio_out_2.py @@ -0,0 +1,57 @@ +import gradio as gr +import numpy as np +from gradio_webrtc import WebRTC +from twilio.rest import Client +import os +from pydub import AudioSegment + + + +account_sid = os.environ.get("TWILIO_ACCOUNT_SID") +auth_token = os.environ.get("TWILIO_AUTH_TOKEN") + +if account_sid and auth_token: + client = Client(account_sid, auth_token) + + token = client.tokens.create() + + rtc_configuration = { + "iceServers": token.ice_servers, + "iceTransportPolicy": "relay", + } +else: + rtc_configuration = None + +import time + +def generation(num_steps): + for _ in range(num_steps): + segment = AudioSegment.from_file("/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav") + yield (segment.frame_rate, np.array(segment.get_array_of_samples()).reshape(1, -1)) + time.sleep(3.5) + + +css = """.my-group {max-width: 600px !important; max-height: 600 !important;} + .my-column {display: flex !important; justify-content: center !important; align-items: center !important};""" + + +with gr.Blocks() as demo: + gr.HTML( + """ +

+ Audio Streaming (Powered by WebRaTC ⚡️) +

+ """ + ) + with gr.Row(): + with gr.Column(): + gr.Slider() + with gr.Column(): + # audio = gr.Audio(interactive=False) + audio = WebRTC(label="Stream", rtc_configuration=rtc_configuration, + mode="receive", modality="audio") + + + +if __name__ == "__main__": + demo.launch() diff --git a/demo/space.py b/demo/space.py index 794d05e..98813c8 100644 --- a/demo/space.py +++ b/demo/space.py @@ -69,7 +69,6 @@ else: def detection(image, conf_threshold=0.3): - print("running detection") image = cv2.resize(image, (model.input_width, model.input_height)) new_image = model.detect_objects(image, conf_threshold) return cv2.resize(new_image, (500, 500)) diff --git a/demo/video_out_stream.py b/demo/video_out_stream.py new file mode 100644 index 0000000..1aeb4d6 --- /dev/null +++ b/demo/video_out_stream.py @@ -0,0 +1,52 @@ +import gradio as gr +from gradio_webrtc import WebRTC +from twilio.rest import Client +import os +import cv2 + + +account_sid = os.environ.get("TWILIO_ACCOUNT_SID") +auth_token = os.environ.get("TWILIO_AUTH_TOKEN") + +if account_sid and auth_token: + client = Client(account_sid, auth_token) + + token = client.tokens.create() + + rtc_configuration = { + "iceServers": token.ice_servers, + "iceTransportPolicy": "relay", + } +else: + rtc_configuration = None + + +def generation(): + url = "https://download.tsi.telecom-paristech.fr/gpac/dataset/dash/uhd/mux_sources/hevcds_720p30_2M.mp4" + cap = cv2.VideoCapture(url) + iterating = True + while iterating: + iterating, frame = cap.read() + yield frame + + + +with gr.Blocks() as demo: + gr.HTML( + """ +

+ Video Streaming (Powered by WebRTC ⚡️) +

+ """ + ) + output_video = WebRTC(label="Video Stream", rtc_configuration=rtc_configuration, + mode="receive", modality="video") + button = gr.Button("Start", variant="primary") + output_video.stream( + fn=generation, inputs=None, outputs=[output_video], + trigger=button.click + ) + + +if __name__ == "__main__": + demo.launch() diff --git a/frontend/shared/Webcam.svelte b/frontend/shared/Webcam.svelte index 5313a0c..7e30e5f 100644 --- a/frontend/shared/Webcam.svelte +++ b/frontend/shared/Webcam.svelte @@ -148,6 +148,7 @@ } else { stop(pc); stream_state = "closed"; + _time_limit = null; await access_webcam(); } diff --git a/pyproject.toml b/pyproject.toml index fe590f6..08b5f20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "hatchling.build" [project] name = "gradio_webrtc" -version = "0.0.2" +version = "0.0.4" description = "Stream images in realtime with webrtc" readme = "README.md" license = "apache-2.0"