mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
working prototype
This commit is contained in:
91
demo/app.py
91
demo/app.py
@@ -1,72 +1,63 @@
|
||||
import logging
|
||||
|
||||
# Configure the root logger to WARNING to suppress debug messages from other libraries
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
|
||||
# Create a console handler
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(logging.DEBUG)
|
||||
|
||||
# Create a formatter
|
||||
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
|
||||
console_handler.setFormatter(formatter)
|
||||
|
||||
# Configure the logger for your specific library
|
||||
logger = logging.getLogger("gradio_webrtc")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
|
||||
import gradio as gr
|
||||
import cv2
|
||||
from huggingface_hub import hf_hub_download
|
||||
from gradio_webrtc import WebRTC
|
||||
from twilio.rest import Client
|
||||
import os
|
||||
from inference import YOLOv10
|
||||
|
||||
model_file = hf_hub_download(
|
||||
repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
|
||||
)
|
||||
|
||||
model = YOLOv10(model_file)
|
||||
|
||||
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
||||
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
|
||||
|
||||
if account_sid and auth_token:
|
||||
client = Client(account_sid, auth_token)
|
||||
|
||||
token = client.tokens.create()
|
||||
|
||||
rtc_configuration = {
|
||||
"iceServers": token.ice_servers,
|
||||
"iceTransportPolicy": "relay",
|
||||
}
|
||||
else:
|
||||
rtc_configuration = None
|
||||
import numpy as np
|
||||
from gradio_webrtc import WebRTC, StreamHandler
|
||||
from queue import Queue
|
||||
import time
|
||||
|
||||
|
||||
def detection(image, conf_threshold=0.3):
    """Run the module-level ``model`` on one frame and return a 500x500 result.

    Args:
        image: Frame to analyse; it is resized to the model's input size
            before being passed to ``model.detect_objects``.
        conf_threshold: Confidence threshold forwarded unchanged to
            ``model.detect_objects``.

    Returns:
        The output of ``model.detect_objects`` resized to 500x500
        (presumably the frame with detections drawn on it; confirm against
        the ``YOLOv10`` helper).
    """
    # cv2.resize takes the target size as (width, height).
    model_input_size = (model.input_width, model.input_height)
    model_frame = cv2.resize(image, model_input_size)
    annotated = model.detect_objects(model_frame, conf_threshold)
    return cv2.resize(annotated, (500, 500))
|
||||
class EchoHandler(StreamHandler):
    """Stream handler that plays back exactly what it receives.

    Frames passed to ``receive`` are stored in a FIFO queue and handed back,
    in arrival order, by ``emit``.
    """

    def __init__(self) -> None:
        # Let the base class set up whatever state it needs before adding ours.
        super().__init__()
        # Unbounded FIFO buffer between the receiving and emitting sides.
        self.queue = Queue()

    def receive(self, frame: tuple[int, np.ndarray] | np.ndarray) -> None:
        """Buffer an incoming frame so that ``emit`` can return it later."""
        self.queue.put(frame)

    def emit(self) -> tuple[int, np.ndarray] | np.ndarray:
        """Return the oldest buffered frame.

        ``Queue.get()`` is called with its defaults, so this blocks until a
        frame has been received.
        """
        return self.queue.get()
|
||||
|
||||
|
||||
css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
|
||||
.my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
|
||||
|
||||
|
||||
with gr.Blocks(css=css) as demo:
|
||||
with gr.Blocks() as demo:
|
||||
gr.HTML(
|
||||
"""
|
||||
<h1 style='text-align: center'>
|
||||
YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
|
||||
Audio Streaming (Powered by WebRTC ⚡️)
|
||||
</h1>
|
||||
"""
|
||||
)
|
||||
gr.HTML(
|
||||
"""
|
||||
<h3 style='text-align: center'>
|
||||
<a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
|
||||
</h3>
|
||||
"""
|
||||
)
|
||||
with gr.Column(elem_classes=["my-column"]):
|
||||
with gr.Group(elem_classes=["my-group"]):
|
||||
image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
|
||||
conf_threshold = gr.Slider(
|
||||
label="Confidence Threshold",
|
||||
minimum=0.0,
|
||||
maximum=1.0,
|
||||
step=0.05,
|
||||
value=0.30,
|
||||
audio = WebRTC(
|
||||
label="Stream",
|
||||
rtc_configuration=None,
|
||||
mode="send-receive",
|
||||
modality="audio",
|
||||
)
|
||||
|
||||
image.stream(
|
||||
fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10
|
||||
)
|
||||
audio.stream(fn=EchoHandler(), inputs=[audio], outputs=[audio], time_limit=15)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo.launch()
|
||||
|
||||
@@ -6,7 +6,6 @@ import os
|
||||
from pydub import AudioSegment
|
||||
|
||||
|
||||
|
||||
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
||||
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
|
||||
|
||||
@@ -24,10 +23,16 @@ else:
|
||||
|
||||
import time
|
||||
|
||||
|
||||
def generation(num_steps):
    """Yield the cantina clip ``num_steps`` times as audio chunks.

    Args:
        num_steps: Number of chunks to produce.

    Yields:
        ``(frame_rate, samples)`` tuples, where ``samples`` is the clip's
        sample array reshaped to a single row (shape ``(1, -1)``).
    """
    # NOTE(review): absolute path on the original author's machine; the demo
    # only runs where this file exists.
    # Decode the file once instead of on every step: the clip never changes.
    segment = AudioSegment.from_file(
        "/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav"
    )
    for _ in range(num_steps):
        # Build a fresh array per chunk so consumers never share a buffer.
        yield (
            segment.frame_rate,
            np.array(segment.get_array_of_samples()).reshape(1, -1),
        )
        # Pause between chunks, as the original does.
        time.sleep(3.5)
|
||||
|
||||
|
||||
@@ -45,8 +50,12 @@ with gr.Blocks() as demo:
|
||||
)
|
||||
with gr.Column(elem_classes=["my-column"]):
|
||||
with gr.Group(elem_classes=["my-group"]):
|
||||
audio = WebRTC(label="Stream", rtc_configuration=rtc_configuration,
|
||||
mode="receive", modality="audio")
|
||||
audio = WebRTC(
|
||||
label="Stream",
|
||||
rtc_configuration=rtc_configuration,
|
||||
mode="receive",
|
||||
modality="audio",
|
||||
)
|
||||
num_steps = gr.Slider(
|
||||
label="Number of Steps",
|
||||
minimum=1,
|
||||
@@ -57,8 +66,7 @@ with gr.Blocks() as demo:
|
||||
button = gr.Button("Generate")
|
||||
|
||||
audio.stream(
|
||||
fn=generation, inputs=[num_steps], outputs=[audio],
|
||||
trigger=button.click
|
||||
fn=generation, inputs=[num_steps], outputs=[audio], trigger=button.click
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import os
|
||||
from pydub import AudioSegment
|
||||
|
||||
|
||||
|
||||
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
||||
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
|
||||
|
||||
@@ -24,10 +23,16 @@ else:
|
||||
|
||||
import time
|
||||
|
||||
|
||||
def generation(num_steps):
    """Yield the cantina clip ``num_steps`` times as audio chunks.

    Args:
        num_steps: Number of chunks to produce.

    Yields:
        ``(frame_rate, samples)`` tuples, where ``samples`` is the clip's
        sample array reshaped to a single row (shape ``(1, -1)``).
    """
    # NOTE(review): absolute path on the original author's machine; the demo
    # only runs where this file exists.
    # Decode the file once instead of on every step: the clip never changes.
    segment = AudioSegment.from_file(
        "/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav"
    )
    for _ in range(num_steps):
        # Build a fresh array per chunk so consumers never share a buffer.
        yield (
            segment.frame_rate,
            np.array(segment.get_array_of_samples()).reshape(1, -1),
        )
        # Pause between chunks, as the original does.
        time.sleep(3.5)
|
||||
|
||||
|
||||
@@ -48,9 +53,12 @@ with gr.Blocks() as demo:
|
||||
gr.Slider()
|
||||
with gr.Column():
|
||||
# audio = gr.Audio(interactive=False)
|
||||
audio = WebRTC(label="Stream", rtc_configuration=rtc_configuration,
|
||||
mode="receive", modality="audio")
|
||||
|
||||
audio = WebRTC(
|
||||
label="Stream",
|
||||
rtc_configuration=rtc_configuration,
|
||||
mode="receive",
|
||||
modality="audio",
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
136
demo/space.py
136
demo/space.py
@@ -1,26 +1,91 @@
|
||||
|
||||
import gradio as gr
|
||||
import os
|
||||
|
||||
_docs = {'WebRTC':
|
||||
{'description': 'Stream audio/video with WebRTC',
|
||||
'members': {'__init__':
|
||||
{
|
||||
'rtc_configuration': {'type': 'dict[str, Any] | None', 'default': 'None', 'description': "The configration dictionary to pass to the RTCPeerConnection constructor. If None, the default configuration is used."},
|
||||
'height': {'type': 'int | str | None', 'default': 'None', 'description': 'The height of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.'},
|
||||
'width': {'type': 'int | str | None', 'default': 'None', 'description': 'The width of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.'},
|
||||
'label': {'type': 'str | None', 'default': 'None', 'description': 'the label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.'},
|
||||
'show_label': {'type': 'bool | None', 'default': 'None', 'description': 'if True, will display label.'}, 'container': {'type': 'bool', 'default': 'True', 'description': 'if True, will place the component in a container - providing some extra padding around the border.'},
|
||||
'scale': {'type': 'int | None', 'default': 'None', 'description': 'relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.'},
|
||||
'min_width': {'type': 'int', 'default': '160', 'description': 'minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.'},
|
||||
'interactive': {'type': 'bool | None', 'default': 'None', 'description': 'if True, will allow users to upload a video; if False, can only be used to display videos. If not provided, this is inferred based on whether the component is used as an input or output.'}, 'visible': {'type': 'bool', 'default': 'True', 'description': 'if False, component will be hidden.'},
|
||||
'elem_id': {'type': 'str | None', 'default': 'None', 'description': 'an optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.'},
|
||||
'elem_classes': {'type': 'list[str] | str | None', 'default': 'None', 'description': 'an optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.'},
|
||||
'render': {'type': 'bool', 'default': 'True', 'description': 'if False, component will not render be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.'},
|
||||
'key': {'type': 'int | str | None', 'default': 'None', 'description': 'if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.'},
|
||||
'mirror_webcam': {'type': 'bool', 'default': 'True', 'description': 'if True webcam will be mirrored. Default is True.'},
|
||||
},
|
||||
'events': {'tick': {'type': None, 'default': None, 'description': ''}}}, '__meta__': {'additional_interfaces': {}, 'user_fn_refs': {'WebRTC': []}}}
|
||||
_docs = {
|
||||
"WebRTC": {
|
||||
"description": "Stream audio/video with WebRTC",
|
||||
"members": {
|
||||
"__init__": {
|
||||
"rtc_configuration": {
|
||||
"type": "dict[str, Any] | None",
|
||||
"default": "None",
|
||||
"description": "The configration dictionary to pass to the RTCPeerConnection constructor. If None, the default configuration is used.",
|
||||
},
|
||||
"height": {
|
||||
"type": "int | str | None",
|
||||
"default": "None",
|
||||
"description": "The height of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.",
|
||||
},
|
||||
"width": {
|
||||
"type": "int | str | None",
|
||||
"default": "None",
|
||||
"description": "The width of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.",
|
||||
},
|
||||
"label": {
|
||||
"type": "str | None",
|
||||
"default": "None",
|
||||
"description": "the label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.",
|
||||
},
|
||||
"show_label": {
|
||||
"type": "bool | None",
|
||||
"default": "None",
|
||||
"description": "if True, will display label.",
|
||||
},
|
||||
"container": {
|
||||
"type": "bool",
|
||||
"default": "True",
|
||||
"description": "if True, will place the component in a container - providing some extra padding around the border.",
|
||||
},
|
||||
"scale": {
|
||||
"type": "int | None",
|
||||
"default": "None",
|
||||
"description": "relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.",
|
||||
},
|
||||
"min_width": {
|
||||
"type": "int",
|
||||
"default": "160",
|
||||
"description": "minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.",
|
||||
},
|
||||
"interactive": {
|
||||
"type": "bool | None",
|
||||
"default": "None",
|
||||
"description": "if True, will allow users to upload a video; if False, can only be used to display videos. If not provided, this is inferred based on whether the component is used as an input or output.",
|
||||
},
|
||||
"visible": {
|
||||
"type": "bool",
|
||||
"default": "True",
|
||||
"description": "if False, component will be hidden.",
|
||||
},
|
||||
"elem_id": {
|
||||
"type": "str | None",
|
||||
"default": "None",
|
||||
"description": "an optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.",
|
||||
},
|
||||
"elem_classes": {
|
||||
"type": "list[str] | str | None",
|
||||
"default": "None",
|
||||
"description": "an optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.",
|
||||
},
|
||||
"render": {
|
||||
"type": "bool",
|
||||
"default": "True",
|
||||
"description": "if False, component will not render be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.",
|
||||
},
|
||||
"key": {
|
||||
"type": "int | str | None",
|
||||
"default": "None",
|
||||
"description": "if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.",
|
||||
},
|
||||
"mirror_webcam": {
|
||||
"type": "bool",
|
||||
"default": "True",
|
||||
"description": "if True webcam will be mirrored. Default is True.",
|
||||
},
|
||||
},
|
||||
"events": {"tick": {"type": None, "default": None, "description": ""}},
|
||||
},
|
||||
"__meta__": {"additional_interfaces": {}, "user_fn_refs": {"WebRTC": []}},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,16 +101,19 @@ with gr.Blocks(
|
||||
),
|
||||
) as demo:
|
||||
gr.Markdown(
|
||||
"""
|
||||
"""
|
||||
<h1 style='text-align: center; margin-bottom: 1rem'> Gradio WebRTC ⚡️ </h1>
|
||||
|
||||
<div style="display: flex; flex-direction: row; justify-content: center">
|
||||
<img style="display: block; padding-right: 5px; height: 20px;" alt="Static Badge" src="https://img.shields.io/badge/version%20-%200.0.5%20-%20orange">
|
||||
<a href="https://github.com/freddyaboulton/gradio-webrtc" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/github-white?logo=github&logoColor=black"></a>
|
||||
</div>
|
||||
""", elem_classes=["md-custom"], header_links=True)
|
||||
""",
|
||||
elem_classes=["md-custom"],
|
||||
header_links=True,
|
||||
)
|
||||
gr.Markdown(
|
||||
"""
|
||||
"""
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
@@ -195,17 +263,24 @@ with gr.Blocks() as demo:
|
||||
rtc = WebRTC(rtc_configuration=rtc_configuration, ...)
|
||||
...
|
||||
```
|
||||
""", elem_classes=["md-custom"], header_links=True)
|
||||
""",
|
||||
elem_classes=["md-custom"],
|
||||
header_links=True,
|
||||
)
|
||||
|
||||
|
||||
gr.Markdown("""
|
||||
gr.Markdown(
|
||||
"""
|
||||
##
|
||||
""", elem_classes=["md-custom"], header_links=True)
|
||||
""",
|
||||
elem_classes=["md-custom"],
|
||||
header_links=True,
|
||||
)
|
||||
|
||||
gr.ParamViewer(value=_docs["WebRTC"]["members"]["__init__"], linkify=[])
|
||||
|
||||
|
||||
demo.load(None, js=r"""function() {
|
||||
demo.load(
|
||||
None,
|
||||
js=r"""function() {
|
||||
const refs = {};
|
||||
const user_fn_refs = {
|
||||
WebRTC: [], };
|
||||
@@ -239,6 +314,7 @@ with gr.Blocks() as demo:
|
||||
})
|
||||
}
|
||||
|
||||
""")
|
||||
""",
|
||||
)
|
||||
|
||||
demo.launch()
|
||||
|
||||
@@ -24,7 +24,6 @@ else:
|
||||
def generation(input_video):
|
||||
cap = cv2.VideoCapture(input_video)
|
||||
|
||||
|
||||
iterating = True
|
||||
|
||||
while iterating:
|
||||
@@ -35,6 +34,7 @@ def generation(input_video):
|
||||
display_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||
yield display_frame
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
gr.HTML(
|
||||
"""
|
||||
@@ -47,11 +47,17 @@ with gr.Blocks() as demo:
|
||||
with gr.Column():
|
||||
input_video = gr.Video(sources="upload")
|
||||
with gr.Column():
|
||||
output_video = WebRTC(label="Video Stream", rtc_configuration=rtc_configuration,
|
||||
mode="receive", modality="video")
|
||||
output_video = WebRTC(
|
||||
label="Video Stream",
|
||||
rtc_configuration=rtc_configuration,
|
||||
mode="receive",
|
||||
modality="video",
|
||||
)
|
||||
output_video.stream(
|
||||
fn=generation, inputs=[input_video], outputs=[output_video],
|
||||
trigger=input_video.upload
|
||||
fn=generation,
|
||||
inputs=[input_video],
|
||||
outputs=[output_video],
|
||||
trigger=input_video.upload,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@ def generation():
|
||||
yield frame
|
||||
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
gr.HTML(
|
||||
"""
|
||||
@@ -39,12 +38,15 @@ with gr.Blocks() as demo:
|
||||
</h1>
|
||||
"""
|
||||
)
|
||||
output_video = WebRTC(label="Video Stream", rtc_configuration=rtc_configuration,
|
||||
mode="receive", modality="video")
|
||||
output_video = WebRTC(
|
||||
label="Video Stream",
|
||||
rtc_configuration=rtc_configuration,
|
||||
mode="receive",
|
||||
modality="video",
|
||||
)
|
||||
button = gr.Button("Start", variant="primary")
|
||||
output_video.stream(
|
||||
fn=generation, inputs=None, outputs=[output_video],
|
||||
trigger=button.click
|
||||
fn=generation, inputs=None, outputs=[output_video], trigger=button.click
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user