# Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
# Synced 2026-02-04 17:39:23 +08:00 - 109 lines, 3.0 KiB, Python
import gradio as gr
|
|
import cv2
|
|
import numpy as np
|
|
from gradio_webrtc import WebRTC
|
|
from pathlib import Path
|
|
|
|
# Class labels, looked up by the integer class id the detector emits
# (``CLASSES[idx]`` in ``detection``), so the order must not change.
# NOTE(review): presumably the label set the MobileNet-SSD Caffe model was
# trained on (background + 20 object categories) -- confirm against the model.
CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
|
|
# One random 3-component colour per class label, each component drawn
# uniformly from [0, 255); rows are indexed by class id like CLASSES.
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
|
|
|
|
# The model files are expected to sit next to this script.
directory = Path(__file__).parent

# Absolute paths to the Caffe weights and the network definition.
MODEL = str((directory / "MobileNetSSD_deploy.caffemodel").resolve())
PROTOTXT = str((directory / "MobileNetSSD_deploy.prototxt.txt").resolve())

# Loaded once at import time and shared by every call to ``detection``.
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
|
|
|
|
|
|
def detection(image, conf_threshold=0.3):
    """Run the module-level detector on one frame and draw its detections.

    Args:
        image: Frame to analyse; anything ``cv2.resize`` accepts.
            NOTE(review): assumed to be an H x W x 3 array coming from the
            WebRTC stream -- confirm the channel order expected by the model.
        conf_threshold: A detection is drawn only when its confidence is
            strictly greater than this value.

    Returns:
        The frame resized to 500x500 with a rectangle and a
        ``"<class>: <confidence>%"`` caption drawn for every kept detection.
    """
    # The network input is a 300x300 blob; 0.007843 (~1 / 127.5) is the scale
    # factor and 127.5 the mean handed to blobFromImage.
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
    )
    net.setInput(blob)
    detections = net.forward()

    # Drawing happens on a fixed-size 500x500 copy of the frame.
    image = cv2.resize(image, (500, 500))
    (h, w) = image.shape[:2]
    # Box coordinates are multiplied by the frame size to get pixel positions.
    box_scale = np.array([w, h, w, h])

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence > conf_threshold:
            # Column 1 holds the class id, columns 3..6 the box corners.
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * box_scale
            # Hand OpenCV plain Python numbers rather than NumPy scalars/arrays.
            start_x, start_y, end_x, end_y = (int(v) for v in box)
            color = tuple(float(c) for c in COLORS[idx])

            # display the prediction
            label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
            cv2.rectangle(image, (start_x, start_y), (end_x, end_y), color, 2)
            # Put the caption above the box unless that would leave the frame.
            y = start_y - 15 if start_y - 15 > 15 else start_y + 15
            cv2.putText(
                image, label, (start_x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
            )
    return image
|
|
|
|
|
|
# Custom CSS for the Blocks layout: caps the size of the stream group and
# centres the column's content. Lengths need a unit (``600px``) and each
# declaration's ``;`` belongs inside the closing brace.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
|
|
|
|
|
|
# Build the demo UI: a heading, reference links, the WebRTC stream component
# and a confidence slider, with ``detection`` attached to the stream.
with gr.Blocks(css=css) as demo:
    # NOTE(review): the heading and links below mention YOLOv10, while this
    # script loads a MobileNet-SSD Caffe model -- confirm which is intended.
    gr.HTML(
        """
        <h1 style='text-align: center'>
        YOLOv10 Webcam Stream
        </h1>
        """
    )
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
        </h3>
        """
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = WebRTC(label="Stream")
            # NOTE(review): this slider is not listed in ``inputs`` below, so
            # ``detection`` runs with its default threshold -- confirm whether
            # the installed gradio_webrtc forwards extra inputs before wiring it.
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

        image.webrtc_stream(
            fn=detection,
            inputs=[image],
            stream_every=0.05,
            time_limit=30,
        )
|
|
|
|
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|