mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
Working draft
This commit is contained in:
44
demo/README.md
Normal file
44
demo/README.md
Normal file
@@ -0,0 +1,44 @@
|
||||
---
|
||||
license: mit
|
||||
tags:
|
||||
- object-detection
|
||||
- computer-vision
|
||||
- yolov10
|
||||
datasets:
|
||||
- detection-datasets/coco
|
||||
sdk: gradio
|
||||
sdk_version: 5.0.0b1
|
||||
---
|
||||
|
||||
### Model Description
|
||||
[YOLOv10: Real-Time End-to-End Object Detection](https://arxiv.org/abs/2405.14458v1)
|
||||
|
||||
- arXiv: https://arxiv.org/abs/2405.14458v1
|
||||
- github: https://github.com/THU-MIG/yolov10
|
||||
|
||||
### Installation
|
||||
```
|
||||
pip install supervision git+https://github.com/THU-MIG/yolov10.git
|
||||
```
|
||||
|
||||
### YOLOv10 Inference
|
||||
```python
|
||||
from ultralytics import YOLOv10
|
||||
import supervision as sv
|
||||
import cv2
|
||||
|
||||
IMAGE_PATH = 'dog.jpeg'
|
||||
|
||||
model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
|
||||
model.predict(IMAGE_PATH, show=True)
|
||||
```
|
||||
|
||||
### BibTeX Entry and Citation Info
|
||||
```
|
||||
@article{wang2024yolov10,
|
||||
title={YOLOv10: Real-Time End-to-End Object Detection},
|
||||
author={Wang, Ao and Chen, Hui and Liu, Lihao and Chen, Kai and Lin, Zijia and Han, Jungong and Ding, Guiguang},
|
||||
journal={arXiv preprint arXiv:2405.14458},
|
||||
year={2024}
|
||||
}
|
||||
```
|
||||
0
demo/__init__.py
Normal file
0
demo/__init__.py
Normal file
108
demo/app.py
Normal file
108
demo/app.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import gradio as gr
|
||||
import cv2
|
||||
import numpy as np
|
||||
from gradio_webrtc import WebRTC
|
||||
from pathlib import Path
|
||||
|
||||
# Class labels for the MobileNet-SSD detector (PASCAL VOC classes);
# index 0 is the background class. Order must match the trained model.
CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# One random BGR color per class, fixed at import time so each class keeps
# the same color for the lifetime of the process (re-randomized per run).
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# Resolve model files relative to this script so the demo works from any CWD.
directory = Path(__file__).parent

# Caffe weights and network definition; expected to sit next to this file.
MODEL = str((directory / "MobileNetSSD_deploy.caffemodel").resolve())
PROTOTXT = str((directory / "MobileNetSSD_deploy.prototxt.txt").resolve())
# Shared detector instance used by detection(); loaded once at import time.
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
|
||||
|
||||
|
||||
def detection(image, conf_threshold=0.3):
    """Run MobileNet-SSD on one frame and draw labelled boxes.

    Args:
        image: input frame as a numpy array (BGR, any size).
        conf_threshold: minimum confidence for a detection to be drawn.

    Returns:
        The frame resized to 500x500 with rectangles and class labels
        drawn for every detection above ``conf_threshold``.
    """
    # The network expects a 300x300 input; blobFromImage also applies the
    # model's scale factor and mean subtraction.
    input_blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
    )
    net.setInput(input_blob)
    detections = net.forward()

    # Draw on a fixed-size copy; box coords below are normalized, so they
    # scale correctly to the 500x500 canvas.
    image = cv2.resize(image, (500, 500))
    (h, w) = image.shape[:2]
    labels = []

    for det in np.arange(0, detections.shape[2]):
        confidence = detections[0, 0, det, 2]
        if confidence > conf_threshold:
            # Class id and normalized box for this detection, scaled to
            # pixel coordinates of the resized frame.
            idx = int(detections[0, 0, det, 1])
            box = detections[0, 0, det, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
            labels.append(label)
            cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
            # Put the label above the box unless that would fall off-screen.
            text_y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(
                image, label, (startX, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
            )

    return image
|
||||
|
||||
|
||||
# Page CSS: constrain the demo to a centered 600px group.
# BUGFIX: "max-height: 600" had no unit, which makes the declaration
# invalid CSS and silently dropped — now "600px".
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""


with gr.Blocks(css=css) as demo:
    # NOTE(review): heading says YOLOv10 but detection() above runs
    # MobileNet-SSD — confirm which model this demo is meant to showcase.
    gr.HTML(
        """
    <h1 style='text-align: center'>
    YOLOv10 Webcam Stream
    </h1>
    """)
    gr.HTML(
        """
    <h3 style='text-align: center'>
    <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
    </h3>
    """)
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            # BUGFIX: label typo "Strean" -> "Stream".
            image = WebRTC(label="Stream")
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

    # BUGFIX: the slider was created but never passed to the stream, so
    # changing it had no effect — detection(image, conf_threshold) needs
    # both inputs.
    image.webrtc_stream(
        fn=detection,
        inputs=[image, conf_threshold],
        stream_every=0.05,
        time_limit=30,
    )

if __name__ == '__main__':
    demo.launch()
|
||||
157
demo/css.css
Normal file
157
demo/css.css
Normal file
@@ -0,0 +1,157 @@
|
||||
html {
|
||||
font-family: Inter;
|
||||
font-size: 16px;
|
||||
font-weight: 400;
|
||||
line-height: 1.5;
|
||||
-webkit-text-size-adjust: 100%;
|
||||
background: #fff;
|
||||
color: #323232;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
text-rendering: optimizeLegibility;
|
||||
}
|
||||
|
||||
:root {
|
||||
--space: 1;
|
||||
--vspace: calc(var(--space) * 1rem);
|
||||
--vspace-0: calc(3 * var(--space) * 1rem);
|
||||
--vspace-1: calc(2 * var(--space) * 1rem);
|
||||
--vspace-2: calc(1.5 * var(--space) * 1rem);
|
||||
--vspace-3: calc(0.5 * var(--space) * 1rem);
|
||||
}
|
||||
|
||||
.app {
|
||||
max-width: 748px !important;
|
||||
}
|
||||
|
||||
/* Body copy: vertical rhythm driven by the --vspace scale. */
.prose p {
  margin: var(--vspace) 0;
  /* BUGFIX: arithmetic is not allowed inside var(); `var(--vspace * 2)`
     is invalid and the declaration was dropped by the parser. calc() is
     required for custom-property math. */
  line-height: calc(var(--vspace) * 2);
  font-size: 1rem;
}
|
||||
|
||||
code {
|
||||
font-family: "Inconsolata", sans-serif;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
h1,
|
||||
h1 code {
|
||||
font-weight: 400;
|
||||
line-height: calc(2.5 / var(--space) * var(--vspace));
|
||||
}
|
||||
|
||||
h1 code {
|
||||
background: none;
|
||||
border: none;
|
||||
letter-spacing: 0.05em;
|
||||
padding-bottom: 5px;
|
||||
position: relative;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
h2 {
|
||||
margin: var(--vspace-1) 0 var(--vspace-2) 0;
|
||||
line-height: 1em;
|
||||
}
|
||||
|
||||
h3,
|
||||
h3 code {
|
||||
margin: var(--vspace-1) 0 var(--vspace-2) 0;
|
||||
line-height: 1em;
|
||||
}
|
||||
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
margin: var(--vspace-3) 0 var(--vspace-3) 0;
|
||||
line-height: var(--vspace);
|
||||
}
|
||||
|
||||
.bigtitle,
|
||||
h1,
|
||||
h1 code {
|
||||
font-size: calc(8px * 4.5);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.title,
|
||||
h2,
|
||||
h2 code {
|
||||
font-size: calc(8px * 3.375);
|
||||
font-weight: lighter;
|
||||
word-break: break-word;
|
||||
border: none;
|
||||
background: none;
|
||||
}
|
||||
|
||||
.subheading1,
|
||||
h3,
|
||||
h3 code {
|
||||
font-size: calc(8px * 1.8);
|
||||
font-weight: 600;
|
||||
border: none;
|
||||
background: none;
|
||||
letter-spacing: 0.1em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
h2 code {
|
||||
padding: 0;
|
||||
position: relative;
|
||||
letter-spacing: 0.05em;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
font-size: calc(8px * 1.1667);
|
||||
font-style: italic;
|
||||
line-height: calc(1.1667 * var(--vspace));
|
||||
margin: var(--vspace-2) var(--vspace-2);
|
||||
}
|
||||
|
||||
.subheading2,
|
||||
h4 {
|
||||
font-size: calc(8px * 1.4292);
|
||||
text-transform: uppercase;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.subheading3,
|
||||
h5 {
|
||||
font-size: calc(8px * 1.2917);
|
||||
line-height: calc(1.2917 * var(--vspace));
|
||||
|
||||
font-weight: lighter;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.15em;
|
||||
}
|
||||
|
||||
h6 {
|
||||
font-size: calc(8px * 1.1667);
|
||||
font-size: 1.1667em;
|
||||
font-weight: normal;
|
||||
font-style: italic;
|
||||
font-family: "le-monde-livre-classic-byol", serif !important;
|
||||
letter-spacing: 0px !important;
|
||||
}
|
||||
|
||||
#start .md > *:first-child {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
h2 + h3 {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
.md hr {
|
||||
border: none;
|
||||
border-top: 1px solid var(--block-border-color);
|
||||
margin: var(--vspace-2) 0 var(--vspace-2) 0;
|
||||
}
|
||||
.prose ul {
|
||||
margin: var(--vspace-2) 0 var(--vspace-1) 0;
|
||||
}
|
||||
|
||||
.gap {
|
||||
gap: 0;
|
||||
}
|
||||
45
demo/draw_boxes.py
Normal file
45
demo/draw_boxes.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from PIL import ImageDraw, ImageFont # type: ignore
|
||||
import colorsys
|
||||
|
||||
|
||||
def get_color(label):
    """Return a stable RGB color as a tuple of three ints in [0, 255].

    The color is derived deterministically from *label* so the same label
    always gets the same color.

    BUGFIX: the original used the builtin ``hash()`` on a string, which is
    randomized per interpreter process (PYTHONHASHSEED since Python 3.3),
    so "consistent colors for each label" only held within one run. CRC32
    of the UTF-8 bytes is stable across runs and platforms.
    """
    import zlib  # local import keeps this helper self-contained

    hash_value = zlib.crc32(str(label).encode("utf-8"))
    # Spread labels over 100 hue buckets; fixed saturation/value keep the
    # palette readable against both light and dark backgrounds.
    hue = (hash_value % 100) / 100.0
    saturation = 0.7
    value = 0.9
    rgb = colorsys.hsv_to_rgb(hue, saturation, value)
    return tuple(int(x * 255) for x in rgb)
|
||||
|
||||
|
||||
def draw_bounding_boxes(image, results: dict, model, threshold=0.3):
    """Draw labelled detection boxes on *image* in place and return it.

    Args:
        image: PIL Image to draw on (mutated in place).
        results: dict with "scores", "labels" and "boxes" entries whose
            elements support .item() / .tolist() — presumably the output of
            a HF object-detection post-processing step; verify at the caller.
        model: only model.config.id2label is used, to map label ids to names.
        threshold: detections with score <= threshold are skipped.

    Returns:
        The same *image* object, with boxes, label backgrounds and label
        text drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for score, label_id, box in zip(
        results["scores"], results["labels"], results["boxes"]
    ):
        if score > threshold:
            label = model.config.id2label[label_id.item()]
            # assumes box is [x0, y0, x1, y1] in pixel coords — TODO confirm
            box = [round(i, 2) for i in box.tolist()]
            color = get_color(label)

            # Draw bounding box
            draw.rectangle(box, outline=color, width=3)  # type: ignore

            # Prepare text: measure "label: score" so a filled background
            # rectangle can be sized to fit it exactly.
            text = f"{label}: {score:.2f}"
            text_bbox = draw.textbbox((0, 0), text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]

            # Draw text background just above the box's top-left corner.
            draw.rectangle(
                [box[0], box[1] - text_height - 4, box[0] + text_width, box[1]],  # type: ignore
                fill=color,  # type: ignore
            )

            # Draw text
            draw.text((box[0], box[1] - text_height - 4), text, fill="white", font=font)

    return image
|
||||
3
demo/requirements.txt
Normal file
3
demo/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
safetensors==0.4.3
|
||||
opencv-python
|
||||
https://gradio-builds.s3.amazonaws.com/5.0-dev/e85cc9248cc33e187528f24f3b4415ca7b9e7134/take2/gradio-5.0.0b1-py3-none-any.whl
|
||||
230
demo/space.py
Normal file
230
demo/space.py
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user