Working draft

This commit is contained in:
freddyaboulton
2024-09-24 17:51:45 -07:00
parent e18430ac0d
commit 83be4aa3ea
30 changed files with 8628 additions and 0 deletions

44
demo/README.md Normal file
View File

@@ -0,0 +1,44 @@
---
license: mit
tags:
- object-detection
- computer-vision
- yolov10
datasets:
- detection-datasets/coco
sdk: gradio
sdk_version: 5.0.0b1
---
### Model Description
[YOLOv10: Real-Time End-to-End Object Detection](https://arxiv.org/abs/2405.14458v1)
- arXiv: https://arxiv.org/abs/2405.14458v1
- github: https://github.com/THU-MIG/yolov10
### Installation
```
pip install supervision git+https://github.com/THU-MIG/yolov10.git
```
### YOLOv10 Inference
```python
from ultralytics import YOLOv10
import supervision as sv
import cv2
IMAGE_PATH = 'dog.jpeg'
model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
model.predict(IMAGE_PATH, show=True)
```
### BibTeX Entry and Citation Info
```
@article{wang2024yolov10,
title={YOLOv10: Real-Time End-to-End Object Detection},
author={Wang, Ao and Chen, Hui and Liu, Lihao and Chen, Kai and Lin, Zijia and Han, Jungong and Ding, Guiguang},
journal={arXiv preprint arXiv:2405.14458},
year={2024}
}
```

0
demo/__init__.py Normal file
View File

108
demo/app.py Normal file
View File

@@ -0,0 +1,108 @@
import gradio as gr
import cv2
import numpy as np
from gradio_webrtc import WebRTC
from pathlib import Path
# Class names; the class id reported by the network is used as an index
# into this list.
CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

# One random 3-component colour per class, drawn once at import time.
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))

# The Caffe model files are resolved relative to this script's directory.
directory = Path(__file__).parent
MODEL = str(directory.joinpath("MobileNetSSD_deploy.caffemodel").resolve())
PROTOTXT = str(directory.joinpath("MobileNetSSD_deploy.prototxt.txt").resolve())

# Network shared by every call to ``detection``.
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
def detection(image, conf_threshold=0.3):
    """Run the module-level network on ``image`` and draw what it finds.

    Args:
        image: Frame to analyse. It is handed straight to ``cv2.resize``,
            so it is presumably a NumPy image array (assumption -- confirm
            against the caller).
        conf_threshold: A detection is drawn only when its confidence is
            strictly greater than this value.

    Returns:
        A copy of ``image`` resized to 500x500 with a rectangle and a
        ``"<class>: <confidence>%"`` caption drawn for every kept detection.
    """
    # The network input is a 300x300 blob built from the resized frame.
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
    )
    net.setInput(blob)
    detections = net.forward()

    # Drawing happens on a 500x500 copy; box coordinates are scaled to it.
    image = cv2.resize(image, (500, 500))
    h, w = image.shape[:2]
    scale = np.array([w, h, w, h])  # loop-invariant, so built once

    for i in range(detections.shape[2]):
        # Per-detection layout used below: [1] class id, [2] confidence,
        # [3:7] box corners as fractions of the image size.
        confidence = detections[0, 0, i, 2]
        if not confidence > conf_threshold:
            continue

        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * scale
        (startX, startY, endX, endY) = box.astype("int")

        label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
        cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)

        # Put the caption above the box unless that would run off the top.
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(
            image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
        )
    return image
# Custom CSS for the two element classes used in the layout below.
# Every length carries a unit (a bare ``600`` is not a valid CSS length and
# the declaration would be dropped), and each rule ends at its closing brace.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
# Build the demo page: a heading, reference links, and a WebRTC stream
# component whose frames are processed by ``detection``.
with gr.Blocks(css=css) as demo:
    # NOTE(review): the heading and links refer to YOLOv10, while this module
    # loads MobileNetSSD Caffe files -- confirm which title is intended.
    gr.HTML(
        """
<h1 style='text-align: center'>
YOLOv10 Webcam Stream
</h1>
"""
    )
    gr.HTML(
        """
<h3 style='text-align: center'>
<a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
</h3>
"""
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = WebRTC(label="Stream")
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

        # NOTE(review): ``conf_threshold`` is not listed in ``inputs``, so
        # ``detection`` always runs with its default threshold and the slider
        # has no effect. Left unchanged because it is unclear from this file
        # whether ``webrtc_stream`` accepts extra inputs -- confirm and wire up.
        image.webrtc_stream(
            fn=detection,
            inputs=[image],
            stream_every=0.05,
            time_limit=30,
        )

if __name__ == '__main__':
    demo.launch()

157
demo/css.css Normal file
View File

@@ -0,0 +1,157 @@
/* Page-wide base typography and colours. */
html {
  background: #fff;
  color: #323232;
  font-family: Inter;
  font-size: 16px;
  font-weight: 400;
  line-height: 1.5;
  text-rendering: optimizeLegibility;
  -webkit-text-size-adjust: 100%;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Vertical spacing scale: each step is a multiple of --space, in rem. */
:root {
  --space: 1;
  --vspace: calc(var(--space) * 1rem);
  --vspace-0: calc(3 * var(--space) * 1rem);
  --vspace-1: calc(2 * var(--space) * 1rem);
  --vspace-2: calc(1.5 * var(--space) * 1rem);
  --vspace-3: calc(0.5 * var(--space) * 1rem);
}

/* Width cap for elements carrying the .app class. */
.app {
  max-width: 748px !important;
}
/* Paragraph rhythm inside .prose containers. */
.prose p {
  margin: var(--vspace) 0;
  /* var() takes only a custom-property name (plus an optional fallback);
     arithmetic has to be done in calc(), otherwise the declaration is
     invalid and ignored. */
  line-height: calc(var(--vspace) * 2);
  font-size: 1rem;
}
/* Inline and block code. */
code {
  font-size: 16px;
  font-family: "Inconsolata", sans-serif;
}

/* Top-level heading weight and line height (also for code inside it). */
h1,
h1 code {
  line-height: calc(2.5 / var(--space) * var(--vspace));
  font-weight: 400;
}
/* Code inside an h1: strip the code-chip look so it reads as heading text.
   A former `padding-bottom: 5px` was removed: the `padding: 0` shorthand
   later in the same rule always overrode it. */
h1 code {
  background: none;
  border: none;
  letter-spacing: 0.05em;
  position: relative;
  padding: 0;
}
/* Heading margins and line heights. */
h2 {
  line-height: 1em;
  margin: var(--vspace-1) 0 var(--vspace-2);
}

h3,
h3 code {
  line-height: 1em;
  margin: var(--vspace-1) 0 var(--vspace-2);
}

h4,
h5,
h6 {
  line-height: var(--vspace);
  margin: var(--vspace-3) 0;
}

/* Heading sizes, expressed as multiples of 8px. */
.bigtitle,
h1,
h1 code {
  word-break: break-word;
  font-size: calc(8px * 4.5);
}

.title,
h2,
h2 code {
  background: none;
  border: none;
  font-size: calc(8px * 3.375);
  font-weight: lighter;
  word-break: break-word;
}

.subheading1,
h3,
h3 code {
  background: none;
  border: none;
  font-size: calc(8px * 1.8);
  font-weight: 600;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

/* Code inside an h2 keeps the heading look, with slightly wider tracking. */
h2 code {
  letter-spacing: 0.05em;
  padding: 0;
  position: relative;
}
/* Quotations: small italic text, indented equally on all sides. */
blockquote {
  font-style: italic;
  font-size: calc(8px * 1.1667);
  line-height: calc(1.1667 * var(--vspace));
  margin: var(--vspace-2);
}

/* Second-level subheading. */
.subheading2,
h4 {
  font-weight: 600;
  font-size: calc(8px * 1.4292);
  text-transform: uppercase;
}

/* Third-level subheading: light, widely tracked capitals. */
.subheading3,
h5 {
  font-weight: lighter;
  font-size: calc(8px * 1.2917);
  line-height: calc(1.2917 * var(--vspace));
  letter-spacing: 0.15em;
  text-transform: uppercase;
}
/* Smallest heading: italic serif.
   A former `font-size: calc(8px * 1.1667)` was removed: the em-based
   declaration below came later in the same rule and always won. */
h6 {
  font-size: 1.1667em;
  font-weight: normal;
  font-style: italic;
  font-family: "le-monde-livre-classic-byol", serif !important;
  letter-spacing: 0px !important;
}
/* No top margin on the first child of a .md block inside #start. */
#start .md > *:first-child {
  margin-top: 0;
}

/* An h3 that directly follows an h2 sits flush against it. */
h2 + h3 {
  margin-top: 0;
}

/* Horizontal rules: a single top border with even spacing above and below. */
.md hr {
  border: none;
  border-top: 1px solid var(--block-border-color);
  margin: var(--vspace-2) 0;
}

/* List spacing inside .prose containers. */
.prose ul {
  margin: var(--vspace-2) 0 var(--vspace-1);
}

/* Remove the gap on elements carrying the .gap class. */
.gap {
  gap: 0;
}

45
demo/draw_boxes.py Normal file
View File

@@ -0,0 +1,45 @@
from PIL import ImageDraw, ImageFont # type: ignore
import colorsys
def get_color(label):
    """Return an RGB colour for ``label`` that is identical on every run.

    Args:
        label: Class label; it is converted with ``str()`` before hashing.

    Returns:
        A tuple of three ints in ``0..255`` (red, green, blue), obtained from
        an HSV colour with fixed saturation 0.7 and value 0.9 and one of 100
        hues selected by the label.
    """
    import zlib  # local import so the block does not depend on other edits

    # The built-in ``hash()`` of a str is salted per interpreter process, so
    # it would hand the same label a different colour on each run. CRC32 of
    # the UTF-8 bytes is stable across processes and machines.
    digest = zlib.crc32(str(label).encode("utf-8"))
    hue = (digest % 100) / 100.0
    saturation = 0.7
    value = 0.9
    rgb = colorsys.hsv_to_rgb(hue, saturation, value)
    return tuple(int(x * 255) for x in rgb)
def draw_bounding_boxes(image, results: dict, model, threshold=0.3):
    """Draw a labelled box on ``image`` for every detection above ``threshold``.

    Args:
        image: Image to draw on; it is modified in place through
            ``ImageDraw.Draw``.
        results: Mapping with ``"scores"``, ``"labels"`` and ``"boxes"``
            sequences that are iterated in lockstep. Each label must provide
            ``.item()`` and each box ``.tolist()`` (presumably tensors --
            confirm against the caller).
        model: Object whose ``config.id2label`` maps a label id to its name.
        threshold: A detection is drawn only when its score is strictly
            greater than this value.

    Returns:
        The same ``image`` object, with boxes and captions drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for score, label_id, box in zip(
        results["scores"], results["labels"], results["boxes"]
    ):
        if not (score > threshold):
            continue

        label = model.config.id2label[label_id.item()]
        box = [round(i, 2) for i in box.tolist()]
        color = get_color(label)

        # Bounding box outline.
        draw.rectangle(box, outline=color, width=3)  # type: ignore

        # Measure the caption so its background fits exactly.
        text = f"{label}: {score:.2f}"
        text_bbox = draw.textbbox((0, 0), text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        # The caption normally sits directly above the box. For a box that
        # touches the top edge this would place it at a negative y, where it
        # is clipped, so it is moved down to start at the image's top edge.
        label_top = box[1] - text_height - 4
        label_bottom = box[1]
        if label_top < 0:
            label_top = 0
            label_bottom = text_height + 4

        # Caption background, then the caption itself.
        draw.rectangle(
            [box[0], label_top, box[0] + text_width, label_bottom],  # type: ignore
            fill=color,  # type: ignore
        )
        draw.text((box[0], label_top), text, fill="white", font=font)
    return image

3
demo/requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
safetensors==0.4.3
opencv-python
https://gradio-builds.s3.amazonaws.com/5.0-dev/e85cc9248cc33e187528f24f3b4415ca7b9e7134/take2/gradio-5.0.0b1-py3-none-any.whl

230
demo/space.py Normal file

File diff suppressed because one or more lines are too long