Working draft

2026-02-05 18:09:23 +08:00 · 2024-09-24 17:51:45 -07:00
parent e18430ac0d
commit 83be4aa3ea
30 changed files with 8628 additions and 0 deletions
--- a/demo/README.md
+++ b/demo/README.md
@@ -0,0 +1,44 @@
+---
+license: mit
+tags:
+- object-detection
+- computer-vision
+- yolov10
+datasets:
+- detection-datasets/coco
+sdk: gradio
+sdk_version: 5.0.0b1
+---
+
+### Model Description
+[YOLOv10: Real-Time End-to-End Object Detection](https://arxiv.org/abs/2405.14458v1)
+
+- arXiv: https://arxiv.org/abs/2405.14458v1
+- github: https://github.com/THU-MIG/yolov10
+
+### Installation
+```
+pip install supervision git+https://github.com/THU-MIG/yolov10.git
+```
+
+### YOLOv10 Inference
+```python
+from ultralytics import YOLOv10
+import supervision as sv
+import cv2
+
+IMAGE_PATH = 'dog.jpeg'
+
+# Replace {n/s/m/b/l/x} with exactly one model size, e.g. 'jameslahm/yolov10n'.
+model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
+model.predict(IMAGE_PATH, show=True)
+```
+
+### BibTeX Entry and Citation Info
+```bibtex
+@article{wang2024yolov10,
+  title={YOLOv10: Real-Time End-to-End Object Detection},
+  author={Wang, Ao and Chen, Hui and Liu, Lihao and Chen, Kai and Lin, Zijia and Han, Jungong and Ding, Guiguang},
+  journal={arXiv preprint arXiv:2405.14458},
+  year={2024}
+}
+```
--- a/demo/init.py
+++ b/demo/init.py
--- a/demo/app.py
+++ b/demo/app.py
@@ -0,0 +1,108 @@
+import gradio as gr
+import cv2
+import numpy as np
+from gradio_webrtc import WebRTC
+from pathlib import Path
+
+# Label names, looked up by the class index the network reports
+# (see CLASSES[idx] in detection()); index 0 is "background".
+CLASSES = [
+    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
+    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
+    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
+]
+# One random color triple per class, drawn once at import time.
+COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
+
+# Model files are expected to sit next to this script.
+directory = Path(__file__).parent
+
+MODEL = str(directory.joinpath("MobileNetSSD_deploy.caffemodel").resolve())
+PROTOTXT = str(directory.joinpath("MobileNetSSD_deploy.prototxt.txt").resolve())
+# Loaded once at import so every streamed frame reuses the same network.
+net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
+
+
+def detection(image, conf_threshold=0.3):
+    """Detect objects in a frame and return an annotated 500x500 version of it.
+
+    Args:
+        image: Frame to analyse, in a form accepted by ``cv2.resize``
+            (presumably an H x W x 3 array from the WebRTC stream -- TODO
+            confirm channel order with the caller).
+        conf_threshold: A detection is kept only when its confidence is
+            strictly greater than this value.
+
+    Returns:
+        The frame resized to 500x500 with one rectangle and one
+        ``"<class>: <percent>%"`` caption drawn per kept detection.
+    """
+    # The network is fed a 300x300 blob, scaled by 0.007843 after
+    # subtracting a mean of 127.5.
+    blob = cv2.dnn.blobFromImage(
+        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
+    )
+    net.setInput(blob)
+    detections = net.forward()
+
+    image = cv2.resize(image, (500, 500))
+    (h, w) = image.shape[:2]
+    # Box coordinates are multiplied by the output size; this assumes the
+    # network emits them normalised to [0, 1] -- TODO confirm.
+    scale = np.array([w, h, w, h])
+
+    for i in range(detections.shape[2]):
+        confidence = detections[0, 0, i, 2]
+        if not confidence > conf_threshold:
+            continue
+
+        # Per-detection layout used here: [1] class index, [2] confidence,
+        # [3:7] box corners (startX, startY, endX, endY).
+        idx = int(detections[0, 0, i, 1])
+        box = detections[0, 0, i, 3:7] * scale
+        (startX, startY, endX, endY) = box.astype("int")
+
+        label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
+        cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
+        # Caption goes above the box unless that would run off the top of
+        # the frame, in which case it is moved just inside the box.
+        y = startY - 15 if startY - 15 > 15 else startY + 15
+        cv2.putText(
+            image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
+        )
+    return image
+
+
+# Inline stylesheet for the two layout wrappers used below. Each rule must
+# end its last declaration *inside* the braces, and lengths need a unit.
+css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
+                      .my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
+
+
+with gr.Blocks(css=css) as demo:
+    gr.HTML(
+        """
+    <h1 style='text-align: center'>
+    YOLOv10 Webcam Stream
+    </h1>
+    """)
+    gr.HTML(
+        """
+        <h3 style='text-align: center'>
+        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
+        </h3>
+        """)
+    with gr.Column(elem_classes=["my-column"]):
+        with gr.Group(elem_classes=["my-group"]):
+            image = WebRTC(label="Stream")
+            # NOTE(review): this slider is shown but is not listed in the
+            # stream's `inputs` below, so detection() always runs with its
+            # default conf_threshold. Wire it in once the webrtc_stream
+            # signature for extra inputs is confirmed.
+            conf_threshold = gr.Slider(
+                label="Confidence Threshold",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.05,
+                value=0.30,
+            )
+
+        # Each streamed frame is passed through detection().
+        image.webrtc_stream(
+            fn=detection,
+            inputs=[image],
+            stream_every=0.05,
+            time_limit=30
+        )
+
+if __name__ == '__main__':
+    demo.launch()
--- a/demo/css.css
+++ b/demo/css.css
@@ -0,0 +1,157 @@
+html {
+	font-family: Inter;
+	font-size: 16px;
+	font-weight: 400;
+	line-height: 1.5;
+	-webkit-text-size-adjust: 100%;
+	background: #fff;
+	color: #323232;
+	-webkit-font-smoothing: antialiased;
+	-moz-osx-font-smoothing: grayscale;
+	text-rendering: optimizeLegibility;
+}
+
+/* Vertical-spacing scale: --space is a unitless multiplier and every
+   --vspace* value below is derived from it in rem. */
+:root {
+	--space: 1;
+	--vspace: calc(var(--space) * 1rem);
+	--vspace-0: calc(3 * var(--space) * 1rem);
+	--vspace-1: calc(2 * var(--space) * 1rem);
+	--vspace-2: calc(1.5 * var(--space) * 1rem);
+	--vspace-3: calc(0.5 * var(--space) * 1rem);
+}
+
+.app {
+	max-width: 748px !important;
+}
+
+.prose p {
+	margin: var(--vspace) 0;
+	/* var() accepts a custom-property name, not an expression, so the
+	   multiplication has to happen inside calc(). */
+	line-height: calc(var(--vspace) * 2);
+	font-size: 1rem;
+}
+
+code {
+	font-family: "Inconsolata", sans-serif;
+	font-size: 16px;
+}
+
+h1,
+h1 code {
+	font-weight: 400;
+	line-height: calc(2.5 / var(--space) * var(--vspace));
+}
+
+/* Inline code inside the main heading: no box, no padding. The former
+   `padding-bottom: 5px` was dropped because the `padding: 0` shorthand
+   that followed it always overrode it. */
+h1 code {
+	background: none;
+	border: none;
+	letter-spacing: 0.05em;
+	position: relative;
+	padding: 0;
+}
+
+h2 {
+	margin: var(--vspace-1) 0 var(--vspace-2) 0;
+	line-height: 1em;
+}
+
+h3,
+h3 code {
+	margin: var(--vspace-1) 0 var(--vspace-2) 0;
+	line-height: 1em;
+}
+
+h4,
+h5,
+h6 {
+	margin: var(--vspace-3) 0 var(--vspace-3) 0;
+	line-height: var(--vspace);
+}
+
+.bigtitle,
+h1,
+h1 code {
+	font-size: calc(8px * 4.5);
+	word-break: break-word;
+}
+
+.title,
+h2,
+h2 code {
+	font-size: calc(8px * 3.375);
+	font-weight: lighter;
+	word-break: break-word;
+	border: none;
+	background: none;
+}
+
+.subheading1,
+h3,
+h3 code {
+	font-size: calc(8px * 1.8);
+	font-weight: 600;
+	border: none;
+	background: none;
+	letter-spacing: 0.1em;
+	text-transform: uppercase;
+}
+
+h2 code {
+	padding: 0;
+	position: relative;
+	letter-spacing: 0.05em;
+}
+
+blockquote {
+	font-size: calc(8px * 1.1667);
+	font-style: italic;
+	line-height: calc(1.1667 * var(--vspace));
+	margin: var(--vspace-2) var(--vspace-2);
+}
+
+.subheading2,
+h4 {
+	font-size: calc(8px * 1.4292);
+	text-transform: uppercase;
+	font-weight: 600;
+}
+
+.subheading3,
+h5 {
+	font-size: calc(8px * 1.2917);
+	line-height: calc(1.2917 * var(--vspace));
+
+	font-weight: lighter;
+	text-transform: uppercase;
+	letter-spacing: 0.15em;
+}
+
+h6 {
+	font-size: calc(8px * 1.1667);
+	font-size: 1.1667em;
+	font-weight: normal;
+	font-style: italic;
+	font-family: "le-monde-livre-classic-byol", serif !important;
+	letter-spacing: 0px !important;
+}
+
+#start .md > *:first-child {
+	margin-top: 0;
+}
+
+h2 + h3 {
+	margin-top: 0;
+}
+
+.md hr {
+	border: none;
+	border-top: 1px solid var(--block-border-color);
+	margin: var(--vspace-2) 0 var(--vspace-2) 0;
+}
+.prose ul {
+	margin: var(--vspace-2) 0 var(--vspace-1) 0;
+}
+
+.gap {
+	gap: 0;
+}
--- a/demo/draw_boxes.py
+++ b/demo/draw_boxes.py
@@ -0,0 +1,45 @@
+import colorsys
+import zlib
+
+from PIL import ImageDraw, ImageFont  # type: ignore
+
+
+def get_color(label):
+    """Return an RGB color for *label* that is the same on every run.
+
+    Args:
+        label: Class name; it is converted with ``str()`` before hashing.
+
+    Returns:
+        A 3-tuple of ints in the range 0-255.
+    """
+    # The built-in hash() of a str is salted per interpreter process, so it
+    # would give a label a different color on each run. CRC32 of the UTF-8
+    # bytes is deterministic.
+    hash_value = zlib.crc32(str(label).encode("utf-8"))
+    # Map the checksum onto one of 100 evenly spaced hues.
+    hue = (hash_value % 100) / 100.0
+    saturation = 0.7
+    value = 0.9
+    red, green, blue = colorsys.hsv_to_rgb(hue, saturation, value)
+    return (int(red * 255), int(green * 255), int(blue * 255))
+
+
+def draw_bounding_boxes(image, results: dict, model, threshold=0.3):
+    """Draw labelled boxes on *image* for every result scoring above *threshold*.
+
+    Args:
+        image: Image handed to ``ImageDraw.Draw``; it is drawn on in place.
+        results: Mapping with parallel ``"scores"``, ``"labels"`` and
+            ``"boxes"`` sequences. Labels must support ``.item()`` and boxes
+            ``.tolist()`` (presumably tensors -- TODO confirm with caller).
+        model: Object exposing ``config.id2label`` to turn a label id into
+            its name.
+        threshold: A result is drawn only when its score is strictly greater
+            than this value.
+
+    Returns:
+        The same *image* object, after drawing.
+    """
+    draw = ImageDraw.Draw(image)
+    font = ImageFont.load_default()
+
+    for score, label_id, box in zip(
+        results["scores"], results["labels"], results["boxes"]
+    ):
+        if not score > threshold:
+            continue
+
+        label = model.config.id2label[label_id.item()]
+        box = [round(i, 2) for i in box.tolist()]
+        color = get_color(label)
+
+        # Bounding box outline
+        draw.rectangle(box, outline=color, width=3)  # type: ignore
+
+        # Measure the caption so its background can be sized to fit
+        text = f"{label}: {score:.2f}"
+        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
+        text_width = right - left
+        text_height = bottom - top
+
+        # The caption normally sits directly above the box. When the box
+        # touches the top edge that position is off-image, so the caption
+        # is moved just inside the box instead of being clipped.
+        text_top, text_bottom = box[1] - text_height - 4, box[1]
+        if text_top < 0:
+            text_top = max(box[1], 0)
+            text_bottom = text_top + text_height + 4
+
+        # Caption background
+        draw.rectangle(
+            [box[0], text_top, box[0] + text_width, text_bottom],  # type: ignore
+            fill=color,  # type: ignore
+        )
+
+        # Caption text
+        draw.text((box[0], text_top), text, fill="white", font=font)
+
+    return image
--- a/demo/requirements.txt
+++ b/demo/requirements.txt
@@ -0,0 +1,3 @@
+safetensors==0.4.3
+opencv-python
+https://gradio-builds.s3.amazonaws.com/5.0-dev/e85cc9248cc33e187528f24f3b4415ca7b9e7134/take2/gradio-5.0.0b1-py3-none-any.whl
--- a/demo/space.py
+++ b/demo/space.py