formatting

freddyaboulton
2024-09-26 12:42:58 -04:00
parent 9e0d3f5bbf
commit 4d184eabe7
3 changed files with 98 additions and 383 deletions

README.md (182 changed lines)

@@ -25,89 +25,41 @@ pip install gradio_webrtc

 ```python
 import gradio as gr
 import cv2
 import numpy as np
 from huggingface_hub import hf_hub_download
 from gradio_webrtc import WebRTC
 from pathlib import Path
 from twilio.rest import Client
 import os
 from inference import YOLOv10

+model_file = hf_hub_download(
+    repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
+)
+model = YOLOv10(model_file)

 account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
 auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

-client = Client(account_sid, auth_token)
-token = client.tokens.create()
+if account_sid and auth_token:
+    client = Client(account_sid, auth_token)
+    token = client.tokens.create()
+    rtc_configuration = {
+        "iceServers": token.ice_servers,
+        "iceTransportPolicy": "relay",
+    }
+else:
+    rtc_configuration = None

-CLASSES = [
-    "background",
-    "aeroplane",
-    "bicycle",
-    "bird",
-    "boat",
-    "bottle",
-    "bus",
-    "car",
-    "cat",
-    "chair",
-    "cow",
-    "diningtable",
-    "dog",
-    "horse",
-    "motorbike",
-    "person",
-    "pottedplant",
-    "sheep",
-    "sofa",
-    "train",
-    "tvmonitor",
-]
-COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
-
-directory = Path(__file__).parent
-MODEL = str((directory / "MobileNetSSD_deploy.caffemodel").resolve())
-PROTOTXT = str((directory / "MobileNetSSD_deploy.prototxt.txt").resolve())
-net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
-
-rtc_configuration = {
-    "iceServers": token.ice_servers,
-    "iceTransportPolicy": "relay",
-}

 def detection(image, conf_threshold=0.3):
-    blob = cv2.dnn.blobFromImage(
-        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
-    )
-    net.setInput(blob)
-    detections = net.forward()
-    image = cv2.resize(image, (500, 500))
-    (h, w) = image.shape[:2]
-    labels = []
-    for i in np.arange(0, detections.shape[2]):
-        confidence = detections[0, 0, i, 2]
-        if confidence > conf_threshold:
-            # extract the index of the class label from the `detections`,
-            # then compute the (x, y)-coordinates of the bounding box for
-            # the object
-            idx = int(detections[0, 0, i, 1])
-            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
-            (startX, startY, endX, endY) = box.astype("int")
-            # display the prediction
-            label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
-            labels.append(label)
-            cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
-            y = startY - 15 if startY - 15 > 15 else startY + 15
-            cv2.putText(
-                image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
-            )
-    return image
+    image = cv2.resize(image, (model.input_width, model.input_height))
+    new_image = model.detect_objects(image, conf_threshold)
+    return cv2.resize(new_image, (500, 500))

-css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
+css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
 .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
@@ -115,18 +67,20 @@ with gr.Blocks(css=css) as demo:
     gr.HTML(
         """
     <h1 style='text-align: center'>
-    YOLOv10 Webcam Stream
+    YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
     </h1>
-    """)
+    """
+    )
     gr.HTML(
         """
     <h3 style='text-align: center'>
     <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
     </h3>
-    """)
+    """
+    )
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
-            image = WebRTC(label="Strean", rtc_configuration=rtc_configuration)
+            image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
@@ -134,15 +88,12 @@ with gr.Blocks(css=css) as demo:
                 step=0.05,
                 value=0.30,
             )

-        image.webrtc_stream(
-            fn=detection,
-            inputs=[image],
-            stream_every=0.05,
-            time_limit=30
+        image.stream(
+            fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10
         )

-if __name__ == '__main__':
+if __name__ == "__main__":
     demo.launch()
 ```
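Note that the example above only runs as written when `TWILIO_ACCOUNT_SID` and `TWILIO_AUTH_TOKEN` are set, since it relays media through Twilio's TURN service. As a minimal sketch that is not part of this commit, a public STUN server can stand in for local testing, assuming `rtc_configuration` follows the standard WebRTC `RTCConfiguration` dict shape documented in the parameter table below; peers behind restrictive NATs may still need a TURN relay.

```python
from gradio_webrtc import WebRTC

# Sketch only: a public STUN server instead of Twilio TURN.
# stun.l.google.com is a placeholder choice; any STUN/TURN server
# reachable by both peers can be listed under "iceServers".
rtc_configuration = {
    "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}],
}

image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
```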
@@ -166,11 +117,7 @@ if __name__ == '__main__':
 <td align="left" style="width: 25%;">
 ```python
-str
-| Path
-| tuple[str | Path, str | Path | None]
-| Callable
-| None
+None
 ```
 </td>
@@ -386,58 +333,6 @@ bool
 <td align="left">if True webcam will be mirrored. Default is True.</td>
 </tr>
-<tr>
-<td align="left"><code>show_share_button</code></td>
-<td align="left" style="width: 25%;">
-```python
-bool | None
-```
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">if True, will show a share icon in the corner of the component that allows user to share outputs to Hugging Face Spaces Discussions. If False, icon does not appear. If set to None (default behavior), then the icon appears if this Gradio app is launched on Spaces, but not otherwise.</td>
-</tr>
-<tr>
-<td align="left"><code>show_download_button</code></td>
-<td align="left" style="width: 25%;">
-```python
-bool | None
-```
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">if True, will show a download icon in the corner of the component that allows user to download the output. If False, icon does not appear. By default, it will be True for output components and False for input components.</td>
-</tr>
-<tr>
-<td align="left"><code>min_length</code></td>
-<td align="left" style="width: 25%;">
-```python
-int | None
-```
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">the minimum length of video (in seconds) that the user can pass into the prediction function. If None, there is no minimum length.</td>
-</tr>
-<tr>
-<td align="left"><code>max_length</code></td>
-<td align="left" style="width: 25%;">
-```python
-int | None
-```
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">the maximum length of video (in seconds) that the user can pass into the prediction function. If None, there is no maximum length.</td>
-</tr>
 <tr>
 <td align="left"><code>rtc_configuration</code></td>
 <td align="left" style="width: 25%;">
@@ -446,6 +341,19 @@ int | None
 dict[str, Any] | None
 ```
 </td>
 <td align="left"><code>None</code></td>
 <td align="left">None</td>
 </tr>
+<tr>
+<td align="left"><code>time_limit</code></td>
+<td align="left" style="width: 25%;">
+```python
+float | None
+```
+</td>
+<td align="left"><code>None</code></td>
+<td align="left">None</td>

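The two parameters added for the WebRTC component, `rtc_configuration` and `time_limit`, are listed with empty descriptions above. A hedged sketch of how they might be combined follows, using placeholder TURN credentials (turn.example.com, its username, and its credential are illustrative, not from this repo) and assuming `time_limit` caps how many seconds a single stream is processed, as the `time_limit=10` call in the README example suggests.

```python
from gradio_webrtc import WebRTC

# Placeholder TURN server and credentials; substitute your own deployment
# or the Twilio token.ice_servers list from the README example.
rtc_configuration = {
    "iceServers": [
        {
            "urls": ["turn:turn.example.com:3478"],
            "username": "webrtc-user",
            "credential": "webrtc-secret",
        }
    ],
    "iceTransportPolicy": "relay",  # force media through the TURN relay
}

image = WebRTC(
    label="Stream",
    rtc_configuration=rtc_configuration,
    time_limit=30,  # assumed: stop processing a single stream after 30 seconds
)
```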

@@ -28,9 +28,6 @@ else:
     rtc_configuration = None
-rtc_configuration = None

 def detection(image, conf_threshold=0.3):
     image = cv2.resize(image, (model.input_width, model.input_height))
     new_image = model.detect_objects(image, conf_threshold)

File diff suppressed because one or more lines are too long