Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git (synced 2026-02-05 18:09:23 +08:00)

Commit: formatting

README.md (182 lines changed)
@@ -25,89 +25,41 @@ pip install gradio_webrtc
 ```python
 import gradio as gr
 import cv2
-import numpy as np
+from huggingface_hub import hf_hub_download
 from gradio_webrtc import WebRTC
-from pathlib import Path
 from twilio.rest import Client
 import os
+from inference import YOLOv10
+
+model_file = hf_hub_download(
+    repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
+)
+
+model = YOLOv10(model_file)
 
 account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
 auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
-client = Client(account_sid, auth_token)
 
-token = client.tokens.create()
+if account_sid and auth_token:
+    client = Client(account_sid, auth_token)
 
-rtc_configuration = {
-    "iceServers": token.ice_servers,
-    "iceTransportPolicy": "relay",
-}
+    token = client.tokens.create()
 
-CLASSES = [
-    "background",
-    "aeroplane",
-    "bicycle",
-    "bird",
-    "boat",
-    "bottle",
-    "bus",
-    "car",
-    "cat",
-    "chair",
-    "cow",
-    "diningtable",
-    "dog",
-    "horse",
-    "motorbike",
-    "person",
-    "pottedplant",
-    "sheep",
-    "sofa",
-    "train",
-    "tvmonitor",
-]
-COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
-
-directory = Path(__file__).parent
-
-MODEL = str((directory / "MobileNetSSD_deploy.caffemodel").resolve())
-PROTOTXT = str((directory / "MobileNetSSD_deploy.prototxt.txt").resolve())
-net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
+    rtc_configuration = {
+        "iceServers": token.ice_servers,
+        "iceTransportPolicy": "relay",
+    }
+else:
+    rtc_configuration = None
 
 
 def detection(image, conf_threshold=0.3):
-    blob = cv2.dnn.blobFromImage(
-        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
-    )
-    net.setInput(blob)
-    detections = net.forward()
-    image = cv2.resize(image, (500, 500))
-    (h, w) = image.shape[:2]
-    labels = []
-    for i in np.arange(0, detections.shape[2]):
-        confidence = detections[0, 0, i, 2]
-
-        if confidence > conf_threshold:
-            # extract the index of the class label from the `detections`,
-            # then compute the (x, y)-coordinates of the bounding box for
-            # the object
-            idx = int(detections[0, 0, i, 1])
-            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
-            (startX, startY, endX, endY) = box.astype("int")
-
-            # display the prediction
-            label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
-            labels.append(label)
-            cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
-            y = startY - 15 if startY - 15 > 15 else startY + 15
-            cv2.putText(
-                image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
-            )
-    return image
+    image = cv2.resize(image, (model.input_width, model.input_height))
+    new_image = model.detect_objects(image, conf_threshold)
+    return cv2.resize(new_image, (500, 500))
 
 
-css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
+css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
 .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
@@ -115,18 +67,20 @@ with gr.Blocks(css=css) as demo:
     gr.HTML(
         """
         <h1 style='text-align: center'>
-        YOLOv10 Webcam Stream
+        YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
         </h1>
-        """)
+        """
+    )
     gr.HTML(
         """
         <h3 style='text-align: center'>
         <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
         </h3>
-        """)
+        """
+    )
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
-            image = WebRTC(label="Strean", rtc_configuration=rtc_configuration)
+            image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
@@ -134,15 +88,12 @@ with gr.Blocks(css=css) as demo:
                 step=0.05,
                 value=0.30,
             )
 
-        image.webrtc_stream(
-            fn=detection,
-            inputs=[image],
-            stream_every=0.05,
-            time_limit=30
+        image.stream(
+            fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10
         )
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo.launch()
 
 ```
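Note: the updated example imports `YOLOv10` from a local `inference` module that is not part of this diff. The README relies on only three members of that class (`input_width`, `input_height`, and `detect_objects`), so a minimal, hypothetical interface stub is sketched below. It loads the ONNX model with onnxruntime but deliberately omits real YOLOv10 output decoding; it is not the repo's actual implementation.

```python
# Hypothetical stub of the local `inference` module's YOLOv10 class, showing
# only the interface the README example relies on. Real YOLOv10 output
# decoding, NMS, and box drawing are omitted.
import cv2
import numpy as np
import onnxruntime as ort


class YOLOv10:
    def __init__(self, model_path: str):
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        inp = self.session.get_inputs()[0]
        self.input_name = inp.name
        # ONNX input shape is usually (batch, channels, height, width);
        # fall back to 640x640 if the export uses dynamic axes.
        shape = inp.shape
        self.input_height = shape[2] if isinstance(shape[2], int) else 640
        self.input_width = shape[3] if isinstance(shape[3], int) else 640

    def detect_objects(self, image: np.ndarray, conf_threshold: float = 0.3) -> np.ndarray:
        # Preprocess: uint8 HxWx3 frame -> normalized NCHW float32 tensor.
        blob = cv2.resize(image, (self.input_width, self.input_height))
        blob = blob.astype(np.float32) / 255.0
        blob = blob.transpose(2, 0, 1)[np.newaxis, ...]
        outputs = self.session.run(None, {self.input_name: blob})
        # A real implementation would turn `outputs` into boxes/labels above
        # conf_threshold and draw them on `image`; this stub returns the frame.
        _ = (outputs, conf_threshold)
        return image
```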
@@ -166,11 +117,7 @@ if __name__ == '__main__':
 <td align="left" style="width: 25%;">
 
 ```python
-str
-    | Path
-    | tuple[str | Path, str | Path | None]
-    | Callable
-    | None
+None
 ```
 
 </td>
@@ -386,58 +333,6 @@ bool
 <td align="left">if True webcam will be mirrored. Default is True.</td>
 </tr>
 
-<tr>
-<td align="left"><code>show_share_button</code></td>
-<td align="left" style="width: 25%;">
-
-```python
-bool | None
-```
-
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">if True, will show a share icon in the corner of the component that allows user to share outputs to Hugging Face Spaces Discussions. If False, icon does not appear. If set to None (default behavior), then the icon appears if this Gradio app is launched on Spaces, but not otherwise.</td>
-</tr>
-
-<tr>
-<td align="left"><code>show_download_button</code></td>
-<td align="left" style="width: 25%;">
-
-```python
-bool | None
-```
-
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">if True, will show a download icon in the corner of the component that allows user to download the output. If False, icon does not appear. By default, it will be True for output components and False for input components.</td>
-</tr>
-
-<tr>
-<td align="left"><code>min_length</code></td>
-<td align="left" style="width: 25%;">
-
-```python
-int | None
-```
-
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">the minimum length of video (in seconds) that the user can pass into the prediction function. If None, there is no minimum length.</td>
-</tr>
-
-<tr>
-<td align="left"><code>max_length</code></td>
-<td align="left" style="width: 25%;">
-
-```python
-int | None
-```
-
-</td>
-<td align="left"><code>None</code></td>
-<td align="left">the maximum length of video (in seconds) that the user can pass into the prediction function. If None, there is no maximum length.</td>
-</tr>
-
 <tr>
 <td align="left"><code>rtc_configuration</code></td>
 <td align="left" style="width: 25%;">
@@ -446,6 +341,19 @@ int | None
 dict[str, Any] | None
 ```
 
+</td>
+<td align="left"><code>None</code></td>
+<td align="left">None</td>
+</tr>
+
+<tr>
+<td align="left"><code>time_limit</code></td>
+<td align="left" style="width: 25%;">
+
+```python
+float | None
+```
+
 </td>
 <td align="left"><code>None</code></td>
 <td align="left">None</td>
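The new `rtc_configuration` and `time_limit` rows above ship without descriptions. Judging from the example earlier in this diff, `rtc_configuration` takes an RTCConfiguration-style dict (the Twilio branch builds one from `token.ice_servers`) and `time_limit` appears to bound how long a single stream runs, in seconds. A minimal usage sketch, with a placeholder public STUN server standing in for the Twilio setup, might look like this:

```python
# Sketch only: wiring rtc_configuration and time_limit into the stream() API
# shown earlier in this diff. The STUN URL is an illustrative placeholder.
import gradio as gr
from gradio_webrtc import WebRTC

rtc_configuration = {
    "iceServers": [{"urls": "stun:stun.l.google.com:19302"}],
}


def flip(frame):
    # Trivial stand-in for a real per-frame handler such as detection().
    return frame[:, ::-1]


with gr.Blocks() as demo:
    image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
    image.stream(fn=flip, inputs=[image], outputs=[image], time_limit=10)

if __name__ == "__main__":
    demo.launch()
```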
@@ -28,9 +28,6 @@ else:
     rtc_configuration = None
 
 
-rtc_configuration = None
-
-
 def detection(image, conf_threshold=0.3):
     image = cv2.resize(image, (model.input_width, model.input_height))
     new_image = model.detect_objects(image, conf_threshold)
demo/space.py (296 lines changed)
File diff suppressed because one or more lines are too long.