mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 01:49:23 +08:00
238 lines
5.5 KiB
Python
238 lines
5.5 KiB
Python
import numpy as np
|
|
import cv2
|
|
|
|
# Human-readable label for each class id. The position in this list is the
# class id used for lookups (``class_names[class_id]``), so the order must not
# be changed.
# NOTE(review): this looks like the 80-class COCO label set; confirm it matches
# the model that produces the class ids.
class_names = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
|
|
|
# Build a per-class colour table: one row of three values for every entry in
# class_names, sampled uniformly between 0 and 255. The values are floats (not
# integers), and the generator is created with a fixed seed so the same table
# is produced on every run. Rows are looked up by class id when drawing.
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))
|
|
|
|
|
|
def nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression over one set of boxes.

    Candidates are visited from the highest score to the lowest. Each visited
    box is kept, and every remaining candidate whose IoU with it (as returned
    by ``compute_iou``) is not strictly below ``iou_threshold`` is discarded.

    Args:
        boxes: 2-D array indexed as ``boxes[i, :]``; rows are handed to
            ``compute_iou``.
        scores: 1-D array with one score per row of ``boxes``.
        iou_threshold: Overlap at or above which a lower-scored box is dropped.

    Returns:
        List of kept row indices into ``boxes``, highest score first.
    """
    # Candidate indices ordered by descending score.
    order = np.argsort(scores)[::-1]

    kept = []
    while order.size > 0:
        # The head of the queue is the best remaining box; it always survives.
        best, rest = order[0], order[1:]
        kept.append(best)

        # Overlap of the winner with every other remaining candidate.
        overlaps = compute_iou(boxes[best, :], boxes[rest, :])

        # Only candidates that overlap less than the threshold stay queued.
        order = rest[overlaps < iou_threshold]

    return kept
|
|
|
|
|
|
def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    """Apply ``nms`` separately to the boxes of each class.

    Boxes only compete with other boxes that carry the same class id.

    Args:
        boxes: 2-D array of boxes, one per row.
        scores: 1-D array with one score per row of ``boxes``.
        class_ids: 1-D array with one class id per row of ``boxes``.
        iou_threshold: Threshold forwarded unchanged to ``nms``.

    Returns:
        List of kept row indices into ``boxes``, grouped by class in the
        order produced by ``np.unique(class_ids)``.
    """
    selected = []
    for label in np.unique(class_ids):
        # Positions, in the full arrays, of the boxes belonging to this class.
        members = np.where(class_ids == label)[0]

        survivors = nms(boxes[members, :], scores[members], iou_threshold)

        # nms returns positions within the per-class subset; map them back.
        selected.extend(members[survivors])

    return selected
|
|
|
|
|
|
def compute_iou(box, boxes):
    """Return the intersection-over-union of one box against many boxes.

    Args:
        box: Indexable with four entries read as x1, y1, x2, y2.
        boxes: 2-D array whose columns 0-3 are read as x1, y1, x2, y2, one
            box per row.

    Returns:
        1-D array with one IoU value per row of ``boxes``. A pair whose union
        area is exactly zero (both boxes have no area) yields 0.0 instead of
        NaN.
    """
    # Corners of the overlap rectangle between ``box`` and each row.
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])

    # Clamp at zero so non-overlapping boxes contribute no intersection.
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union_area = box_area + boxes_area - intersection_area

    # A zero union would be a 0/0 division: silence the warning for those
    # entries and report them as "no overlap" rather than NaN, which would
    # otherwise fail every threshold comparison downstream.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = intersection_area / union_area

    return np.where(union_area != 0, iou, 0.0)
|
|
|
|
|
|
def xywh2xyxy(x):
    """Convert centre-format boxes (x, y, w, h) to corner format (x1, y1, x2, y2).

    Args:
        x: Array whose last axis holds x, y, w, h in positions 0-3. Any
            further positions on that axis are copied through unchanged.

    Returns:
        A new array of the same shape and dtype as ``x``; the input is not
        modified.
    """
    converted = np.copy(x)

    # Half of the width and height, measured outwards from the centre.
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2

    converted[..., 0] = x[..., 0] - half_w  # x1
    converted[..., 1] = x[..., 1] - half_h  # y1
    converted[..., 2] = x[..., 0] + half_w  # x2
    converted[..., 3] = x[..., 1] + half_h  # y2
    return converted
|
|
|
|
|
|
def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of ``image`` annotated with boxes and captions.

    Every detection gets a rectangle in its class colour and a caption of the
    form ``"<label> <score as integer percent>%"``.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) and
            ``image.copy()`` are used here.
        boxes: Iterable of boxes, one per detection, passed to ``draw_box``
            and ``draw_text``.
        scores: Iterable of scores, one per detection.
        class_ids: Iterable of indices into ``colors`` and ``class_names``.
        mask_alpha: Accepted but currently unused; the mask overlay step is
            disabled.

    Returns:
        The annotated copy; ``image`` itself is not drawn on.
    """
    annotated = image.copy()

    # Caption size and stroke scale with the shorter side of the image.
    height, width = image.shape[:2]
    short_side = min(height, width)
    font_size = short_side * 0.0006
    text_thickness = int(short_side * 0.001)

    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]
        draw_box(annotated, box, color)

        label = class_names[class_id]
        caption = f"{label} {int(score * 100)}%"
        draw_text(annotated, caption, box, color, font_size, text_thickness)

    return annotated
|
|
|
|
|
|
def draw_box(
    image: np.ndarray,
    box: np.ndarray,
    color: tuple[int, int, int] = (0, 0, 255),
    thickness: int = 2,
) -> np.ndarray:
    """Draw the outline of one box onto ``image``.

    Args:
        image: Image array handed to ``cv2.rectangle``.
        box: Array of exactly four values (x1, y1, x2, y2); cast to int
            before drawing.
        color: Colour passed through to ``cv2.rectangle``.
        thickness: Line thickness passed through to ``cv2.rectangle``.

    Returns:
        Whatever ``cv2.rectangle`` returns for ``image``.
    """
    left, top, right, bottom = box.astype(int)
    top_left = (left, top)
    bottom_right = (right, bottom)
    return cv2.rectangle(image, top_left, bottom_right, color, thickness)
|
|
|
|
|
|
def draw_text(
    image: np.ndarray,
    text: str,
    box: np.ndarray,
    color: tuple[int, int, int] = (0, 0, 255),
    font_size: float = 0.001,
    text_thickness: int = 2,
) -> np.ndarray:
    """Draw a caption on a filled background anchored at a box's (x1, y1).

    The background rectangle spans from (x1, y1) to (x1 + text width,
    y1 - padded text height) and is filled with ``color``; the text itself is
    rendered in white on top of it.

    Args:
        image: Image array handed to the OpenCV drawing calls.
        text: Caption to render.
        box: Array of exactly four values (x1, y1, x2, y2); only x1 and y1
            are used after casting to int.
        color: Fill colour of the caption background.
        font_size: Font scale used for both measuring and rendering.
        text_thickness: Stroke thickness used for both measuring and rendering.

    Returns:
        Whatever ``cv2.putText`` returns for ``image``.
    """
    left, top, _, _ = box.astype(int)
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Measure the caption so the background can be sized to fit it.
    (text_w, text_h), _ = cv2.getTextSize(
        text=text,
        fontFace=font,
        fontScale=font_size,
        thickness=text_thickness,
    )
    # Pad the measured height by 20%.
    text_h = int(text_h * 1.2)

    # Filled background (thickness -1) behind the caption.
    anchor = (left, top)
    cv2.rectangle(image, anchor, (left + text_w, top - text_h), color, -1)

    white = (255, 255, 255)
    return cv2.putText(
        image,
        text,
        anchor,
        font,
        font_size,
        white,
        text_thickness,
        cv2.LINE_AA,
    )
|
|
|
|
|
|
def draw_masks(
    image: np.ndarray,
    boxes: np.ndarray,
    classes: np.ndarray,
    mask_alpha: float = 0.3,
) -> np.ndarray:
    """Blend class-coloured filled rectangles over ``image``.

    Args:
        image: Image array; it is copied before anything is drawn.
        boxes: Iterable of four-value boxes (x1, y1, x2, y2), cast to int
            before drawing.
        classes: Iterable of indices into ``colors``, one per box.
        mask_alpha: Weight given to the filled overlay in the blend; the
            original image receives ``1 - mask_alpha``.

    Returns:
        The result of ``cv2.addWeighted`` on the overlay and ``image``.
    """
    overlay = image.copy()

    for box, class_id in zip(boxes, classes):
        fill = colors[class_id]
        left, top, right, bottom = box.astype(int)

        # Thickness -1 fills the rectangle instead of outlining it.
        cv2.rectangle(overlay, (left, top), (right, bottom), fill, -1)

    return cv2.addWeighted(overlay, mask_alpha, image, 1 - mask_alpha, 0)
|