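"""Object-detection post-processing utilities.

Contains the COCO class-name list, per-class non-maximum suppression,
IoU computation, (x, y, w, h) -> (x1, y1, x2, y2) box conversion, and
OpenCV helpers for drawing boxes, labels, and mask overlays.
"""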
import cv2
import numpy as np


class_names = [
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
]


# Create a color for each class, where each color is three random values in [0, 255]
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))


def nms(boxes, scores, iou_threshold):
    # Sort detections by score, highest first
    sorted_indices = np.argsort(scores)[::-1]

    keep_boxes = []
    while sorted_indices.size > 0:
        # Pick the remaining box with the highest score
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)

        # Compute IoU of the picked box with the rest
        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

        # Remove boxes with IoU over the threshold
        keep_indices = np.where(ious < iou_threshold)[0]

        # print(keep_indices.shape, sorted_indices.shape)
        sorted_indices = sorted_indices[keep_indices + 1]

    return keep_boxes


def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    unique_class_ids = np.unique(class_ids)

    keep_boxes = []
    for class_id in unique_class_ids:
        class_indices = np.where(class_ids == class_id)[0]
        class_boxes = boxes[class_indices, :]
        class_scores = scores[class_indices]

        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
        keep_boxes.extend(class_indices[class_keep_boxes])

    return keep_boxes


def compute_iou(box, boxes):
    # Compute xmin, ymin, xmax, ymax for both boxes
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])

    # Compute intersection area
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

    # Compute union area
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union_area = box_area + boxes_area - intersection_area

    # Compute IoU
    iou = intersection_area / union_area

    return iou


def xywh2xyxy(x):
    # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    det_img = image.copy()

    img_height, img_width = image.shape[:2]
    font_size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)

    # det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)

    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]

        draw_box(det_img, box, color)  # type: ignore

        label = class_names[class_id]
        caption = f"{label} {int(score * 100)}%"
        draw_text(det_img, caption, box, color, font_size, text_thickness)  # type: ignore

    return det_img


def draw_box(
    image: np.ndarray,
    box: np.ndarray,
    color: tuple[int, int, int] = (0, 0, 255),
    thickness: int = 2,
) -> np.ndarray:
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)


def draw_text(
    image: np.ndarray,
    text: str,
    box: np.ndarray,
    color: tuple[int, int, int] = (0, 0, 255),
    font_size: float = 0.001,
    text_thickness: int = 2,
) -> np.ndarray:
    x1, y1, x2, y2 = box.astype(int)
    (tw, th), _ = cv2.getTextSize(
        text=text,
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=font_size,
        thickness=text_thickness,
    )
    th = int(th * 1.2)

    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)

    return cv2.putText(
        image,
        text,
        (x1, y1),
        cv2.FONT_HERSHEY_SIMPLEX,
        font_size,
        (255, 255, 255),
        text_thickness,
        cv2.LINE_AA,
    )


def draw_masks(
    image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3
) -> np.ndarray:
    mask_img = image.copy()

    # Draw bounding boxes and labels of detections
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]

        x1, y1, x2, y2 = box.astype(int)

        # Draw fill rectangle in mask image
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)  # type: ignore

    return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)
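

# Minimal usage sketch (illustrative only): the detections below are made-up
# example values, not part of the module. It converts center-format boxes to
# corner format and runs per-class non-maximum suppression; the surviving
# detections could then be passed to draw_detections() for visualization.
if __name__ == "__main__":
    # Three synthetic detections in (cx, cy, w, h) format; the second box
    # heavily overlaps the first and should be suppressed.
    raw_boxes = np.array(
        [
            [100.0, 100.0, 80.0, 60.0],
            [105.0, 102.0, 82.0, 58.0],
            [300.0, 200.0, 50.0, 50.0],
        ]
    )
    boxes = xywh2xyxy(raw_boxes)
    scores = np.array([0.9, 0.75, 0.6])
    class_ids = np.array([0, 0, 16])  # "person", "person", "dog"

    keep = multiclass_nms(boxes, scores, class_ids, iou_threshold=0.5)
    for i in keep:
        print(
            f"kept: {class_names[class_ids[i]]} "
            f"score={scores[i]:.2f} box={boxes[i].astype(int)}"
        )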