mirror of https://github.com/TMElyralab/MuseTalk.git

initial_commit
musetalk/utils/face_detection/detection/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .core import FaceDetector
musetalk/utils/face_detection/detection/core.py (new file, 130 lines)
@@ -0,0 +1,130 @@
import logging
import glob
from tqdm import tqdm
import numpy as np
import torch
import cv2


class FaceDetector(object):
    """An abstract class representing a face detector.

    Any other face detection implementation must subclass it. All subclasses
    must implement ``detect_from_image``, which returns a list of detected
    bounding boxes. Optionally, for speed considerations, detecting directly
    from a path is recommended.
    """

    def __init__(self, device, verbose):
        self.device = device
        self.verbose = verbose
        # Create the logger unconditionally so the error path below can
        # always use it, whatever the device string is.
        logger = logging.getLogger(__name__)

        if verbose and 'cpu' in device:
            logger.warning("Detection running on CPU, this may be slow.")

        if 'cpu' not in device and 'cuda' not in device:
            if verbose:
                logger.error("Expected values for device are: {cpu, cuda} but got: %s", device)
            raise ValueError

    def detect_from_image(self, tensor_or_path):
        """Detects faces in a given image.

        This function detects the faces present in a provided BGR (usually)
        image. The input can be either the image itself or the path to it.

        Arguments:
            tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path
            to an image or the image itself.

        Example::

            >>> path_to_image = 'data/image_01.jpg'
            ... detected_faces = detect_from_image(path_to_image)
            [A list of bounding boxes (x1, y1, x2, y2)]
            >>> image = cv2.imread(path_to_image)
            ... detected_faces = detect_from_image(image)
            [A list of bounding boxes (x1, y1, x2, y2)]

        """
        raise NotImplementedError

    def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True):
        """Detects faces from all the images present in a given directory.

        Arguments:
            path {string} -- a string containing a path that points to the folder containing the images

        Keyword Arguments:
            extensions {list} -- list of strings containing the extensions to be
                considered, in the format ``.extension_name``
                (default: {['.jpg', '.png']})
            recursive {bool} -- whether to scan the folder recursively
                (default: {False})
            show_progress_bar {bool} -- display a progress bar (default: {True})

        Example:
            >>> directory = 'data'
            ... detected_faces = detect_from_directory(directory)
            {A dictionary of [lists containing bounding boxes (x1, y1, x2, y2)]}

        """
        logger = logging.getLogger(__name__)

        if len(extensions) == 0:
            if self.verbose:
                logger.error("Expected at least one extension, but none was received.")
            raise ValueError

        if self.verbose:
            logger.info("Constructing the list of images.")
        additional_pattern = '/**/*' if recursive else '/*'
        files = []
        for extension in extensions:
            files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive))

        if self.verbose:
            logger.info("Finished searching for images. %s images found", len(files))
            logger.info("Preparing to run the detection.")

        predictions = {}
        for image_path in tqdm(files, disable=not show_progress_bar):
            if self.verbose:
                logger.info("Running the face detector on image: %s", image_path)
            predictions[image_path] = self.detect_from_image(image_path)

        if self.verbose:
            logger.info("The detector was successfully run on all %s images", len(files))

        return predictions

    @property
    def reference_scale(self):
        raise NotImplementedError

    @property
    def reference_x_shift(self):
        raise NotImplementedError

    @property
    def reference_y_shift(self):
        raise NotImplementedError

    @staticmethod
    def tensor_or_path_to_ndarray(tensor_or_path, rgb=True):
        """Convert a path (represented as a string) or torch.tensor to a numpy.ndarray.

        Arguments:
            tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself
        """
        if isinstance(tensor_or_path, str):
            return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1]
        elif torch.is_tensor(tensor_or_path):
            # Call .cpu() in case the tensor is coming from CUDA.
            return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy()
        elif isinstance(tensor_or_path, np.ndarray):
            return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path
        else:
            raise TypeError
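Since FaceDetector is only a contract, a concrete implementation has to supply detect_from_image. A minimal sketch of a subclass, for illustration only (DummyDetector, its fixed full-frame box, and the image path are assumptions, not part of the commit):

import numpy as np

from musetalk.utils.face_detection.detection.core import FaceDetector


class DummyDetector(FaceDetector):
    # Toy subclass that "detects" a single full-frame box; API illustration only.

    def detect_from_image(self, tensor_or_path):
        # Reuse the base-class helper so paths, tensors and ndarrays all work.
        image = self.tensor_or_path_to_ndarray(tensor_or_path)
        h, w = image.shape[:2]
        return [np.array([0, 0, w, h, 1.0])]  # (x1, y1, x2, y2, score)


detector = DummyDetector(device='cpu', verbose=True)  # logs a CPU warning
boxes = detector.detect_from_image('data/image_01.jpg')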
musetalk/utils/face_detection/detection/sfd/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .sfd_detector import SFDDetector as FaceDetector
musetalk/utils/face_detection/detection/sfd/bbox.py (new file, 129 lines)
@@ -0,0 +1,129 @@
from __future__ import print_function
import os
import sys
import cv2
import random
import datetime
import time
import math
import argparse
import numpy as np
import torch

try:
    from iou import IOU
except BaseException:
    # Pure-Python fallback; the Cython IOU gives roughly a 10x speedup.
    def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2):
        sa = abs((ax2 - ax1) * (ay2 - ay1))
        sb = abs((bx2 - bx1) * (by2 - by1))
        x1, y1 = max(ax1, bx1), max(ay1, by1)
        x2, y2 = min(ax2, bx2), min(ay2, by2)
        w = x2 - x1
        h = y2 - y1
        if w < 0 or h < 0:
            return 0.0
        else:
            # intersection / union
            return 1.0 * w * h / (sa + sb - w * h)


def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh):
    # Box corners -> log-space offsets relative to an anchor (axc, ayc, aww, ahh).
    xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1
    dx, dy = (xc - axc) / aww, (yc - ayc) / ahh
    dw, dh = math.log(ww / aww), math.log(hh / ahh)
    return dx, dy, dw, dh


def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh):
    # Inverse of bboxlog: offsets -> absolute box corners.
    xc, yc = dx * aww + axc, dy * ahh + ayc
    ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh
    x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2
    return x1, y1, x2, y2


def nms(dets, thresh):
    # Greedy non-maximum suppression over rows of (x1, y1, x2, y2, score);
    # returns the indices of the boxes to keep.
    if 0 == len(dets):
        return []
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]])
        xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]])

        w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1)
        ovr = w * h / (areas[i] + areas[order[1:]] - w * h)

        # Keep only the boxes whose overlap with the current best is small enough.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """

    # dist b/t match center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    # encode variance
    g_cxcy /= (variances[0] * priors[:, 2:])
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]


def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors, 4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    # Convert center-size form to corner form (x1, y1, x2, y2).
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes


def batch_decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors, 4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
    boxes[:, :, :2] -= boxes[:, :, 2:] / 2
    boxes[:, :, 2:] += boxes[:, :, :2]
    return boxes
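A quick sanity check on nms; the arrays below are made-up values, not data from the commit:

import numpy as np

from musetalk.utils.face_detection.detection.sfd.bbox import nms

# Two heavily overlapping boxes plus one separate box, rows of (x1, y1, x2, y2, score).
dets = np.array([
    [10, 10, 110, 110, 0.9],
    [12, 12, 112, 112, 0.8],    # overlaps the first box, so it is suppressed
    [200, 200, 300, 300, 0.7],
])
print(nms(dets, thresh=0.3))    # -> indices [0, 2]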
musetalk/utils/face_detection/detection/sfd/detect.py (new file, 114 lines)
@@ -0,0 +1,114 @@
import torch
import torch.nn.functional as F

import os
import sys
import cv2
import random
import datetime
import math
import argparse
import numpy as np

import scipy.io as sio
import zipfile
from .net_s3fd import s3fd
from .bbox import *


def detect(net, img, device):
    # Subtract the BGR channel means used at training time, then make the
    # image a (1, C, H, W) float batch.
    img = img - np.array([104, 117, 123])
    img = img.transpose(2, 0, 1)
    img = img.reshape((1,) + img.shape)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    img = torch.from_numpy(img).float().to(device)
    BB, CC, HH, WW = img.size()
    with torch.no_grad():
        olist = net(img)

    bboxlist = []
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)
    olist = [oelem.data.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        FB, FC, FH, FW = ocls.size()  # feature map size
        stride = 2 ** (i + 2)  # 4, 8, 16, 32, 64, 128
        anchor = stride * 4
        # Keep only feature-map cells whose face probability exceeds 0.05.
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[0, 1, hindex, windex]
            loc = oreg[0, :, hindex, windex].contiguous().view(1, 4)
            # Each cell gets one square prior of side stride * 4 centred on
            # the cell; decode() turns the regressed offsets into absolute
            # (x1, y1, x2, y2) coordinates.
            priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]])
            variances = [0.1, 0.2]
            box = decode(loc, priors, variances)
            x1, y1, x2, y2 = box[0] * 1.0
            # cv2.rectangle(imgshow, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
            bboxlist.append([x1, y1, x2, y2, score])
    bboxlist = np.array(bboxlist)
    if 0 == len(bboxlist):
        bboxlist = np.zeros((1, 5))

    return bboxlist


def batch_detect(net, imgs, device):
    imgs = imgs - np.array([104, 117, 123])
    imgs = imgs.transpose(0, 3, 1, 2)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    imgs = torch.from_numpy(imgs).float().to(device)
    BB, CC, HH, WW = imgs.size()
    with torch.no_grad():
        olist = net(imgs)
        # print(olist)

    bboxlist = []
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)

    olist = [oelem.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        FB, FC, FH, FW = ocls.size()  # feature map size
        stride = 2 ** (i + 2)  # 4, 8, 16, 32, 64, 128
        anchor = stride * 4
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[:, 1, hindex, windex]
            loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4)
            priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4)
            variances = [0.1, 0.2]
            box = batch_decode(loc, priors, variances)
            box = box[:, 0] * 1.0
            # cv2.rectangle(imgshow, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
            bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy())
    bboxlist = np.array(bboxlist)
    if 0 == len(bboxlist):
        bboxlist = np.zeros((1, BB, 5))

    return bboxlist


def flip_detect(net, img, device):
    # Detect on the horizontally flipped image, then mirror the x
    # coordinates back into the original frame.
    img = cv2.flip(img, 1)
    b = detect(net, img, device)

    bboxlist = np.zeros(b.shape)
    bboxlist[:, 0] = img.shape[1] - b[:, 2]
    bboxlist[:, 1] = b[:, 1]
    bboxlist[:, 2] = img.shape[1] - b[:, 0]
    bboxlist[:, 3] = b[:, 3]
    bboxlist[:, 4] = b[:, 4]
    return bboxlist


def pts_to_bb(pts):
    # Tight axis-aligned bounding box around a set of 2-D points.
    min_x, min_y = np.min(pts, axis=0)
    max_x, max_y = np.max(pts, axis=0)
    return np.array([min_x, min_y, max_x, max_y])
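A hedged end-to-end sketch of how these functions chain together (it assumes the S3FD weights already sit in a local s3fd.pth, as sfd_detector.py below arranges; the image path is illustrative):

import cv2
import torch

from musetalk.utils.face_detection.detection.sfd.net_s3fd import s3fd
from musetalk.utils.face_detection.detection.sfd.detect import detect
from musetalk.utils.face_detection.detection.sfd.bbox import nms

net = s3fd()
net.load_state_dict(torch.load('s3fd.pth', map_location='cpu'))  # assumed local weight file
net.eval()

img = cv2.imread('face.jpg')               # BGR, matching the 104/117/123 mean subtraction
raw = detect(net, img, device='cpu')       # (N, 5) array of [x1, y1, x2, y2, score]
boxes = raw[nms(raw, 0.3), :]              # drop overlapping duplicates
boxes = [b for b in boxes if b[-1] > 0.5]  # keep confident detections, as SFDDetector does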
musetalk/utils/face_detection/detection/sfd/net_s3fd.py (new file, 129 lines)
@@ -0,0 +1,129 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class L2Norm(nn.Module):
    def __init__(self, n_channels, scale=1.0):
        super(L2Norm, self).__init__()
        self.n_channels = n_channels
        self.scale = scale
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))
        self.weight.data *= 0.0
        self.weight.data += self.scale

    def forward(self, x):
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
        x = x / norm * self.weight.view(1, -1, 1, 1)
        return x


class s3fd(nn.Module):
    def __init__(self):
        super(s3fd, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3)
        self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0)

        self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)

        self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0)
        self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)

        self.conv3_3_norm = L2Norm(256, scale=10)
        self.conv4_3_norm = L2Norm(512, scale=8)
        self.conv5_3_norm = L2Norm(512, scale=5)

        self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)
        self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)
        self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)
        self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)

        self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, padding=1)
        self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1)
        self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)
        self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1)
        self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        f3_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        f4_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        f5_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        ffc7 = h
        h = F.relu(self.conv6_1(h))
        h = F.relu(self.conv6_2(h))
        f6_2 = h
        h = F.relu(self.conv7_1(h))
        h = F.relu(self.conv7_2(h))
        f7_2 = h

        f3_3 = self.conv3_3_norm(f3_3)
        f4_3 = self.conv4_3_norm(f4_3)
        f5_3 = self.conv5_3_norm(f5_3)

        cls1 = self.conv3_3_norm_mbox_conf(f3_3)
        reg1 = self.conv3_3_norm_mbox_loc(f3_3)
        cls2 = self.conv4_3_norm_mbox_conf(f4_3)
        reg2 = self.conv4_3_norm_mbox_loc(f4_3)
        cls3 = self.conv5_3_norm_mbox_conf(f5_3)
        reg3 = self.conv5_3_norm_mbox_loc(f5_3)
        cls4 = self.fc7_mbox_conf(ffc7)
        reg4 = self.fc7_mbox_loc(ffc7)
        cls5 = self.conv6_2_mbox_conf(f6_2)
        reg5 = self.conv6_2_mbox_loc(f6_2)
        cls6 = self.conv7_2_mbox_conf(f7_2)
        reg6 = self.conv7_2_mbox_loc(f7_2)

        # max-out background label
        chunk = torch.chunk(cls1, 4, 1)
        bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2])
        cls1 = torch.cat([bmax, chunk[3]], dim=1)

        return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6]
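A small smoke test of the network's output structure; the 256x256 input size is an arbitrary choice, and random weights are used since no checkpoint is loaded here:

import torch

from musetalk.utils.face_detection.detection.sfd.net_s3fd import s3fd

net = s3fd().eval()
x = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    outs = net(x)

# Six detection scales, each a (classification, regression) pair.
assert len(outs) == 12
for i in range(6):
    cls, reg = outs[2 * i], outs[2 * i + 1]
    print(cls.shape, reg.shape)  # cls has 2 channels (after max-out), reg has 4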
musetalk/utils/face_detection/detection/sfd/sfd_detector.py (new file, 59 lines)
@@ -0,0 +1,59 @@
import os
import cv2
import torch
from torch.utils.model_zoo import load_url

from ..core import FaceDetector

from .net_s3fd import s3fd
from .bbox import *
from .detect import *

models_urls = {
    's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth',
}


class SFDDetector(FaceDetector):
    def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False):
        super(SFDDetector, self).__init__(device, verbose)

        # Initialise the face detector: download the weights if no local
        # checkpoint exists, otherwise load it from disk.
        if not os.path.isfile(path_to_detector):
            model_weights = load_url(models_urls['s3fd'])
        else:
            model_weights = torch.load(path_to_detector)

        self.face_detector = s3fd()
        self.face_detector.load_state_dict(model_weights)
        self.face_detector.to(device)
        self.face_detector.eval()

    def detect_from_image(self, tensor_or_path):
        image = self.tensor_or_path_to_ndarray(tensor_or_path)

        bboxlist = detect(self.face_detector, image, device=self.device)
        keep = nms(bboxlist, 0.3)
        bboxlist = bboxlist[keep, :]
        bboxlist = [x for x in bboxlist if x[-1] > 0.5]

        return bboxlist

    def detect_from_batch(self, images):
        bboxlists = batch_detect(self.face_detector, images, device=self.device)
        keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])]
        bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)]
        bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists]

        return bboxlists

    @property
    def reference_scale(self):
        return 195

    @property
    def reference_x_shift(self):
        return 0

    @property
    def reference_y_shift(self):
        return 0
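Typical usage of the detector, tying the package together (the image path and device choice are illustrative; on first use the weights are fetched from the hard-coded URL when no local s3fd.pth exists):

from musetalk.utils.face_detection.detection.sfd import FaceDetector  # SFDDetector alias

detector = FaceDetector(device='cpu', verbose=True)
faces = detector.detect_from_image('portrait.jpg')
for x1, y1, x2, y2, score in faces:
    print(f"face ({x1:.0f}, {y1:.0f})-({x2:.0f}, {y2:.0f}), confidence {score:.2f}")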