mirror of
https://github.com/TMElyralab/MuseTalk.git
synced 2026-02-04 17:39:20 +08:00
feat: real-time infer (#286)
* feat: realtime infer * cchore: infer script
This commit is contained in:
@@ -11,7 +11,7 @@ class AudioProcessor:
|
||||
def __init__(self, feature_extractor_path="openai/whisper-tiny/"):
|
||||
self.feature_extractor = AutoFeatureExtractor.from_pretrained(feature_extractor_path)
|
||||
|
||||
def get_audio_feature(self, wav_path, start_index=0):
|
||||
def get_audio_feature(self, wav_path, start_index=0, weight_dtype=None):
|
||||
if not os.path.exists(wav_path):
|
||||
return None
|
||||
librosa_output, sampling_rate = librosa.load(wav_path, sr=16000)
|
||||
@@ -27,6 +27,8 @@ class AudioProcessor:
|
||||
return_tensors="pt",
|
||||
sampling_rate=sampling_rate
|
||||
).input_features
|
||||
if weight_dtype is not None:
|
||||
audio_feature = audio_feature.to(dtype=weight_dtype)
|
||||
features.append(audio_feature)
|
||||
|
||||
return features, len(librosa_output)
|
||||
|
||||
@@ -3,6 +3,7 @@ import numpy as np
|
||||
import cv2
|
||||
import copy
|
||||
|
||||
|
||||
def get_crop_box(box, expand):
|
||||
x, y, x1, y1 = box
|
||||
x_c, y_c = (x+x1)//2, (y+y1)//2
|
||||
@@ -11,7 +12,8 @@ def get_crop_box(box, expand):
|
||||
crop_box = [x_c-s, y_c-s, x_c+s, y_c+s]
|
||||
return crop_box, s
|
||||
|
||||
def face_seg(image, mode="jaw", fp=None):
|
||||
|
||||
def face_seg(image, mode="raw", fp=None):
|
||||
"""
|
||||
对图像进行面部解析,生成面部区域的掩码。
|
||||
|
||||
@@ -86,14 +88,12 @@ def get_image(image, face, face_box, upper_boundary_ratio=0.5, expand=1.5, mode=
|
||||
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
|
||||
# 不用掩码,完全用infer
|
||||
#face_large.save("debug/checkpoint_6_face_large.png")
|
||||
|
||||
body = np.array(body) # 将 PIL 图像转换回 numpy 数组
|
||||
|
||||
return body[:, :, ::-1] # 返回处理后的图像(BGR 转 RGB)
|
||||
|
||||
def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
|
||||
def get_image_blending(image, face, face_box, mask_array, crop_box):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
@@ -108,7 +108,8 @@ def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
body = np.array(body)
|
||||
return body[:,:,::-1]
|
||||
|
||||
def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
|
||||
def get_image_prepare_material(image, face_box, upper_boundary_ratio=0.5, expand=1.5, fp=None, mode="raw"):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
@@ -119,7 +120,7 @@ def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_image = face_seg(face_large, mode=mode, fp=fp)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
@@ -132,4 +133,4 @@ def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
return mask_array,crop_box
|
||||
return mask_array, crop_box
|
||||
|
||||
@@ -74,7 +74,7 @@ class FaceParsing():
|
||||
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
|
||||
])
|
||||
|
||||
def __call__(self, image, size=(512, 512), mode="jaw"):
|
||||
def __call__(self, image, size=(512, 512), mode="raw"):
|
||||
if isinstance(image, str):
|
||||
image = Image.open(image)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user