mirror of
https://github.com/TMElyralab/MuseTalk.git
synced 2026-02-05 18:09:19 +08:00
v1.5
This commit is contained in:
123
musetalk/utils/blending.py
Normal file → Executable file
123
musetalk/utils/blending.py
Normal file → Executable file
@@ -2,9 +2,6 @@ from PIL import Image
|
||||
import numpy as np
|
||||
import cv2
|
||||
import copy
|
||||
from face_parsing import FaceParsing
|
||||
|
||||
fp = FaceParsing()
|
||||
|
||||
def get_crop_box(box, expand):
|
||||
x, y, x1, y1 = box
|
||||
@@ -14,46 +11,98 @@ def get_crop_box(box, expand):
|
||||
crop_box = [x_c-s, y_c-s, x_c+s, y_c+s]
|
||||
return crop_box, s
|
||||
|
||||
def face_seg(image):
|
||||
seg_image = fp(image)
|
||||
def face_seg(image, mode="jaw", fp=None):
|
||||
"""
|
||||
对图像进行面部解析,生成面部区域的掩码。
|
||||
|
||||
Args:
|
||||
image (PIL.Image): 输入图像。
|
||||
|
||||
Returns:
|
||||
PIL.Image: 面部区域的掩码图像。
|
||||
"""
|
||||
seg_image = fp(image, mode=mode) # 使用 FaceParsing 模型解析面部
|
||||
if seg_image is None:
|
||||
print("error, no person_segment")
|
||||
print("error, no person_segment") # 如果没有检测到面部,返回错误
|
||||
return None
|
||||
|
||||
seg_image = seg_image.resize(image.size)
|
||||
seg_image = seg_image.resize(image.size) # 将掩码图像调整为输入图像的大小
|
||||
return seg_image
|
||||
|
||||
def get_image(image,face,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
#print(image.shape)
|
||||
#print(face.shape)
|
||||
|
||||
def get_image(image, face, face_box, upper_boundary_ratio=0.5, expand=1.5, mode="raw", fp=None):
|
||||
"""
|
||||
将裁剪的面部图像粘贴回原始图像,并进行一些处理。
|
||||
|
||||
Args:
|
||||
image (numpy.ndarray): 原始图像(身体部分)。
|
||||
face (numpy.ndarray): 裁剪的面部图像。
|
||||
face_box (tuple): 面部边界框的坐标 (x, y, x1, y1)。
|
||||
upper_boundary_ratio (float): 用于控制面部区域的保留比例。
|
||||
expand (float): 扩展因子,用于放大裁剪框。
|
||||
mode: 融合mask构建方式
|
||||
|
||||
Returns:
|
||||
numpy.ndarray: 处理后的图像。
|
||||
"""
|
||||
# 将 numpy 数组转换为 PIL 图像
|
||||
body = Image.fromarray(image[:, :, ::-1]) # 身体部分图像(整张图)
|
||||
face = Image.fromarray(face[:, :, ::-1]) # 面部图像
|
||||
|
||||
x, y, x1, y1 = face_box # 获取面部边界框的坐标
|
||||
crop_box, s = get_crop_box(face_box, expand) # 计算扩展后的裁剪框
|
||||
x_s, y_s, x_e, y_e = crop_box # 裁剪框的坐标
|
||||
face_position = (x, y) # 面部在原始图像中的位置
|
||||
|
||||
# 从身体图像中裁剪出扩展后的面部区域(下巴到边界有距离)
|
||||
face_large = body.crop(crop_box)
|
||||
|
||||
ori_shape = face_large.size # 裁剪后图像的原始尺寸
|
||||
|
||||
# 对裁剪后的面部区域进行面部解析,生成掩码
|
||||
mask_image = face_seg(face_large, mode=mode, fp=fp)
|
||||
|
||||
mask_small = mask_image.crop((x - x_s, y - y_s, x1 - x_s, y1 - y_s)) # 裁剪出面部区域的掩码
|
||||
|
||||
mask_image = Image.new('L', ori_shape, 0) # 创建一个全黑的掩码图像
|
||||
mask_image.paste(mask_small, (x - x_s, y - y_s, x1 - x_s, y1 - y_s)) # 将面部掩码粘贴到全黑图像上
|
||||
|
||||
|
||||
# 保留面部区域的上半部分(用于控制说话区域)
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio) # 计算上半部分的边界
|
||||
modified_mask_image = Image.new('L', ori_shape, 0) # 创建一个新的全黑掩码图像
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary)) # 粘贴上半部分掩码
|
||||
|
||||
|
||||
# 对掩码进行高斯模糊,使边缘更平滑
|
||||
blur_kernel_size = int(0.05 * ori_shape[0] // 2 * 2) + 1 # 计算模糊核大小
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0) # 高斯模糊
|
||||
#mask_array = np.array(modified_mask_image)
|
||||
mask_image = Image.fromarray(mask_array) # 将模糊后的掩码转换回 PIL 图像
|
||||
|
||||
# 将裁剪的面部图像粘贴回扩展后的面部区域
|
||||
face_large.paste(face, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))
|
||||
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
|
||||
# 不用掩码,完全用infer
|
||||
#face_large.save("debug/checkpoint_6_face_large.png")
|
||||
|
||||
body = np.array(body) # 将 PIL 图像转换回 numpy 数组
|
||||
|
||||
return body[:, :, ::-1] # 返回处理后的图像(BGR 转 RGB)
|
||||
|
||||
def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
#print(x1-x,y1-y)
|
||||
crop_box, s = get_crop_box(face_box, expand)
|
||||
x, y, x1, y1 = face_box
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_position = (x, y)
|
||||
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
|
||||
# keep upper_boundary_ratio of talking area
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio)
|
||||
modified_mask_image = Image.new('L', ori_shape, 0)
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
mask_image = Image.fromarray(mask_array)
|
||||
|
||||
mask_image = mask_image.convert("L")
|
||||
face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
body = np.array(body)
|
||||
@@ -84,17 +133,3 @@ def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
return mask_array,crop_box
|
||||
|
||||
def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
body = image
|
||||
x, y, x1, y1 = face_box
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_large = copy.deepcopy(body[y_s:y_e, x_s:x_e])
|
||||
face_large[y-y_s:y1-y_s, x-x_s:x1-x_s]=face
|
||||
|
||||
mask_image = cv2.cvtColor(mask_array,cv2.COLOR_BGR2GRAY)
|
||||
mask_image = (mask_image/255).astype(np.float32)
|
||||
|
||||
body[y_s:y_e, x_s:x_e] = cv2.blendLinear(face_large,body[y_s:y_e, x_s:x_e],mask_image,1-mask_image)
|
||||
|
||||
return body
|
||||
|
||||
Reference in New Issue
Block a user