# Mirror of https://github.com/aigc3d/LAM_Audio2Expression.git
import json
import time
import warnings
from typing import List, Optional, Tuple

import numpy as np
from scipy.ndimage import label
from scipy.signal import savgol_filter

ARKitLeftRightPair = [
    ("jawLeft", "jawRight"),
    ("mouthLeft", "mouthRight"),
    ("mouthSmileLeft", "mouthSmileRight"),
    ("mouthFrownLeft", "mouthFrownRight"),
    ("mouthDimpleLeft", "mouthDimpleRight"),
    ("mouthStretchLeft", "mouthStretchRight"),
    ("mouthPressLeft", "mouthPressRight"),
    ("mouthLowerDownLeft", "mouthLowerDownRight"),
    ("mouthUpperUpLeft", "mouthUpperUpRight"),
    ("cheekSquintLeft", "cheekSquintRight"),
    ("noseSneerLeft", "noseSneerRight"),
    ("browDownLeft", "browDownRight"),
    ("browOuterUpLeft", "browOuterUpRight"),
    ("eyeBlinkLeft", "eyeBlinkRight"),
    ("eyeLookDownLeft", "eyeLookDownRight"),
    ("eyeLookInLeft", "eyeLookInRight"),
    ("eyeLookOutLeft", "eyeLookOutRight"),
    ("eyeLookUpLeft", "eyeLookUpRight"),
    ("eyeSquintLeft", "eyeSquintRight"),
    ("eyeWideLeft", "eyeWideRight"),
]

ARKitBlendShape = [
    "browDownLeft",
    "browDownRight",
    "browInnerUp",
    "browOuterUpLeft",
    "browOuterUpRight",
    "cheekPuff",
    "cheekSquintLeft",
    "cheekSquintRight",
    "eyeBlinkLeft",
    "eyeBlinkRight",
    "eyeLookDownLeft",
    "eyeLookDownRight",
    "eyeLookInLeft",
    "eyeLookInRight",
    "eyeLookOutLeft",
    "eyeLookOutRight",
    "eyeLookUpLeft",
    "eyeLookUpRight",
    "eyeSquintLeft",
    "eyeSquintRight",
    "eyeWideLeft",
    "eyeWideRight",
    "jawForward",
    "jawLeft",
    "jawOpen",
    "jawRight",
    "mouthClose",
    "mouthDimpleLeft",
    "mouthDimpleRight",
    "mouthFrownLeft",
    "mouthFrownRight",
    "mouthFunnel",
    "mouthLeft",
    "mouthLowerDownLeft",
    "mouthLowerDownRight",
    "mouthPressLeft",
    "mouthPressRight",
    "mouthPucker",
    "mouthRight",
    "mouthRollLower",
    "mouthRollUpper",
    "mouthShrugLower",
    "mouthShrugUpper",
    "mouthSmileLeft",
    "mouthSmileRight",
    "mouthStretchLeft",
    "mouthStretchRight",
    "mouthUpperUpLeft",
    "mouthUpperUpRight",
    "noseSneerLeft",
    "noseSneerRight",
    "tongueOut",
]

MOUTH_BLENDSHAPES = [
    "mouthDimpleLeft",
    "mouthDimpleRight",
    "mouthFrownLeft",
    "mouthFrownRight",
    "mouthFunnel",
    "mouthLeft",
    "mouthLowerDownLeft",
    "mouthLowerDownRight",
    "mouthPressLeft",
    "mouthPressRight",
    "mouthPucker",
    "mouthRight",
    "mouthRollLower",
    "mouthRollUpper",
    "mouthShrugLower",
    "mouthShrugUpper",
    "mouthSmileLeft",
    "mouthSmileRight",
    "mouthStretchLeft",
    "mouthStretchRight",
    "mouthUpperUpLeft",
    "mouthUpperUpRight",
    "jawForward",
    "jawLeft",
    "jawOpen",
    "jawRight",
    "noseSneerLeft",
    "noseSneerRight",
    "cheekPuff",
]

DEFAULT_CONTEXT = {
    'is_initial_input': True,
    'previous_audio': None,
    'previous_expression': None,
    'previous_volume': None,
    'previous_headpose': None,
}

RETURN_CODE = {
    "SUCCESS": 0,
    "AUDIO_LENGTH_ERROR": 1,
    "CHECKPOINT_PATH_ERROR": 2,
    "MODEL_INFERENCE_ERROR": 3,
}

DEFAULT_CONTEXTRETURN = {
    "code": RETURN_CODE['SUCCESS'],
    "expression": None,
    "headpose": None,
}

# Seven-frame blink intensity curves (normalized to 0-1); one pattern is
# chosen at random for each blink.
BLINK_PATTERNS = [
    np.array([0.365, 0.950, 0.956, 0.917, 0.367, 0.119, 0.025]),
    np.array([0.235, 0.910, 0.945, 0.778, 0.191, 0.235, 0.089]),
    np.array([0.870, 0.950, 0.949, 0.696, 0.191, 0.073, 0.007]),
    np.array([0.000, 0.557, 0.953, 0.942, 0.426, 0.148, 0.018]),
]

# Postprocess
def symmetrize_blendshapes(
    bs_params: np.ndarray,
    mode: str = "average",
    symmetric_pairs: list = ARKitLeftRightPair
) -> np.ndarray:
    """
    Apply symmetrization to ARKit blendshape parameters (batched version).

    Args:
        bs_params: numpy array of shape (N, 52), batch of ARKit parameters
        mode: symmetrization mode ["average", "max", "min", "left_dominant", "right_dominant"]
        symmetric_pairs: list of left-right parameter pairs

    Returns:
        Symmetrized parameters with the same shape (N, 52)
    """
    name_to_idx = {name: i for i, name in enumerate(ARKitBlendShape)}

    # Input validation
    if bs_params.ndim != 2 or bs_params.shape[1] != 52:
        raise ValueError("Input must be of shape (N, 52)")

    symmetric_bs = bs_params.copy()  # Shape (N, 52)

    # Precompute valid index pairs
    valid_pairs = []
    for left, right in symmetric_pairs:
        left_idx = name_to_idx.get(left)
        right_idx = name_to_idx.get(right)
        if None not in (left_idx, right_idx):
            valid_pairs.append((left_idx, right_idx))

    # Vectorized processing
    for l_idx, r_idx in valid_pairs:
        left_col = symmetric_bs[:, l_idx]
        right_col = symmetric_bs[:, r_idx]

        if mode == "average":
            new_vals = (left_col + right_col) / 2
        elif mode == "max":
            new_vals = np.maximum(left_col, right_col)
        elif mode == "min":
            new_vals = np.minimum(left_col, right_col)
        elif mode == "left_dominant":
            new_vals = left_col
        elif mode == "right_dominant":
            new_vals = right_col
        else:
            raise ValueError(f"Invalid mode: {mode}")

        # Update both columns simultaneously
        symmetric_bs[:, l_idx] = new_vals
        symmetric_bs[:, r_idx] = new_vals

    return symmetric_bs

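# Illustrative usage sketch (not part of the original module): run a random
# batch through symmetrize_blendshapes and verify that every left/right pair
# in ARKitLeftRightPair ends up identical.
def _example_symmetrize_blendshapes() -> None:
    bs = np.random.rand(4, 52).astype(np.float32)
    bs_sym = symmetrize_blendshapes(bs, mode="average")
    idx = {name: i for i, name in enumerate(ARKitBlendShape)}
    for left, right in ARKitLeftRightPair:
        assert np.allclose(bs_sym[:, idx[left]], bs_sym[:, idx[right]])
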
def apply_random_eye_blinks(
    input: np.ndarray,
    blink_scale: tuple = (0.8, 1.0),
    blink_interval: tuple = (60, 120),
    blink_duration: int = 7
) -> np.ndarray:
    """
    Apply randomized eye blinks to blendshape parameters.

    Args:
        input: Input array of shape (N, 52) containing blendshape parameters
        blink_scale: Tuple (min, max) for random blink intensity scaling
        blink_interval: Tuple (min, max) for random blink spacing in frames
        blink_duration: Number of frames for blink animation (fixed)

    Returns:
        The input array with blinks written into the eye-blink columns
        (modified in-place and returned)
    """
    # Reset the eyeBlinkLeft/eyeBlinkRight columns (indices 8 and 9)
    n_frames = input.shape[0]
    input[:, 8:10] = np.zeros((n_frames, 2))
    current_frame = 0

    # Main blink application loop
    while current_frame < n_frames - blink_duration:
        # Randomize blink parameters
        scale = np.random.uniform(*blink_scale)
        pattern = BLINK_PATTERNS[np.random.randint(0, len(BLINK_PATTERNS))]

        # Apply blink animation to both eyes
        blink_values = pattern * scale
        input[current_frame:current_frame + blink_duration, 8] = blink_values
        input[current_frame:current_frame + blink_duration, 9] = blink_values

        # Advance to next blink position
        current_frame += blink_duration + np.random.randint(*blink_interval)

    return input

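# Illustrative usage sketch (not part of the original module): add blinks to a
# 10-second clip at 30 fps; the function writes into columns 8/9 in-place and
# also returns the same array.
def _example_apply_random_eye_blinks() -> None:
    params = np.zeros((300, 52), dtype=np.float32)
    out = apply_random_eye_blinks(params)
    assert out is params
    assert 0.0 <= float(out[:, 8].max()) <= 1.0
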
def apply_random_eye_blinks_context(
    animation_params: np.ndarray,
    processed_frames: int = 0,
    intensity_range: tuple = (0.8, 1.0)
) -> np.ndarray:
    """Applies random eye blink patterns to facial animation parameters.

    Args:
        animation_params: Input facial animation parameters array with shape
            [num_frames, num_features]. Columns 8 and 9 hold the left/right
            eye blink parameters.
        processed_frames: Number of already processed frames that shouldn't be modified
        intensity_range: Tuple defining (min, max) scaling for blink intensity

    Returns:
        Modified animation parameters array with random eye blinks added to unprocessed frames
    """
    remaining_frames = animation_params.shape[0] - processed_frames

    # Only apply blinks if there are enough remaining frames (blink pattern requires 7 frames)
    if remaining_frames <= 7:
        return animation_params

    # Configure blink timing parameters
    min_blink_interval = 40   # Minimum frames between blinks
    max_blink_interval = 100  # Maximum frames between blinks

    # Find last blink in previously processed frames (column 8 > 0.5 indicates blink)
    previous_blink_indices = np.where(animation_params[:processed_frames, 8] > 0.5)[0]
    last_processed_blink = previous_blink_indices[-1] - 7 if previous_blink_indices.size > 0 else processed_frames

    # Calculate first new blink position
    blink_interval = np.random.randint(min_blink_interval, max_blink_interval)
    first_blink_start = max(0, blink_interval - last_processed_blink)

    # Apply first blink if there's enough space
    if first_blink_start <= (remaining_frames - 7):
        # Randomly select blink pattern and intensity
        blink_pattern = BLINK_PATTERNS[np.random.randint(0, len(BLINK_PATTERNS))]
        intensity = np.random.uniform(*intensity_range)

        # Calculate blink frame range
        blink_start = processed_frames + first_blink_start
        blink_end = blink_start + 7

        # Apply pattern to both eyes
        animation_params[blink_start:blink_end, 8] = blink_pattern * intensity
        animation_params[blink_start:blink_end, 9] = blink_pattern * intensity

        # Check space for an additional blink
        remaining_after_blink = animation_params.shape[0] - blink_end
        if remaining_after_blink > min_blink_interval:
            # Calculate second blink position
            second_intensity = np.random.uniform(*intensity_range)
            second_interval = np.random.randint(min_blink_interval, max_blink_interval)

            if (remaining_after_blink - 7) > second_interval:
                second_pattern = BLINK_PATTERNS[np.random.randint(0, len(BLINK_PATTERNS))]
                second_blink_start = blink_end + second_interval
                second_blink_end = second_blink_start + 7

                # Apply second blink
                animation_params[second_blink_start:second_blink_end, 8] = second_pattern * second_intensity
                animation_params[second_blink_start:second_blink_end, 9] = second_pattern * second_intensity

    return animation_params

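# Illustrative streaming sketch (not part of the original module): the first
# 100 frames were emitted by a previous chunk and must stay untouched, so the
# helper only writes blinks into the remaining frames.
def _example_apply_random_eye_blinks_context() -> None:
    params = np.zeros((400, 52), dtype=np.float32)
    frozen_prefix = params[:100].copy()
    out = apply_random_eye_blinks_context(params, processed_frames=100)
    assert np.array_equal(out[:100], frozen_prefix)
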
def export_blendshape_animation(
    blendshape_weights: np.ndarray,
    output_path: str,
    blendshape_names: List[str],
    fps: float,
    rotation_data: Optional[np.ndarray] = None
) -> None:
    """
    Export blendshape animation data to JSON format compatible with ARKit.

    Args:
        blendshape_weights: 2D numpy array of shape (N, 52) containing animation frames
        output_path: Full path for output JSON file (including .json extension)
        blendshape_names: Ordered list of 52 ARKit-standard blendshape names
        fps: Frame rate for timing calculations (frames per second)
        rotation_data: Optional 3D rotation data array of shape (N, 3)

    Raises:
        ValueError: If input dimensions are incompatible
        IOError: If file writing fails
    """
    # Validate input dimensions
    if blendshape_weights.shape[1] != 52:
        raise ValueError(f"Expected 52 blendshapes, got {blendshape_weights.shape[1]}")
    if len(blendshape_names) != 52:
        raise ValueError(f"Requires 52 blendshape names, got {len(blendshape_names)}")
    if rotation_data is not None and len(rotation_data) != len(blendshape_weights):
        raise ValueError("Rotation data length must match animation frames")

    # Build animation data structure
    animation_data = {
        "names": blendshape_names,
        "metadata": {
            "fps": fps,
            "frame_count": len(blendshape_weights),
            "blendshape_names": blendshape_names
        },
        "frames": []
    }

    # Convert numpy array to serializable format
    for frame_idx in range(blendshape_weights.shape[0]):
        frame_data = {
            "weights": blendshape_weights[frame_idx].tolist(),
            "time": frame_idx / fps,
            # Explicit None check: truth-testing a numpy array raises ValueError
            "rotation": rotation_data[frame_idx].tolist() if rotation_data is not None else []
        }
        animation_data["frames"].append(frame_data)

    # Safeguard against data loss
    if not output_path.endswith('.json'):
        output_path += '.json'

    # Write to file with error handling
    try:
        with open(output_path, 'w', encoding='utf-8') as json_file:
            json.dump(animation_data, json_file, indent=2, ensure_ascii=False)
    except Exception as e:
        raise IOError(f"Failed to write animation data: {str(e)}") from e

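# Illustrative usage sketch (not part of the original module): export one
# second of neutral animation. The output path is a placeholder, not a path
# the project defines.
def _example_export_blendshape_animation() -> None:
    weights = np.zeros((30, 52), dtype=np.float32)
    export_blendshape_animation(weights, "/tmp/anim_demo.json",
                                ARKitBlendShape, fps=30.0)
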
def apply_savitzky_golay_smoothing(
    input_data: np.ndarray,
    window_length: int = 5,
    polyorder: int = 2,
    axis: int = 0,
    validate: bool = True
) -> Tuple[np.ndarray, Optional[float]]:
    """
    Apply Savitzky-Golay filter smoothing along the specified axis of input data.

    Args:
        input_data: 2D numpy array of shape (n_samples, n_features)
        window_length: Length of the filter window (must be odd and > polyorder)
        polyorder: Order of the polynomial fit
        axis: Axis along which to filter (0: column-wise, 1: row-wise)
        validate: Enable input validation checks when True

    Returns:
        tuple: (smoothed_data, processing_time)
            - smoothed_data: Smoothed output array, clipped to [0, 1]
            - processing_time: Filter execution time in seconds

    Raises:
        ValueError: For invalid input dimensions or filter parameters
    """
    if validate:
        # Input integrity checks
        if input_data.ndim != 2:
            raise ValueError(f"Expected 2D input, got {input_data.ndim}D array")
        if window_length % 2 == 0 or window_length < 3:
            raise ValueError("Window length must be an odd integer ≥ 3")
        if polyorder >= window_length:
            raise ValueError("Polynomial order must be < window length")

    # Store original dtype and convert to float64 for numerical stability
    original_dtype = input_data.dtype
    working_data = input_data.astype(np.float64)

    # Start performance timer
    timer_start = time.perf_counter()

    try:
        # Vectorized Savitzky-Golay application
        smoothed_data = savgol_filter(working_data,
                                      window_length=window_length,
                                      polyorder=polyorder,
                                      axis=axis,
                                      mode='mirror')
    except Exception as e:
        raise RuntimeError(f"Filtering failed: {str(e)}") from e

    # Stop timer and calculate duration
    processing_time = time.perf_counter() - timer_start

    # Clip to the valid blendshape range and restore the original dtype
    return (
        np.clip(smoothed_data, 0.0, 1.0).astype(original_dtype),
        processing_time
    )

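# Illustrative usage sketch (not part of the original module): smooth a noisy
# ramp and read back the measured filter time.
def _example_apply_savitzky_golay_smoothing() -> None:
    ramp = np.linspace(0.0, 1.0, 100)[:, None]
    noisy = np.clip(ramp + np.random.normal(0.0, 0.05, (100, 52)), 0.0, 1.0)
    smoothed, seconds = apply_savitzky_golay_smoothing(noisy, window_length=5,
                                                       polyorder=2)
    assert smoothed.shape == noisy.shape and seconds >= 0.0
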
def _blend_region_start(
    array: np.ndarray,
    region: np.ndarray,
    processed_boundary: int,
    blend_frames: int
) -> None:
    """Applies a linear blend between the last active frame and the silent region start."""
    blend_length = min(blend_frames, region[0] - processed_boundary)
    if blend_length <= 0:
        return

    pre_frame = array[region[0] - 1]
    for i in range(blend_length):
        weight = (i + 1) / (blend_length + 1)
        array[region[0] + i] = pre_frame * (1 - weight) + array[region[0] + i] * weight


def _blend_region_end(
    array: np.ndarray,
    region: np.ndarray,
    blend_frames: int
) -> None:
    """Applies a linear blend between the silent region end and the next active frame."""
    blend_length = min(blend_frames, array.shape[0] - region[-1] - 1)
    if blend_length <= 0:
        return

    post_frame = array[region[-1] + 1]
    for i in range(blend_length):
        weight = (i + 1) / (blend_length + 1)
        array[region[-1] - i] = post_frame * (1 - weight) + array[region[-1] - i] * weight

def find_low_value_regions(
    signal: np.ndarray,
    threshold: float,
    min_region_length: int = 5
) -> list:
    """Identifies contiguous regions in a signal where values fall below a threshold.

    Args:
        signal: Input 1D array of numerical values
        threshold: Value threshold for identifying low regions
        min_region_length: Minimum consecutive samples required to qualify as a region

    Returns:
        List of numpy arrays, each containing indices for a qualifying low-value region
    """
    low_value_indices = np.where(signal < threshold)[0]
    if low_value_indices.size == 0:
        return []

    contiguous_regions = []
    # The first low-value sample already starts a run of length 1
    current_region_length = 1
    region_start_idx = 0

    for i in range(1, len(low_value_indices)):
        # Check if current index continues a consecutive sequence
        if low_value_indices[i] != low_value_indices[i - 1] + 1:
            # Finalize previous region if it meets length requirement
            if current_region_length >= min_region_length:
                contiguous_regions.append(low_value_indices[region_start_idx:i])
            # Reset tracking for new potential region
            region_start_idx = i
            current_region_length = 0
        current_region_length += 1

    # Add the final region if it qualifies
    if current_region_length >= min_region_length:
        contiguous_regions.append(low_value_indices[region_start_idx:])

    return contiguous_regions

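# Illustrative usage sketch (not part of the original module): locate a single
# silent gap in a synthetic volume curve.
def _example_find_low_value_regions() -> None:
    volume = np.ones(60)
    volume[20:35] = 0.0  # 15 silent frames
    regions = find_low_value_regions(volume, threshold=0.001, min_region_length=5)
    assert len(regions) == 1
    assert regions[0][0] == 20 and regions[0][-1] == 34
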
def smooth_mouth_movements(
    blend_shapes: np.ndarray,
    processed_frames: int,
    volume: Optional[np.ndarray] = None,
    silence_threshold: float = 0.001,
    min_silence_duration: int = 7,
    blend_window: int = 3
) -> np.ndarray:
    """Reduces jaw movement artifacts during silent periods in audio-driven animation.

    Args:
        blend_shapes: Array of facial blend shape weights [num_frames, num_blendshapes]
        processed_frames: Number of already processed frames that shouldn't be modified
        volume: Audio volume array used to detect silent periods
        silence_threshold: Volume threshold for considering a frame silent
        min_silence_duration: Minimum consecutive silent frames to qualify for processing
        blend_window: Number of frames to smooth at region boundaries

    Returns:
        Modified blend shape array with reduced mouth movements during silence
    """
    if volume is None:
        return blend_shapes

    # Detect silence periods using volume data
    silent_regions = find_low_value_regions(
        volume,
        threshold=silence_threshold,
        min_region_length=min_silence_duration
    )

    # Column indices of the mouth/jaw blend shapes (constant across regions)
    mouth_blend_indices = [ARKitBlendShape.index(name) for name in MOUTH_BLENDSHAPES]

    for region_indices in silent_regions:
        # Damp mouth blend shapes across the whole silent region
        blend_shapes[np.ix_(region_indices, mouth_blend_indices)] *= 0.1

        try:
            # Smooth transition into silent region
            _blend_region_start(
                blend_shapes,
                region_indices,
                processed_frames,
                blend_window
            )

            # Smooth transition out of silent region
            _blend_region_end(
                blend_shapes,
                region_indices,
                blend_window
            )
        except IndexError as e:
            warnings.warn(f"Edge blending skipped at region {region_indices}: {str(e)}")

    return blend_shapes

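# Illustrative usage sketch (not part of the original module): damp mouth and
# jaw motion over a silent stretch of a synthetic clip. Frames well inside the
# silent region end up at roughly a tenth of their original value.
def _example_smooth_mouth_movements() -> None:
    blend = np.random.rand(120, 52).astype(np.float32) * 0.5
    volume = np.ones(120)
    volume[40:60] = 0.0  # 20 silent frames
    out = smooth_mouth_movements(blend, processed_frames=0, volume=volume)
    jaw_open = ARKitBlendShape.index("jawOpen")
    assert out[50, jaw_open] <= 0.06
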
def apply_frame_blending(
    blend_shapes: np.ndarray,
    processed_frames: int,
    initial_blend_window: int = 3,
    subsequent_blend_window: int = 5
) -> np.ndarray:
    """Smooths transitions between processed and unprocessed animation frames using linear blending.

    Args:
        blend_shapes: Array of facial blend shape weights [num_frames, num_blendshapes]
        processed_frames: Number of already processed frames (0 means no previous processing)
        initial_blend_window: Max frames to blend at sequence start
        subsequent_blend_window: Max frames to blend between processed and new frames

    Returns:
        Modified blend shape array with smoothed transitions
    """
    if processed_frames > 0:
        # Blend transition between existing and new animation
        _blend_animation_segment(
            blend_shapes,
            transition_start=processed_frames,
            blend_window=subsequent_blend_window,
            reference_frame=blend_shapes[processed_frames - 1]
        )
    else:
        # Smooth initial frames from neutral expression (zeros)
        _blend_animation_segment(
            blend_shapes,
            transition_start=0,
            blend_window=initial_blend_window,
            reference_frame=np.zeros_like(blend_shapes[0])
        )
    return blend_shapes


def _blend_animation_segment(
    array: np.ndarray,
    transition_start: int,
    blend_window: int,
    reference_frame: np.ndarray
) -> None:
    """Applies linear interpolation between a reference frame and target frames.

    Args:
        array: Blend shape array to modify
        transition_start: Starting index for blending
        blend_window: Maximum number of frames to blend
        reference_frame: The reference frame to blend from
    """
    actual_blend_length = min(blend_window, array.shape[0] - transition_start)

    for frame_offset in range(actual_blend_length):
        current_idx = transition_start + frame_offset
        blend_weight = (frame_offset + 1) / (actual_blend_length + 1)

        # Linear interpolation: ref_frame * (1 - weight) + current_frame * weight
        array[current_idx] = (reference_frame * (1 - blend_weight)
                              + array[current_idx] * blend_weight)

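# Illustrative usage sketch (not part of the original module): ramp a new
# chunk in from the last processed frame so the seam has no visible pop.
def _example_apply_frame_blending() -> None:
    blend = np.ones((50, 52), dtype=np.float32)
    blend[:10] = 0.2  # previously processed chunk
    out = apply_frame_blending(blend, processed_frames=10)
    # Frames 10..14 now interpolate between 0.2 and 1.0.
    assert out[10, 0] < out[14, 0] < 1.0
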
# Brow animation templates. Each row is one frame of the five brow channels
# (columns 0-4 of the expression array, matching the first five entries of
# ARKitBlendShape: browDownLeft, browDownRight, browInnerUp, browOuterUpLeft,
# browOuterUpRight).
BROW1 = np.array([
    [0.05597309, 0.05727929, 0.07995935, 0.        , 0.        ],
    [0.00757574, 0.00936678, 0.12242376, 0.        , 0.        ],
    [0.        , 0.        , 0.14943372, 0.04535687, 0.04264118],
    [0.        , 0.        , 0.18015374, 0.09019445, 0.08736137],
    [0.        , 0.        , 0.20549579, 0.12802747, 0.12450772],
    [0.        , 0.        , 0.21098022, 0.1369939 , 0.13343132],
    [0.        , 0.        , 0.20904602, 0.13903855, 0.13562402],
    [0.        , 0.        , 0.20365039, 0.13977394, 0.13653506],
    [0.        , 0.        , 0.19714841, 0.14096624, 0.13805152],
    [0.        , 0.        , 0.20325482, 0.17303431, 0.17028868],
    [0.        , 0.        , 0.21990852, 0.20164253, 0.19818163],
    [0.        , 0.        , 0.23858181, 0.21908803, 0.21540019],
    [0.        , 0.        , 0.2567876 , 0.23762083, 0.23396946],
    [0.        , 0.        , 0.34093422, 0.27898848, 0.27651772],
    [0.        , 0.        , 0.45288125, 0.35008961, 0.34887788],
    [0.        , 0.        , 0.48076251, 0.36878952, 0.36778417],
    [0.        , 0.        , 0.47798249, 0.36362219, 0.36145973],
    [0.        , 0.        , 0.46186113, 0.33865979, 0.33597934],
    [0.        , 0.        , 0.45264384, 0.33152157, 0.32891783],
    [0.        , 0.        , 0.40986338, 0.29646468, 0.2945672 ],
    [0.        , 0.        , 0.35628179, 0.23356403, 0.23155804],
    [0.        , 0.        , 0.30870566, 0.1780673 , 0.17637439],
    [0.        , 0.        , 0.25293985, 0.10710219, 0.10622486],
    [0.        , 0.        , 0.18743332, 0.03252602, 0.03244236],
    [0.02340254, 0.02364671, 0.15736724, 0.        , 0.        ],
])

BROW2 = np.array([
    [0.        , 0.        , 0.09799323, 0.05944436, 0.05002545],
    [0.        , 0.        , 0.09780276, 0.07674237, 0.01636653],
    [0.        , 0.        , 0.11136199, 0.1027964 , 0.04249811],
    [0.        , 0.        , 0.26883412, 0.15861984, 0.15832305],
    [0.        , 0.        , 0.42191629, 0.27038204, 0.27007768],
    [0.        , 0.        , 0.3404977 , 0.21633868, 0.21597538],
    [0.        , 0.        , 0.27301185, 0.17176409, 0.17134669],
    [0.        , 0.        , 0.25960442, 0.15670464, 0.15622253],
    [0.        , 0.        , 0.22877269, 0.11805892, 0.11754539],
    [0.        , 0.        , 0.1451605 , 0.06389034, 0.0636282 ],
])

BROW3 = np.array([
    [0., 0., 0.124 , 0.0295, 0.0295],
    [0., 0., 0.267 , 0.184 , 0.184 ],
    [0., 0., 0.359 , 0.2765, 0.2765],
    [0., 0., 0.3945, 0.3125, 0.3125],
    [0., 0., 0.4125, 0.331 , 0.331 ],
    [0., 0., 0.4235, 0.3445, 0.3445],
    [0., 0., 0.4085, 0.3305, 0.3305],
    [0., 0., 0.3695, 0.294 , 0.294 ],
    [0., 0., 0.2835, 0.213 , 0.213 ],
    [0., 0., 0.1795, 0.1005, 0.1005],
    [0., 0., 0.108 , 0.014 , 0.014 ],
])

def apply_random_brow_movement(input_exp, volume):
    """Adds randomized brow-raise animations on high-volume speech regions.

    Args:
        input_exp: Expression array [num_frames, num_blendshapes]; columns 0-4
            hold the brow blend shapes that the templates are added to.
        volume: Per-frame audio volume array of length num_frames.

    Returns:
        The expression array with brow motion added, clipped to [0, 1].
    """
    FRAME_SEGMENT = 150        # Process the clip in 150-frame segments
    HOLD_THRESHOLD = 10        # Regions longer than this hold the brow pose
    VOLUME_THRESHOLD = 0.08    # Frames louder than this are brow candidates
    MIN_REGION_LENGTH = 6      # Minimum frames for a candidate region
    STRENGTH_RANGE = (0.7, 1.3)

    # Frame index of the peak (browInnerUp column) in each template
    BROW_PEAKS = {
        0: np.argmax(BROW1[:, 2]),
        1: np.argmax(BROW2[:, 2])
    }

    for seg_start in range(0, len(volume), FRAME_SEGMENT):
        seg_end = min(seg_start + FRAME_SEGMENT, len(volume))
        seg_volume = volume[seg_start:seg_end]

        candidate_regions = []

        # Label contiguous runs of high-volume frames within the segment
        high_vol_mask = seg_volume > VOLUME_THRESHOLD
        labeled_array, num_features = label(high_vol_mask)

        for i in range(1, num_features + 1):
            region = (labeled_array == i)
            region_indices = np.where(region)[0]
            if len(region_indices) >= MIN_REGION_LENGTH:
                candidate_regions.append(region_indices)

        if candidate_regions:
            # Pick one loud region and one of the two brow templates at random
            selected_region = candidate_regions[np.random.choice(len(candidate_regions))]
            region_start = selected_region[0]
            region_end = selected_region[-1]
            region_length = region_end - region_start + 1

            brow_idx = np.random.randint(0, 2)
            base_brow = BROW1 if brow_idx == 0 else BROW2
            peak_idx = BROW_PEAKS[brow_idx]

            if region_length > HOLD_THRESHOLD:
                # Long region: rise to the template peak, then hold the pose
                local_max_pos = seg_volume[selected_region].argmax()
                global_peak_frame = seg_start + selected_region[local_max_pos]

                rise_anim = base_brow[:peak_idx + 1]
                hold_frame = base_brow[peak_idx:peak_idx + 1]

                insert_start = max(global_peak_frame - peak_idx, seg_start)
                insert_end = min(global_peak_frame + (region_length - local_max_pos), seg_end)

                strength = np.random.uniform(*STRENGTH_RANGE)

                if insert_start + len(rise_anim) <= seg_end:
                    input_exp[insert_start:insert_start + len(rise_anim), :5] += rise_anim * strength
                    hold_duration = insert_end - (insert_start + len(rise_anim))
                    if hold_duration > 0:
                        input_exp[insert_start + len(rise_anim):insert_end, :5] += np.tile(hold_frame * strength,
                                                                                           (hold_duration, 1))
            else:
                # Short region: center the full template inside the region
                anim_length = base_brow.shape[0]
                insert_pos = seg_start + region_start + (region_length - anim_length) // 2
                insert_pos = max(seg_start, min(insert_pos, seg_end - anim_length))

                if insert_pos + anim_length <= seg_end:
                    strength = np.random.uniform(*STRENGTH_RANGE)
                    input_exp[insert_pos:insert_pos + anim_length, :5] += base_brow * strength

    return np.clip(input_exp, 0, 1)
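
# Illustrative usage sketch (not part of the original module): raise the brows
# on loud stretches of a synthetic volume curve and confirm the result stays
# inside the valid [0, 1] range.
def _example_apply_random_brow_movement() -> None:
    exp = np.zeros((300, 52), dtype=np.float32)
    volume = np.zeros(300)
    volume[50:70] = 0.5    # loud region in the first 150-frame segment
    volume[200:220] = 0.5  # loud region in the second segment
    out = apply_random_brow_movement(exp, volume)
    assert out.shape == (300, 52)
    assert 0.0 <= float(out.min()) and float(out.max()) <= 1.0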