mirror of
https://github.com/TMElyralab/MuseTalk.git
synced 2026-02-05 01:49:20 +08:00
* docs: update readme * docs: update readme * feat: training codes * feat: data preprocess * docs: release training
20 lines
834 B
YAML
20 lines
834 B
YAML
# This file is modified from LatentSync (https://github.com/bytedance/LatentSync/blob/main/latentsync/configs/training/syncnet_16_pixel.yaml).

# SyncNet configuration: one encoder for the audio input, one for the visual
# input, plus checkpoint settings.
model:
  # Within each encoder, block_out_channels, downsample_factors and attn_blocks
  # carry the same number of entries (7 for audio, 8 for visual).
  # NOTE(review): presumably one entry per encoder block -- confirm against the
  # SyncNet model code.
  audio_encoder:  # input (1, 80, 52)
    in_channels: 1
    block_out_channels: [32, 64, 128, 256, 512, 1024, 2048]
    # A scalar or a two-element pair per entry.
    # NOTE(review): a pair presumably gives separate factors for the two
    # spatial axes -- confirm.
    downsample_factors: [[2, 1], 2, 2, 1, 2, 2, [2, 3]]
    # NOTE(review): all zeros; presumably attention is disabled in every
    # block -- confirm.
    attn_blocks: [0, 0, 0, 0, 0, 0, 0]
    dropout: 0.0
  visual_encoder:  # input (48, 128, 256)
    in_channels: 48
    block_out_channels: [64, 128, 256, 256, 512, 1024, 2048, 2048]
    downsample_factors: [[1, 2], 2, 2, 2, 2, 2, 2, 2]
    attn_blocks: [0, 0, 0, 0, 0, 0, 0, 0]
    dropout: 0.0

ckpt:
  # Empty string: no checkpoint path is set for resuming.
  resume_ckpt_path: ""
  # this pretrained model is from LatentSync (https://huggingface.co/ByteDance/LatentSync/tree/main)
  inference_ckpt_path: ./models/syncnet/latentsync_syncnet.pt
  save_ckpt_steps: 2500