diff --git a/.gitignore b/.gitignore
index aa31084..a2df39d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,11 +5,13 @@
 *.pyc
 .ipynb_checkpoints
 results/
-./models
+models/
 **/__pycache__/
 *.py[cod]
 *$py.class
 dataset/
 ffmpeg*
+ffprobe*
+ffplay*
 debug
 exp_out
\ No newline at end of file
diff --git a/README.md b/README.md
index 6bf3549..c522290 100644
--- a/README.md
+++ b/README.md
@@ -146,15 +146,36 @@ We also hope you note that we have not verified, maintained, or updated third-pa
 ## Installation
 To prepare the Python environment and install additional packages such as opencv, diffusers, mmcv, etc., please follow the steps below:
 
-### Build environment
-We recommend a python version >=3.10 and cuda version =11.7. Then build environment as follows:
+### Build environment
+We recommend Python 3.10 and CUDA 11.8. Set up your environment as follows:
+
+```shell
+conda create -n MuseTalk python==3.10
+conda activate MuseTalk
+```
+
+### Install PyTorch 2.0.1
+Choose one of the following installation methods:
+
+```shell
+# Option 1: Using pip
+pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
+
+# Option 2: Using conda
+conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.8 -c pytorch -c nvidia
+```
+
+### Install Dependencies
+Install the remaining required packages:
 ```shell
 pip install -r requirements.txt
 ```
 
-### mmlab packages
+### Install MMLab Packages
+Install the MMLab ecosystem packages:
+
 ```bash
 pip install --no-cache-dir -U openmim
 mim install mmengine
@@ -163,33 +184,52 @@ mim install "mmdet>=3.1.0"
 mim install "mmpose>=1.1.0"
 ```
 
-### Download ffmpeg-static
-Download the ffmpeg-static and
-```
+### Setup FFmpeg
+1. [Download](https://github.com/BtbN/FFmpeg-Builds/releases) the ffmpeg-static package
+
+2. Configure FFmpeg based on your operating system:
+
+For Linux:
+```bash
 export FFMPEG_PATH=/path/to/ffmpeg
-```
-for example:
-```
+# Example:
 export FFMPEG_PATH=/musetalk/ffmpeg-4.4-amd64-static
 ```
 
-### Download weights
-You can download weights manually as follows:
-1. Download our trained [weights](https://huggingface.co/TMElyralab/MuseTalk).
+For Windows:
+Add the `ffmpeg-xxx\bin` directory to your system's PATH environment variable. Verify the installation by running `ffmpeg -version` in the command prompt; it should display the ffmpeg version information.
+
+### Download weights
+You can download weights in two ways:
+
+#### Option 1: Using Download Scripts
+We provide two scripts for automatic downloading:
+
+For Linux:
 ```bash
-# !pip install -U "huggingface_hub[cli]"
-export HF_ENDPOINT=https://hf-mirror.com
-huggingface-cli download TMElyralab/MuseTalk --local-dir models/
+# Make the script executable
+chmod +x download_weights.sh
+# Run the script
+./download_weights.sh
 ```
+For Windows:
+```batch
+# Run the script
+download_weights.bat
+```
+
+#### Option 2: Manual Download
+You can also download the weights manually from the following links:
+
+1. Download our trained [weights](https://huggingface.co/TMElyralab/MuseTalk/tree/main)
 2. Download the weights of other components:
-  - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse)
+  - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse/tree/main)
   - [whisper](https://huggingface.co/openai/whisper-tiny/tree/main)
   - [dwpose](https://huggingface.co/yzd-v/DWPose/tree/main)
+  - [syncnet](https://huggingface.co/ByteDance/LatentSync/tree/main)
   - [face-parse-bisent](https://github.com/zllrunning/face-parsing.PyTorch)
   - [resnet18](https://download.pytorch.org/models/resnet18-5c106cde.pth)
-  - [syncnet](https://huggingface.co/ByteDance/LatentSync/tree/main)
-
 Finally, these weights should be organized in `models` as follows:
 ```
@@ -207,7 +247,7 @@ Finally, these weights should be organized in `models` as follows:
 ├── face-parse-bisent
 │   ├── 79999_iter.pth
 │   └── resnet18-5c106cde.pth
-├── sd-vae-ft-mse
+├── sd-vae
 │   ├── config.json
 │   └── diffusion_pytorch_model.bin
 └── whisper
@@ -221,21 +261,60 @@ Finally, these weights should be organized in `models` as follows:
 ### Inference
 We provide inference scripts for both versions of MuseTalk:
 
-#### MuseTalk 1.5 (Recommended)
+#### Prerequisites
+Before running inference, please ensure ffmpeg is installed and accessible:
 ```bash
-# Run MuseTalk 1.5 inference
-sh inference.sh v1.5 normal
+# Check ffmpeg installation
+ffmpeg -version
 ```
+If ffmpeg is not found, please install it first:
+- Windows: Download from [ffmpeg-static](https://github.com/BtbN/FFmpeg-Builds/releases) and add to PATH
+- Linux: `sudo apt-get install ffmpeg`
 
-#### MuseTalk 1.0
+#### Normal Inference
+##### Linux Environment
 ```bash
-# Run MuseTalk 1.0 inference
+# MuseTalk 1.5 (Recommended)
+sh inference.sh v1.5 normal
+
+# MuseTalk 1.0
 sh inference.sh v1.0 normal
 ```
-The inference script supports both MuseTalk 1.5 and 1.0 models:
-- For MuseTalk 1.5: Use the command above with the V1.5 model path
-- For MuseTalk 1.0: Use the same script but point to the V1.0 model path
+
+##### Windows Environment
+
+Please ensure that you set the `ffmpeg_path` to match the actual location of your FFmpeg installation.
+
+```bash
+# MuseTalk 1.5 (Recommended)
+python -m scripts.inference --inference_config configs\inference\test.yaml --result_dir results\test --unet_model_path models\musetalkV15\unet.pth --unet_config models\musetalkV15\musetalk.json --version v15 --ffmpeg_path ffmpeg-master-latest-win64-gpl-shared\bin
+
+# For MuseTalk 1.0, change:
+# - models\musetalkV15 -> models\musetalk
+# - unet.pth -> pytorch_model.bin
+# - --version v15 -> --version v1
+```
+
+#### Real-time Inference
+##### Linux Environment
+```bash
+# MuseTalk 1.5 (Recommended)
+sh inference.sh v1.5 realtime
+
+# MuseTalk 1.0
+sh inference.sh v1.0 realtime
+```
+
+##### Windows Environment
+```bash
+# MuseTalk 1.5 (Recommended)
+python -m scripts.realtime_inference --inference_config configs\inference\realtime.yaml --result_dir results\realtime --unet_model_path models\musetalkV15\unet.pth --unet_config models\musetalkV15\musetalk.json --version v15 --fps 25
+
+# For MuseTalk 1.0, change:
+# - models\musetalkV15 -> models\musetalk
+# - unet.pth -> pytorch_model.bin
+# - --version v15 -> --version v1
+```
 
 The configuration file `configs/inference/test.yaml` contains the inference settings, including:
 - `video_path`: Path to the input video, image file, or directory of images
@@ -243,21 +322,6 @@ The configuration file `configs/inference/test.yaml` contains the inference sett
 Note: For optimal results, we recommend using input videos with 25fps, which is the same fps used during model training. If your video has a lower frame rate, you can use frame interpolation or convert it to 25fps using ffmpeg.
 
-#### Real-time Inference
-For real-time inference, use the following command:
-```bash
-# Run real-time inference
-sh inference.sh v1.5 realtime # For MuseTalk 1.5
-# or
-sh inference.sh v1.0 realtime # For MuseTalk 1.0
-```
-
-The real-time inference configuration is in `configs/inference/realtime.yaml`, which includes:
-- `preparation`: Set to `True` for new avatar preparation
-- `video_path`: Path to the input video
-- `bbox_shift`: Adjustable parameter for mouth region control
-- `audio_clips`: List of audio clips for generation
-
 Important notes for real-time inference:
 1. Set `preparation` to `True` when processing a new avatar
 2. After preparation, the avatar will generate videos using audio clips from `audio_clips`
diff --git a/download_weights.bat b/download_weights.bat
new file mode 100644
index 0000000..c6fbdd2
--- /dev/null
+++ b/download_weights.bat
@@ -0,0 +1,45 @@
+@echo off
+setlocal
+
+:: Set the checkpoints directory
+set CheckpointsDir=models
+
+:: Create necessary directories
+mkdir %CheckpointsDir%\musetalk
+mkdir %CheckpointsDir%\musetalkV15
+mkdir %CheckpointsDir%\syncnet
+mkdir %CheckpointsDir%\dwpose
+mkdir %CheckpointsDir%\face-parse-bisent
+mkdir %CheckpointsDir%\sd-vae
+mkdir %CheckpointsDir%\whisper
+
+:: Install required packages
+pip install -U "huggingface_hub[cli]"
+pip install gdown
+
+:: Set HuggingFace endpoint
+set HF_ENDPOINT=https://hf-mirror.com
+
+:: Download MuseTalk weights
+huggingface-cli download TMElyralab/MuseTalk --local-dir %CheckpointsDir%
+
+:: Download SD VAE weights
+huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir %CheckpointsDir%\sd-vae --include "config.json" "diffusion_pytorch_model.bin"
+
+:: Download Whisper weights
+huggingface-cli download openai/whisper-tiny --local-dir %CheckpointsDir%\whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
+
+:: Download DWPose weights
+huggingface-cli download yzd-v/DWPose --local-dir %CheckpointsDir%\dwpose --include "dw-ll_ucoco_384.pth"
+
+:: Download SyncNet weights
+huggingface-cli download ByteDance/LatentSync --local-dir %CheckpointsDir%\syncnet --include "latentsync_syncnet.pt"
+
+:: Download Face Parse Bisent weights (using gdown)
+gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O %CheckpointsDir%\face-parse-bisent\79999_iter.pth
+
+:: Download ResNet weights
+curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o %CheckpointsDir%\face-parse-bisent\resnet18-5c106cde.pth
+
+echo All weights have been downloaded successfully!
+endlocal
\ No newline at end of file
diff --git a/download_weights.sh b/download_weights.sh
new file mode 100644
index 0000000..0faa0c9
--- /dev/null
+++ b/download_weights.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Set the checkpoints directory
+CheckpointsDir="models"
+
+# Create necessary directories
+mkdir -p $CheckpointsDir/{musetalk,musetalkV15,syncnet,dwpose,face-parse-bisent,sd-vae,whisper}
+
+# Install required packages
+pip install -U "huggingface_hub[cli]"
+pip install gdown
+
+# Set HuggingFace endpoint
+export HF_ENDPOINT=https://hf-mirror.com
+
+# Download MuseTalk weights
+huggingface-cli download TMElyralab/MuseTalk --local-dir $CheckpointsDir
+
+# Download SD VAE weights
+huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir $CheckpointsDir/sd-vae --include "config.json" "diffusion_pytorch_model.bin"
+
+# Download Whisper weights
+huggingface-cli download openai/whisper-tiny --local-dir $CheckpointsDir/whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
+
+# Download DWPose weights
+huggingface-cli download yzd-v/DWPose --local-dir $CheckpointsDir/dwpose --include "dw-ll_ucoco_384.pth"
+
+# Download SyncNet weights
+huggingface-cli download ByteDance/LatentSync --local-dir $CheckpointsDir/syncnet --include "latentsync_syncnet.pt"
+
+# Download Face Parse Bisent weights (using gdown)
+gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth
+
+# Download ResNet weights
+curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
+
+echo "All weights have been downloaded successfully!"
\ No newline at end of file
diff --git a/musetalk/utils/utils.py b/musetalk/utils/utils.py
index 6b14eff..b4882d5 100755
--- a/musetalk/utils/utils.py
+++ b/musetalk/utils/utils.py
@@ -8,26 +8,18 @@ from einops import rearrange
 import shutil
 import os.path as osp
 
-ffmpeg_path = os.getenv('FFMPEG_PATH')
-if ffmpeg_path is None:
-    print("please download ffmpeg-static and export to FFMPEG_PATH. \nFor example: export FFMPEG_PATH=/musetalk/ffmpeg-4.4-amd64-static")
-elif ffmpeg_path not in os.getenv('PATH'):
-    print("add ffmpeg to path")
-    os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
-
-
 from musetalk.models.vae import VAE
 from musetalk.models.unet import UNet,PositionalEncoding
 
 def load_all_model(
-    unet_model_path="./models/musetalk/pytorch_model.bin",
-    vae_type="sd-vae-ft-mse",
-    unet_config="./models/musetalk/musetalk.json",
+    unet_model_path=os.path.join("models", "musetalk", "pytorch_model.bin"),
+    vae_type="sd-vae",
+    unet_config=os.path.join("models", "musetalk", "musetalk.json"),
     device=None,
 ):
     vae = VAE(
-        model_path = f"./models/{vae_type}/",
+        model_path = os.path.join("models", vae_type),
     )
     print(f"load unet model from {unet_model_path}")
     unet = UNet(
diff --git a/requirements.txt b/requirements.txt
index 651bf48..0f4d36b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.0.1
-torchvision==0.15.2
-torchaudio==2.0.2
-diffusers==0.27.2
+diffusers==0.30.2
 accelerate==0.28.0
 tensorflow==2.12.0
 tensorboard==2.12.0
@@ -10,13 +6,15 @@ opencv-python==4.9.0.80
 soundfile==0.12.1
 transformers==4.39.2
 huggingface_hub==0.25.0
+librosa==0.11.0
+numpy==1.24.4
+einops==0.8.1
 gdown
 requests
 imageio[ffmpeg]
+gradio
 omegaconf
 ffmpeg-python
-gradio
-spaces
 moviepy
diff --git a/scripts/inference.py b/scripts/inference.py
index 41da8dd..428afb9 100644
--- a/scripts/inference.py
+++ b/scripts/inference.py
@@ -8,9 +8,11 @@ import shutil
 import pickle
 import argparse
 import numpy as np
+import subprocess
 from tqdm import tqdm
 from omegaconf import OmegaConf
 from transformers import WhisperModel
+import sys
 
 from musetalk.utils.blending import get_image
 from musetalk.utils.face_parsing import FaceParsing
@@ -18,16 +20,26 @@ from musetalk.utils.audio_processor import AudioProcessor
 from musetalk.utils.utils import get_file_type, get_video_fps, datagen, load_all_model
 from musetalk.utils.preprocessing import get_landmark_and_bbox, read_imgs, coord_placeholder
 
+def fast_check_ffmpeg():
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        return True
+    except Exception:
+        return False
+
 @torch.no_grad()
 def main(args):
     # Configure ffmpeg path
-    if args.ffmpeg_path not in os.getenv('PATH'):
+    if not fast_check_ffmpeg():
         print("Adding ffmpeg to PATH")
-        os.environ["PATH"] = f"{args.ffmpeg_path}:{os.environ['PATH']}"
+        # Choose path separator based on operating system
+        path_separator = ';' if sys.platform == 'win32' else ':'
+        os.environ["PATH"] = f"{args.ffmpeg_path}{path_separator}{os.environ['PATH']}"
+        if not fast_check_ffmpeg():
+            print("Warning: Unable to find ffmpeg, please ensure ffmpeg is properly installed")
 
     # Set computing device
     device = torch.device(f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")
-
     # Load model weights
     vae, unet, pe = load_all_model(
diff --git a/scripts/preprocess.py b/scripts/preprocess.py
index c493e3c..2f8bc6d 100755
--- a/scripts/preprocess.py
+++ b/scripts/preprocess.py
@@ -12,11 +12,24 @@ from mmpose.structures import merge_data_samples
 import torch
 import numpy as np
 from tqdm import tqdm
+import subprocess
+import sys
+
+def fast_check_ffmpeg():
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        return True
+    except Exception:
+        return False
 
 ffmpeg_path = "./ffmpeg-4.4-amd64-static/"
-if ffmpeg_path not in os.getenv('PATH'):
-    print("add ffmpeg to path")
-    os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
f"{ffmpeg_path}:{os.environ['PATH']}" +if not fast_check_ffmpeg(): + print("Adding ffmpeg to PATH") + # Choose path separator based on operating system + path_separator = ';' if sys.platform == 'win32' else ':' + os.environ["PATH"] = f"{args.ffmpeg_path}{path_separator}{os.environ['PATH']}" + if not fast_check_ffmpeg(): + print("Warning: Unable to find ffmpeg, please ensure ffmpeg is properly installed") class AnalyzeFace: def __init__(self, device: Union[str, torch.device], config_file: str, checkpoint_file: str): diff --git a/scripts/realtime_inference.py b/scripts/realtime_inference.py index 52560c5..5c547ef 100644 --- a/scripts/realtime_inference.py +++ b/scripts/realtime_inference.py @@ -23,6 +23,15 @@ import shutil import threading import queue import time +import subprocess + + +def fast_check_ffmpeg(): + try: + subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True) + return True + except: + return False def video2imgs(vid_path, save_path, ext='.png', cut_frame=10000000): @@ -332,6 +341,15 @@ if __name__ == "__main__": args = parser.parse_args() + # Configure ffmpeg path + if not fast_check_ffmpeg(): + print("Adding ffmpeg to PATH") + # Choose path separator based on operating system + path_separator = ';' if sys.platform == 'win32' else ':' + os.environ["PATH"] = f"{args.ffmpeg_path}{path_separator}{os.environ['PATH']}" + if not fast_check_ffmpeg(): + print("Warning: Unable to find ffmpeg, please ensure ffmpeg is properly installed") + # Set computing device device = torch.device(f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu") diff --git a/test_ffmpeg.py b/test_ffmpeg.py new file mode 100644 index 0000000..3788817 --- /dev/null +++ b/test_ffmpeg.py @@ -0,0 +1,33 @@ +import os +import subprocess +import sys + +def test_ffmpeg(ffmpeg_path): + print(f"Testing ffmpeg path: {ffmpeg_path}") + + # Choose path separator based on operating system + path_separator = ';' if sys.platform == 'win32' else ':' + + # Add ffmpeg path to environment variable + os.environ["PATH"] = f"{ffmpeg_path}{path_separator}{os.environ['PATH']}" + + try: + # Try to run ffmpeg + result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True) + print("FFmpeg test successful!") + print("FFmpeg version information:") + print(result.stdout) + return True + except Exception as e: + print("FFmpeg test failed!") + print(f"Error message: {str(e)}") + return False + +if __name__ == "__main__": + # Default ffmpeg path, can be modified as needed + default_path = r"ffmpeg-master-latest-win64-gpl-shared\bin" + + # Use command line argument if provided, otherwise use default path + ffmpeg_path = sys.argv[1] if len(sys.argv) > 1 else default_path + + test_ffmpeg(ffmpeg_path) \ No newline at end of file