From 98fcde60e8846d5440d93023e045e27efc06cc66 Mon Sep 17 00:00:00 2001 From: sudowind Date: Fri, 11 Apr 2025 08:28:25 +0800 Subject: [PATCH 1/3] feat: support use gpu --- audio2mouth_cpu.py | 5 +++-- lite_avatar.py | 17 ++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/audio2mouth_cpu.py b/audio2mouth_cpu.py index c5ce12d..3f459be 100644 --- a/audio2mouth_cpu.py +++ b/audio2mouth_cpu.py @@ -8,12 +8,13 @@ from extract_paraformer_feature import extract_para_feature from scipy import signal class Audio2Mouth(object): - def __init__(self): + def __init__(self, use_gpu): self.p_list = [str(ii) for ii in range(32)] model_path = './weights/model_1.onnx' - self.audio2mouth_model=onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider']) + provider = "CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider" + self.audio2mouth_model=onnxruntime.InferenceSession(model_path, providers=[provider]) self.w = np.array([1.0]).astype(np.float32) self.sp = np.array([2]).astype(np.int64) diff --git a/lite_avatar.py b/lite_avatar.py index 647df3a..2907412 100644 --- a/lite_avatar.py +++ b/lite_avatar.py @@ -41,18 +41,21 @@ class liteAvatar(object): num_threads=1, use_bg_as_idle=False, fps=30, - generate_offline=False): + generate_offline=False, + use_gpu=False): logger.info('liteAvatar init start...') self.data_dir = data_dir self.fps = fps self.use_bg_as_idle = use_bg_as_idle + self.use_gpu = use_gpu + self.device = "cuda" if use_gpu else "cpu" s = time.time() from audio2mouth_cpu import Audio2Mouth - self.audio2mouth = Audio2Mouth() + self.audio2mouth = Audio2Mouth(use_gpu) logger.info(f'audio2mouth init over in {time.time() - s}s') self.p_list = [str(ii) for ii in range(32)] @@ -82,8 +85,8 @@ class liteAvatar(object): def load_dynamic_model(self, data_dir): logger.info("start to load dynamic data") start_time = time.time() - self.encoder = torch.jit.load(f'{data_dir}/net_encode.pt') - self.generator = torch.jit.load(f'{data_dir}/net_decode.pt') + self.encoder = torch.jit.load(f'{data_dir}/net_encode.pt').to(self.device) + self.generator = torch.jit.load(f'{data_dir}/net_decode.pt').to(self.device) self.load_data_sync(data_dir=data_dir, bg_frame_cnt=150) self.load_data(data_dir=data_dir, bg_frame_cnt=150) @@ -137,7 +140,7 @@ class liteAvatar(object): image = cv2.cvtColor(cv2.imread(img_file_path)[:,:,0:3],cv2.COLOR_BGR2RGB) image = cv2.resize(image, (384, 384), interpolation=cv2.INTER_LINEAR) ref_img = self.image_transforms(np.uint8(image)) - encoder_input = ref_img.unsqueeze(0).float() + encoder_input = ref_img.unsqueeze(0).float().to(self.device) x = self.encoder(encoder_input) self.ref_img_list.append(x) @@ -179,8 +182,8 @@ class liteAvatar(object): param_val.append(val) param_val = np.asarray(param_val) - source_img = self.generator(self.ref_img_list[bg_frame_id], torch.from_numpy(param_val).unsqueeze(0).float()) - source_img = source_img.detach() + source_img = self.generator(self.ref_img_list[bg_frame_id], torch.from_numpy(param_val).unsqueeze(0).float().to(self.device)) + source_img = source_img.detach().to("cpu") return source_img From 0de6bad85c053ac6a55826af7997c84a6109d3f9 Mon Sep 17 00:00:00 2001 From: sudowind Date: Thu, 26 Jun 2025 22:28:37 +0800 Subject: [PATCH 2/3] feat: remove mdoel files --- .gitattributes | 3 --- .gitignore | 6 +++++- download_model.sh | 3 +++ weights/model_1.onnx | 3 --- .../lm/lm.pb | 3 --- .../model.pb | 3 --- 6 files changed, 8 insertions(+), 13 deletions(-) create mode 100644 download_model.sh delete mode 100644 weights/model_1.onnx delete mode 100644 weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb delete mode 100644 weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb diff --git a/.gitattributes b/.gitattributes index f69af18..e69de29 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +0,0 @@ -*.pth filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.gitignore b/.gitignore index 699b799..ec60f13 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ *.pyc __pycache__ -/results/ \ No newline at end of file +/results/ + +lm.pb +model_1.onnx +model.pb \ No newline at end of file diff --git a/download_model.sh b/download_model.sh new file mode 100644 index 0000000..20d8831 --- /dev/null +++ b/download_model.sh @@ -0,0 +1,3 @@ +modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/ +modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model_1.onnx --local_dir ./weights/ +modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/ \ No newline at end of file diff --git a/weights/model_1.onnx b/weights/model_1.onnx deleted file mode 100644 index 67addc8..0000000 --- a/weights/model_1.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:312f0213a23a1c17ecb66f9edf8413b8ef193c0d4d2f5f9dd6714a178492c34c -size 184415825 diff --git a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb b/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb deleted file mode 100644 index 1414f72..0000000 --- a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b59db5f607fde57bd8d6c8df8f22ce5b37ec7adcdb7b4ee785a4478f06c1c50 -size 237373385 diff --git a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb b/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb deleted file mode 100644 index 0bcfb7b..0000000 --- a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025 -size 880502012 From 9d778b0b3117942ec17dd41093d05d4adfeab1a8 Mon Sep 17 00:00:00 2001 From: sudowind Date: Thu, 26 Jun 2025 22:41:16 +0800 Subject: [PATCH 3/3] feat: support download model from modelscope rather than git lfs --- .gitignore | 5 ++++- README.md | 10 ++++++++++ download_model.bat | 21 +++++++++++++++++++++ download_model.sh | 19 ++++++++++++++++--- 4 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 download_model.bat diff --git a/.gitignore b/.gitignore index ec60f13..5e344c9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,7 @@ __pycache__ lm.pb model_1.onnx -model.pb \ No newline at end of file +model.pb +*.msc +lite_avatar_weights/ +*.mv \ No newline at end of file diff --git a/README.md b/README.md index 309c9c1..bf3ee98 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,16 @@ We recommend a python version = 3.10 and cuda version = 11.8. Then build environ ```shell pip install -r requirements.txt ``` + +## Model Preparation +```shell +# for windows +download_model.bat + +# for linux +bash download_model.sh +``` + ## Inference ``` python lite_avatar.py --data_dir /path/to/sample_data --audio_file /path/to/audio.wav --result_dir /path/to/result diff --git a/download_model.bat b/download_model.bat new file mode 100644 index 0000000..1a9f2bc --- /dev/null +++ b/download_model.bat @@ -0,0 +1,21 @@ +@echo off +REM Download LiteAvatar model files using modelscope + +echo Downloading LiteAvatar model files... + +modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb lite_avatar_weights/model_1.onnx lite_avatar_weights/model.pb --local_dir ./ +if %errorlevel% neq 0 ( + echo Error downloading lite_avatar_weights + pause + exit /b 1 +) + +@REM move file +move lite_avatar_weights\lm.pb ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/ +move lite_avatar_weights\model_1.onnx ./weights/ +move lite_avatar_weights\model.pb ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/ + +@REM remove folder +rmdir lite_avatar_weights + +echo All model files downloaded successfully! diff --git a/download_model.sh b/download_model.sh index 20d8831..4a6fdc4 100644 --- a/download_model.sh +++ b/download_model.sh @@ -1,3 +1,16 @@ -modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/ -modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model_1.onnx --local_dir ./weights/ -modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/ \ No newline at end of file +#!/bin/bash + +# download model +echo "Downloading LiteAvatar model files..." + +modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb lite_avatar_weights/model_1.onnx lite_avatar_weights/model.pb --local_dir ./ + +# move file +mv lite_avatar_weights/lm.pb ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/ +mv lite_avatar_weights/model_1.onnx ./weights/ +mv lite_avatar_weights/model.pb ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/ + +# remove folder +rm -rf lite_avatar_weights + +echo "All model files downloaded successfully!"