From 98fcde60e8846d5440d93023e045e27efc06cc66 Mon Sep 17 00:00:00 2001
From: sudowind <winfred.wang@outlook.com>
Date: Fri, 11 Apr 2025 08:28:25 +0800
Subject: [PATCH 1/3] feat: support GPU inference

---
 audio2mouth_cpu.py |  5 +++--
 lite_avatar.py     | 17 ++++++++++-------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/audio2mouth_cpu.py b/audio2mouth_cpu.py
index c5ce12d..3f459be 100644
--- a/audio2mouth_cpu.py
+++ b/audio2mouth_cpu.py
@@ -8,12 +8,13 @@ from extract_paraformer_feature import extract_para_feature
 from scipy import signal
 
 class Audio2Mouth(object):
-    def __init__(self):
+    def __init__(self, use_gpu):
 
         self.p_list = [str(ii) for ii in range(32)]
 
         model_path = './weights/model_1.onnx'
-        self.audio2mouth_model=onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
+        provider = "CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider"
+        self.audio2mouth_model=onnxruntime.InferenceSession(model_path, providers=[provider])
         self.w = np.array([1.0]).astype(np.float32)
         self.sp = np.array([2]).astype(np.int64)
 
diff --git a/lite_avatar.py b/lite_avatar.py
index 647df3a..2907412 100644
--- a/lite_avatar.py
+++ b/lite_avatar.py
@@ -41,18 +41,21 @@ class liteAvatar(object):
                  num_threads=1,
                  use_bg_as_idle=False,
                  fps=30,
-                 generate_offline=False):
+                 generate_offline=False,
+                 use_gpu=False):
         
         logger.info('liteAvatar init start...')
         
         self.data_dir = data_dir
         self.fps = fps
         self.use_bg_as_idle = use_bg_as_idle
+        self.use_gpu = use_gpu
+        self.device = "cuda" if use_gpu else "cpu"
         
         s = time.time()
         from audio2mouth_cpu import Audio2Mouth
         
-        self.audio2mouth = Audio2Mouth()
+        self.audio2mouth = Audio2Mouth(use_gpu)
         logger.info(f'audio2mouth init over in {time.time() - s}s')
         
         self.p_list = [str(ii) for ii in range(32)]
@@ -82,8 +85,8 @@ class liteAvatar(object):
     def load_dynamic_model(self, data_dir):
         logger.info("start to load dynamic data")
         start_time = time.time()
-        self.encoder = torch.jit.load(f'{data_dir}/net_encode.pt')
-        self.generator = torch.jit.load(f'{data_dir}/net_decode.pt')
+        self.encoder = torch.jit.load(f'{data_dir}/net_encode.pt').to(self.device)
+        self.generator = torch.jit.load(f'{data_dir}/net_decode.pt').to(self.device)
 
         self.load_data_sync(data_dir=data_dir, bg_frame_cnt=150)
         self.load_data(data_dir=data_dir, bg_frame_cnt=150)
@@ -137,7 +140,7 @@ class liteAvatar(object):
             image = cv2.cvtColor(cv2.imread(img_file_path)[:,:,0:3],cv2.COLOR_BGR2RGB)
             image = cv2.resize(image, (384, 384), interpolation=cv2.INTER_LINEAR)
             ref_img = self.image_transforms(np.uint8(image))
-            encoder_input = ref_img.unsqueeze(0).float()
+            encoder_input = ref_img.unsqueeze(0).float().to(self.device)
             x = self.encoder(encoder_input)
             self.ref_img_list.append(x)
         
@@ -179,8 +182,8 @@ class liteAvatar(object):
             param_val.append(val)
         param_val = np.asarray(param_val)
         
-        source_img = self.generator(self.ref_img_list[bg_frame_id], torch.from_numpy(param_val).unsqueeze(0).float())
-        source_img = source_img.detach()
+        source_img = self.generator(self.ref_img_list[bg_frame_id], torch.from_numpy(param_val).unsqueeze(0).float().to(self.device))
+        source_img = source_img.detach().to("cpu")
         
         return source_img
     

From 0de6bad85c053ac6a55826af7997c84a6109d3f9 Mon Sep 17 00:00:00 2001
From: sudowind <winfred.wang@outlook.com>
Date: Thu, 26 Jun 2025 22:28:37 +0800
Subject: [PATCH 2/3] feat: remove model files

---
 .gitattributes                                              | 3 ---
 .gitignore                                                  | 6 +++++-
 download_model.sh                                           | 3 +++
 weights/model_1.onnx                                        | 3 ---
 .../lm/lm.pb                                                | 3 ---
 .../model.pb                                                | 3 ---
 6 files changed, 8 insertions(+), 13 deletions(-)
 create mode 100644 download_model.sh
 delete mode 100644 weights/model_1.onnx
 delete mode 100644 weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb
 delete mode 100644 weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb

diff --git a/.gitattributes b/.gitattributes
index f69af18..e69de29 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +0,0 @@
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 699b799..ec60f13 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
 *.pyc
 __pycache__
-/results/
\ No newline at end of file
+/results/
+
+lm.pb
+model_1.onnx
+model.pb
\ No newline at end of file
diff --git a/download_model.sh b/download_model.sh
new file mode 100644
index 0000000..20d8831
--- /dev/null
+++ b/download_model.sh
@@ -0,0 +1,3 @@
+modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/
+modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model_1.onnx --local_dir ./weights/
+modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/
\ No newline at end of file
diff --git a/weights/model_1.onnx b/weights/model_1.onnx
deleted file mode 100644
index 67addc8..0000000
--- a/weights/model_1.onnx
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:312f0213a23a1c17ecb66f9edf8413b8ef193c0d4d2f5f9dd6714a178492c34c
-size 184415825
diff --git a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb b/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb
deleted file mode 100644
index 1414f72..0000000
--- a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/lm.pb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5b59db5f607fde57bd8d6c8df8f22ce5b37ec7adcdb7b4ee785a4478f06c1c50
-size 237373385
diff --git a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb b/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb
deleted file mode 100644
index 0bcfb7b..0000000
--- a/weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025
-size 880502012

From 9d778b0b3117942ec17dd41093d05d4adfeab1a8 Mon Sep 17 00:00:00 2001
From: sudowind <winfred.wang@outlook.com>
Date: Thu, 26 Jun 2025 22:41:16 +0800
Subject: [PATCH 3/3] feat: support downloading models from ModelScope rather
 than Git LFS

---
 .gitignore         |  5 ++++-
 README.md          | 10 ++++++++++
 download_model.bat | 21 +++++++++++++++++++++
 download_model.sh  | 19 ++++++++++++++++---
 4 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 download_model.bat

diff --git a/.gitignore b/.gitignore
index ec60f13..5e344c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,7 @@ __pycache__
 
 lm.pb
 model_1.onnx
-model.pb
\ No newline at end of file
+model.pb
+*.msc
+lite_avatar_weights/
+*.mv
\ No newline at end of file
diff --git a/README.md b/README.md
index 309c9c1..bf3ee98 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,16 @@ We recommend a python version = 3.10 and cuda version = 11.8. Then build environ
 ```shell
 pip install -r requirements.txt
 ```
+
+## Model Preparation
+```shell
+# for windows
+download_model.bat
+
+# for linux
+bash download_model.sh
+```
+
 ## Inference
 ```
 python lite_avatar.py --data_dir /path/to/sample_data --audio_file /path/to/audio.wav --result_dir /path/to/result
diff --git a/download_model.bat b/download_model.bat
new file mode 100644
index 0000000..1a9f2bc
--- /dev/null
+++ b/download_model.bat
@@ -0,0 +1,42 @@
+@echo off
+REM Download LiteAvatar model files using modelscope and move them to their
+REM locations under weights\. Requires the modelscope CLI to be on PATH.
+
+REM Keep environment changes local to this script.
+setlocal
+
+REM All paths below are relative to this script's folder (the repository root).
+cd /d "%~dp0"
+
+set "PARAFORMER_DIR=weights\speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+
+echo Downloading LiteAvatar model files...
+
+modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb lite_avatar_weights/model_1.onnx lite_avatar_weights/model.pb --local_dir ./
+if %errorlevel% neq 0 (
+    echo Error downloading lite_avatar_weights
+    pause
+    exit /b 1
+)
+
+REM Git does not track empty directories, so the destination folders may be
+REM missing in a fresh checkout; create them before moving.
+if not exist "%PARAFORMER_DIR%\lm" mkdir "%PARAFORMER_DIR%\lm"
+
+@REM move file
+REM Use backslashes: cmd built-ins such as move can treat "/" as a switch prefix.
+move "lite_avatar_weights\lm.pb" "%PARAFORMER_DIR%\lm\" || goto :move_failed
+move "lite_avatar_weights\model_1.onnx" "weights\" || goto :move_failed
+move "lite_avatar_weights\model.pb" "%PARAFORMER_DIR%\" || goto :move_failed
+
+@REM remove folder
+REM /s /q removes the folder even if it is not empty, matching rm -rf in download_model.sh.
+rmdir /s /q lite_avatar_weights
+
+echo All model files downloaded successfully!
+exit /b 0
+
+:move_failed
+echo Error moving model files into weights
+pause
+exit /b 1
diff --git a/download_model.sh b/download_model.sh
index 20d8831..4a6fdc4 100644
--- a/download_model.sh
+++ b/download_model.sh
@@ -1,3 +1,33 @@
-modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/lm/
-modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model_1.onnx --local_dir ./weights/
-modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/model.pb --local_dir ./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/
\ No newline at end of file
+#!/bin/bash
+#
+# Download the LiteAvatar weight files from ModelScope and move them to
+# their locations under ./weights/.
+# Requires the `modelscope` command-line tool to be on PATH.
+
+# Abort on the first failing command so that a failed download or move is
+# never followed by the success message at the end.
+set -eu
+
+# All paths below are relative to the repository root (this script's folder).
+cd "$(dirname "$0")"
+
+PARAFORMER_DIR="./weights/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+
+# download model
+echo "Downloading LiteAvatar model files..."
+
+modelscope download --model HumanAIGC-Engineering/LiteAvatarGallery lite_avatar_weights/lm.pb lite_avatar_weights/model_1.onnx lite_avatar_weights/model.pb --local_dir ./
+
+# Git does not track empty directories, so the destination folders may be
+# missing in a fresh checkout; create them before moving.
+mkdir -p "${PARAFORMER_DIR}/lm"
+
+# move file
+mv lite_avatar_weights/lm.pb "${PARAFORMER_DIR}/lm/"
+mv lite_avatar_weights/model_1.onnx ./weights/
+mv lite_avatar_weights/model.pb "${PARAFORMER_DIR}/"
+
+# remove folder
+rm -rf lite_avatar_weights
+
+echo "All model files downloaded successfully!"