feat: WebGL render with audio

2026-02-04 17:39:24 +08:00 · 2025-04-18 16:18:10 +08:00
parent ca93dd0572
commit 7cc233a737
3 changed files with 27 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 ## Demo

 <div align="center">
-  <video controls src="https://github.com/user-attachments/assets/30ccbe82-7933-4031-8578-b5248435d317">
+  <video controls src="https://github.com/user-attachments/assets/a89a0d70-a573-4d61-91bd-4f09a0b6ce2c">
  </video>
 </div>

@@ -28,6 +28,10 @@
 ```bash
 git clone git@github.com:aigc3d/LAM_Audio2Expression.git
 cd LAM_Audio2Expression
+# Create conda environment (currently only supports Python 3.10)
+conda create -n lam_a2e python=3.10
+# Activate the conda environment
+conda activate lam_a2e
 # Install with Cuda 12.1
 sh  ./scripts/install/install_cu121.sh
 # Or Install with Cuda 11.8
@@ -44,7 +48,6 @@ huggingface-cli download 3DAIGC/LAM_audio2exp --local-dir ./
 tar -xzvf LAM_audio2exp_assets.tar && rm -f LAM_audio2exp_assets.tar
 tar -xzvf LAM_audio2exp_streaming.tar && rm -f LAM_audio2exp_streaming.tar

-
 # Or OSS Download (In case of HuggingFace download failing)
 # Download Assets
 wget https://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/aigc3d/data/LAM/LAM_audio2exp_assets.tar
@@ -52,12 +55,15 @@ tar -xzvf LAM_audio2exp_assets.tar && rm -f LAM_audio2exp_assets.tar
 # Download Model Weights
 wget https://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/aigc3d/data/LAM/LAM_audio2exp_streaming.tar
 tar -xzvf LAM_audio2exp_streaming.tar && rm -f LAM_audio2exp_streaming.tar
+
+# Or Modelscope Download
+git clone https://www.modelscope.cn/Damo_XR_Lab/LAM_audio2exp.git ./modelscope_download
 ```


 ### Quick Start Guide
 #### Using <a href="https://github.com/gradio-app/gradio">Gradio</a> Interface: 
-We provide a simple Gradio demo with **WebGLGL Render**, and you can get rendering results by uploading audio in seconds.
+We provide a simple Gradio demo with **WebGL Render**, and you can get rendering results by uploading audio in seconds.

 <img src="./assets/images/snapshot.png" alt="teaser" width="1000"/>

--- a/app_lam_audio2exp.py
+++ b/app_lam_audio2exp.py
@@ -53,8 +53,13 @@ def get_image_base64(path):
    return f'data:image/png;base64,{encoded_string}'


-def doRender():
-    print('H5 rendering ....')
+def do_render():  # zero-argument callback: only logs that WebGL rendering was requested
+    print('WebGL rendering ....')
+    return  # bare return -> None; this function produces no value for any output
+
+def audio_loading():  # zero-argument callback: only logs that audio loading started
+    print("Audio loading ....")
+    return "None"  # NOTE(review): this is the literal string "None", not the None object - confirm intended

 def parse_configs():
    parser = argparse.ArgumentParser()
@@ -139,7 +144,7 @@ def demo_lam_audio2exp(infer, cfg):

        create_zip_archive(output_zip='./assets/arkitWithBSData.zip', base_dir=os.path.join("./assets/sample_lam", base_id))

-        return
+        return 'gradio_api/file='+audio_params

    with gr.Blocks(analytics_enabled=False) as demo:
        logo_url = './assets/images/logo.jpeg'
@@ -224,6 +229,8 @@ def demo_lam_audio2exp(infer, cfg):
                gs = gaussian_render(width=380, height=680, assets=assetPrefix + 'arkitWithBSData.zip')

        working_dir = gr.State()
+        selected_audio = gr.Textbox(visible=False)
+
        submit.click(
            fn=assert_input_image,
            inputs=[input_image],
@@ -235,11 +242,16 @@ def demo_lam_audio2exp(infer, cfg):
        ).success(
            fn=core_fn,
            inputs=[input_image, audio_input,
-                    working_dir],  # video_params refer to smpl dir
-            outputs=[],
+                    working_dir],
+            outputs=[selected_audio],
            queue=False,
        ).success(
-            doRender, js='''() => window.start()'''
+            fn=audio_loading,
+            outputs=[selected_audio],
+            js='''(output_component) => window.loadAudio(output_component)'''
+        ).success(
+            fn=do_render,  # pass the callable; do_render() would run once at UI build time and hand over None
+            js='''() => window.start()'''
        )

        demo.queue()
--- a/wheels/gradio_gaussian_render-0.0.2-py3-none-any.whl
+++ b/wheels/gradio_gaussian_render-0.0.2-py3-none-any.whl