fix lint

2026-02-05 18:09:24 +08:00 · 2025-02-06 16:07:13 +08:00
parent 24f796a2b1
commit 2a3e033ee1
17 changed files with 187 additions and 135 deletions
--- a/examples/libritts/cosyvoice2/conf/cosyvoice2.yaml
+++ b/examples/libritts/cosyvoice2/conf/cosyvoice2.yaml
@@ -13,6 +13,10 @@ qwen_pretrain_path: ''
 token_frame_rate: 25
 token_mel_ratio: 2

+# stream related params
+chunk_size: 1 # streaming inference chunk size, in seconds
+num_decoding_left_chunks: 2 # number of left chunks used by the flow decoder in streaming inference
+
 # model params
 # for all class/function included in this repo, we use !<name> or !<new> for intialization, so that user may find all corresponding class/function according to one single yaml.
 # for system/third_party class/function, we do not require this.
@@ -56,7 +60,7 @@ flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
        input_size: 512
        use_cnn_module: False
        macaron_style: False
-        static_chunk_size: !ref <token_frame_rate> # 试试UpsampleConformerEncoder也是static
+        static_chunk_size: !ref <chunk_size> * <token_frame_rate>
    decoder: !new:cosyvoice.flow.flow_matching.CausalConditionalCFM
        in_channels: 240
        n_spks: 1
@@ -79,8 +83,8 @@ flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
            num_mid_blocks: 12
            num_heads: 8
            act_fn: 'gelu'
-            static_chunk_size: !ref <token_frame_rate> * <token_mel_ratio> # here we use static_chunk_size because we want to fix kv cache size during inference
-            num_decoding_left_chunks: 2
+            static_chunk_size: !ref <chunk_size> * <token_frame_rate> * <token_mel_ratio> # here we use static_chunk_size because we want to fix kv cache size during inference
+            num_decoding_left_chunks: !ref <num_decoding_left_chunks>

 hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
    in_channels: 80