update

2026-02-04 17:39:25 +08:00 · 2026-01-29 06:13:36 +00:00
parent 66b80dbccb
commit f26cde56df
7 changed files with 90 additions and 73 deletions
--- a/cosyvoice/cli/model.py
+++ b/cosyvoice/cli/model.py
@@ -256,6 +256,10 @@ class CosyVoice2Model(CosyVoiceModel):
        self.fp16 = fp16
        # NOTE must matching training static_chunk_size
        self.token_hop_len = 25
+        # NOTE increase token_hop_len incrementally to avoid duplicate inference 
+        self.token_max_hop_len = 4 * self.token_hop_len
+        self.stream_scale_factor = 2
+        assert self.stream_scale_factor >= 1, 'stream_scale_factor should be greater than 1, change it according to your actual rtf'
        # hift cache
        self.mel_cache_len = 8
        self.source_cache_len = int(self.mel_cache_len * 480)
@@ -353,6 +357,7 @@ class CosyVoice2Model(CosyVoiceModel):
                                                     stream=stream,
                                                     finalize=False)
                    token_offset += this_token_hop_len
+                    self.token_hop_len = min(self.token_max_hop_len, self.token_hop_len * self.stream_scale_factor)
                    yield {'tts_speech': this_tts_speech.cpu()}
                if self.llm_end_dict[this_uuid] is True and len(self.tts_speech_token_dict[this_uuid]) - token_offset < this_token_hop_len + self.flow.pre_lookahead_len:
                    break
@@ -403,6 +408,10 @@ class CosyVoice3Model(CosyVoice2Model):
        self.fp16 = fp16
        # NOTE must matching training static_chunk_size
        self.token_hop_len = 25
+        # NOTE increase token_hop_len incrementally to avoid duplicate inference 
+        self.token_max_hop_len = 4 * self.token_hop_len
+        self.stream_scale_factor = 2
+        assert self.stream_scale_factor >= 1, 'stream_scale_factor should be greater than 1, change it according to your actual rtf'
        # rtf and decoding related
        self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else nullcontext()
        self.lock = threading.Lock()