From f77c6a85aad67f01b377b1d8b081ab96f74d574f Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Tue, 11 Feb 2025 00:07:12 +0800 Subject: [PATCH] fix bug --- cosyvoice/cli/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py index d5dcf32..57325ab 100644 --- a/cosyvoice/cli/model.py +++ b/cosyvoice/cli/model.py @@ -398,6 +398,10 @@ class CosyVoice2Model(CosyVoiceModel): prompt_speech_feat=torch.zeros(1, 0, 80), stream=False, speed=1.0, **kwargs): # this_uuid is used to track variables related to this inference thread this_uuid = str(uuid.uuid1()) + # NOTE: the flow model is only trained with static_chunk_size, so trim the flow prompt to a whole number of token_hop_len-sized chunks + n_chunk = int(flow_prompt_speech_token.size(1) / self.token_hop_len) + flow_prompt_speech_token = flow_prompt_speech_token[:, :n_chunk * self.token_hop_len] + prompt_speech_feat = prompt_speech_feat[:, :n_chunk * self.token_hop_len * 2] with self.lock: self.tts_speech_token_dict[this_uuid], self.llm_end_dict[this_uuid] = [], False self.hift_cache_dict[this_uuid] = None