From 69518b2bded1cae4315554eb67e171c4b7d96a84 Mon Sep 17 00:00:00 2001
From: "huzetao.hzt"
Date: Thu, 23 Jan 2025 19:08:18 +0800
Subject: [PATCH] fix bistream extra token

---
 cosyvoice/llm/llm.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cosyvoice/llm/llm.py b/cosyvoice/llm/llm.py
index 78d1f9c..bbd3305 100644
--- a/cosyvoice/llm/llm.py
+++ b/cosyvoice/llm/llm.py
@@ -382,7 +382,10 @@ class Qwen2LM(TransformerLM):
                     if text_cache.size(1) >= self.mix_ratio[0]:
                         lm_input_text = text_cache[:, :self.mix_ratio[0]]
                         logging.info('append {} text token'.format(lm_input_text.size(1)))
-                        lm_input = torch.concat([lm_input, lm_input_text], dim=1)
+                        if len(out_tokens) != 0 and out_tokens[-1] == self.speech_token_size + 2:
+                            lm_input = lm_input_text
+                        else:
+                            lm_input = torch.concat([lm_input, lm_input_text], dim=1)
                         text_cache = text_cache[:, self.mix_ratio[0]:]
                     else:
                         logging.info('not enough text token to decode, wait for more')