diff --git a/cosyvoice/llm/llm.py b/cosyvoice/llm/llm.py index fc82832..59ebd48 100644 --- a/cosyvoice/llm/llm.py +++ b/cosyvoice/llm/llm.py @@ -401,7 +401,7 @@ class Qwen2LM(TransformerLM): speech_token_combined_emb = self.speech_embedding(speech_token_combined) # 3. prepare llm_input/target - lm_target, lm_input, lm_input_len = self.prepare_lm_input_target(text_token.repeat(2, 1), text_token_emb.repeat(2, 1, 1), text_token_len.repeat(2), \ + lm_target, lm_input, lm_input_len = self.prepare_lm_input_target(text_token.repeat(2, 1), text_token_emb.repeat(2, 1, 1), text_token_len.repeat(2), speech_token_combined, speech_token_combined_emb, speech_token_combined_len) lm_target = lm_target.to(device)