From f280558bcb4dd32da9626f454e7f8d476348db7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=BE=E8=81=AA?=
Date: Wed, 26 Feb 2025 16:48:21 +0800
Subject: [PATCH] update func export_codec_vllm

---
 cosyvoice/cli/cosyvoice.py | 2 +-
 cosyvoice/cli/model.py     | 3 +++
 cosyvoice/llm/llm.py       | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/cosyvoice/cli/cosyvoice.py b/cosyvoice/cli/cosyvoice.py
index 00a56a1..319be19 100644
--- a/cosyvoice/cli/cosyvoice.py
+++ b/cosyvoice/cli/cosyvoice.py
@@ -156,7 +156,7 @@ class CosyVoice2(CosyVoice):
             self.model.export_codec_vllm(''.join([model_dir, '/codec_vllm_model']))
             engine_args = EngineArgs(model=''.join([model_dir, '/codec_vllm_model']),
                                      skip_tokenizer_init=True,
-                                     gpu_memory_utilization=0.1)
+                                     gpu_memory_utilization=0.2)
             self.vllm_codec_engine = LLMEngine.from_engine_args(engine_args)
             self.model.vllm_codec_engine = self.vllm_codec_engine
 
diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py
index e6ecd19..5374e7a 100644
--- a/cosyvoice/cli/model.py
+++ b/cosyvoice/cli/model.py
@@ -347,6 +347,9 @@ class CosyVoice2Model(CosyVoiceModel):
         self.llm.llm.model.to(dtype)
         tmp_vocab_size = self.llm.llm.model.config.vocab_size
         tmp_tie_embedding = self.llm.llm.model.config.tie_word_embeddings
+        del self.llm.llm.model.generation_config.eos_token_id
+        del self.llm.llm.model.config.bos_token_id
+        del self.llm.llm.model.config.eos_token_id
         self.llm.llm.model.config.vocab_size = pad_vocab_size
         self.llm.llm.model.config.tie_word_embeddings = False
         self.llm.llm.model.config.use_bias = True
diff --git a/cosyvoice/llm/llm.py b/cosyvoice/llm/llm.py
index ac746f9..1b12acf 100644
--- a/cosyvoice/llm/llm.py
+++ b/cosyvoice/llm/llm.py
@@ -343,7 +343,7 @@ class Qwen2LM(TransformerLM):
                                          max_tokens=max_len)
         request_id = uuid.uuid4()
         vllm_codec_engine.add_request(request_id,
-                                      {"prompt_embeds": lm_input.to(torch.bfloat16).to(device)},
+                                      {"prompt_embeds": lm_input.squeeze(0).to(torch.bfloat16).to(device)},
                                       sampling_params)
         ## generator
         out_token_ids = []
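
For context, a minimal sketch of how the patched pieces fit together at runtime. The model path, tensor shapes, and max_tokens value below are illustrative assumptions, and the sketch assumes a vLLM build that accepts {"prompt_embeds": ...} prompts, as the patched add_request call does.

    # Minimal sketch, not the repo's own code: drives the exported codec
    # model through vLLM's LLMEngine, mirroring the calls this patch touches.
    import uuid

    import torch
    from vllm import EngineArgs, LLMEngine, SamplingParams

    # Illustrative path; the patch builds it as model_dir + '/codec_vllm_model'.
    codec_dir = 'pretrained_models/CosyVoice2-0.5B/codec_vllm_model'

    # The patch raises gpu_memory_utilization from 0.1 to 0.2, giving the
    # engine more KV-cache headroom alongside the rest of the CosyVoice2
    # pipeline on the same GPU.
    engine_args = EngineArgs(model=codec_dir,
                             skip_tokenizer_init=True,
                             gpu_memory_utilization=0.2)
    engine = LLMEngine.from_engine_args(engine_args)

    # The patch squeezes away the batch dimension before submitting, i.e.
    # prompt_embeds goes in as (seq_len, hidden) rather than
    # (1, seq_len, hidden). The shape here is illustrative.
    lm_input = torch.randn(1, 32, 896)
    sampling_params = SamplingParams(max_tokens=256)
    engine.add_request(str(uuid.uuid4()),
                       {'prompt_embeds': lm_input.squeeze(0).to(torch.bfloat16).cuda()},
                       sampling_params)

    # With skip_tokenizer_init=True there is no detokenizer, so the outputs
    # carry raw speech-token ids only.
    while engine.has_unfinished_requests():
        for output in engine.step():
            if output.finished:
                print(output.outputs[0].token_ids)

The deletions in model.py presumably keep Qwen2's text bos/eos token ids out of the exported config and generation_config, so the vLLM engine neither validates them against the padded vocabulary nor treats them as stop tokens for the speech-token stream.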