mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-05 09:59:23 +08:00
update dpo
This commit is contained in:
@@ -103,7 +103,7 @@ class CosyVoiceModel:
|
||||
def llm_job(self, text, prompt_text, llm_prompt_speech_token, llm_embedding, uuid):
|
||||
with self.llm_context, torch.cuda.amp.autocast(self.fp16 is True and hasattr(self.llm, 'vllm') is False):
|
||||
if isinstance(text, Generator):
|
||||
assert isinstance(self, CosyVoice2Model), 'streaming input text is only implemented for CosyVoice2!'
|
||||
assert isinstance(self, CosyVoice2Model) and not hasattr(self.llm, 'vllm'), 'streaming input text is only implemented for CosyVoice2 and do not support vllm!'
|
||||
for i in self.llm.inference_bistream(text=text,
|
||||
prompt_text=prompt_text.to(self.device),
|
||||
prompt_text_len=torch.tensor([prompt_text.shape[1]], dtype=torch.int32).to(self.device),
|
||||
@@ -279,6 +279,7 @@ class CosyVoice2Model(CosyVoiceModel):
|
||||
enable_prompt_embeds=True,
|
||||
gpu_memory_utilization=0.2)
|
||||
self.llm.vllm = LLMEngine.from_engine_args(engine_args)
|
||||
self.llm.lock = threading.Lock()
|
||||
del self.llm.llm.model.model.layers
|
||||
|
||||
def token2wav(self, token, prompt_token, prompt_feat, embedding, token_offset, uuid, stream=False, finalize=False, speed=1.0):
|
||||
|
||||
Reference in New Issue
Block a user