From 3b449137823f0ece887db42e583476e35a8dabdd Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Mon, 29 Dec 2025 10:30:54 +0000 Subject: [PATCH] fix bug --- cosyvoice/cli/cosyvoice.py | 2 ++ cosyvoice/cli/frontend.py | 2 +- runtime/python/fastapi/server.py | 12 +++--------- runtime/python/grpc/server.py | 12 +++--------- webui.py | 2 +- 5 files changed, 10 insertions(+), 20 deletions(-) diff --git a/cosyvoice/cli/cosyvoice.py b/cosyvoice/cli/cosyvoice.py index 7ab04a7..e91bf09 100644 --- a/cosyvoice/cli/cosyvoice.py +++ b/cosyvoice/cli/cosyvoice.py @@ -89,6 +89,8 @@ class CosyVoice: start_time = time.time() def inference_zero_shot(self, tts_text, prompt_text, prompt_wav, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True): + if self.__class__.__name__ == 'CosyVoice3' and '<|endofprompt|>' not in prompt_text + tts_text: + logging.warning('<|endofprompt|> not found in CosyVoice3 inference, check your input text') prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend) for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text): diff --git a/cosyvoice/cli/frontend.py b/cosyvoice/cli/frontend.py index f0c75dd..7ad6f7c 100644 --- a/cosyvoice/cli/frontend.py +++ b/cosyvoice/cli/frontend.py @@ -183,7 +183,7 @@ class CosyVoiceFrontEnd: 'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len, 'llm_embedding': embedding, 'flow_embedding': embedding} else: - model_input = self.spk2info[zero_shot_spk_id] + model_input = {**self.spk2info[zero_shot_spk_id]} model_input['text'] = tts_text_token model_input['text_len'] = tts_text_token_len return model_input diff --git a/runtime/python/fastapi/server.py b/runtime/python/fastapi/server.py index 74c62d8..1502f9c 100644 --- a/runtime/python/fastapi/server.py +++ b/runtime/python/fastapi/server.py @@ -24,7 +24,7 @@ import numpy as np ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append('{}/../../..'.format(ROOT_DIR)) sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR)) -from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2 +from cosyvoice.cli.cosyvoice import AutoModel from cosyvoice.utils.file_utils import load_wav app = FastAPI() @@ -88,14 +88,8 @@ if __name__ == '__main__': default=50000) parser.add_argument('--model_dir', type=str, - default='iic/CosyVoice-300M', + default='iic/CosyVoice2-0.5B', help='local path or modelscope repo id') args = parser.parse_args() - try: - cosyvoice = CosyVoice(args.model_dir) - except Exception: - try: - cosyvoice = CosyVoice2(args.model_dir) - except Exception: - raise TypeError('no valid model_type!') + cosyvoice = AutoModel(model_dir=args.model_dir) uvicorn.run(app, host="0.0.0.0", port=args.port) diff --git a/runtime/python/grpc/server.py b/runtime/python/grpc/server.py index 76827e6..28ecc19 100644 --- a/runtime/python/grpc/server.py +++ b/runtime/python/grpc/server.py @@ -25,7 +25,7 @@ import numpy as np ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append('{}/../../..'.format(ROOT_DIR)) sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR)) -from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2 +from cosyvoice.cli.cosyvoice import AutoModel logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s') @@ -33,13 +33,7 @@ logging.basicConfig(level=logging.DEBUG, class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer): def __init__(self, args): - try: - self.cosyvoice = CosyVoice(args.model_dir, trt_concurrent=args.max_conc) - except Exception: - try: - self.cosyvoice = CosyVoice2(args.model_dir, trt_concurrent=args.max_conc) - except Exception: - raise TypeError('no valid model_type!') + self.cosyvoice = AutoModel(model_dir=args.model_dir) logging.info('grpc service initialized') def Inference(self, request, context): @@ -90,7 +84,7 @@ if __name__ == '__main__': default=4) parser.add_argument('--model_dir', type=str, - default='iic/CosyVoice-300M', + default='iic/CosyVoice2-0.5B', help='local path or modelscope repo id') args = parser.parse_args() main() diff --git a/webui.py b/webui.py index debf5d3..e9b3ec5 100644 --- a/webui.py +++ b/webui.py @@ -167,7 +167,7 @@ if __name__ == '__main__': default=8000) parser.add_argument('--model_dir', type=str, - default='pretrained_models/CosyVoice3-0.5B', + default='pretrained_models/CosyVoice2-0.5B', help='local path or modelscope repo id') args = parser.parse_args() cosyvoice = AutoModel(model_dir=args.model_dir)