From 3b449137823f0ece887db42e583476e35a8dabdd Mon Sep 17 00:00:00 2001
From: "lyuxiang.lx" <lyuxiang.lx@alibaba-inc.com>
Date: Mon, 29 Dec 2025 10:30:54 +0000
Subject: [PATCH] fix bug: copy spk2info entry, use AutoModel in servers

---
 cosyvoice/cli/cosyvoice.py       |  2 ++
 cosyvoice/cli/frontend.py        |  2 +-
 runtime/python/fastapi/server.py | 12 +++---------
 runtime/python/grpc/server.py    | 12 +++---------
 webui.py                         |  2 +-
 5 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/cosyvoice/cli/cosyvoice.py b/cosyvoice/cli/cosyvoice.py
index 7ab04a7..e91bf09 100644
--- a/cosyvoice/cli/cosyvoice.py
+++ b/cosyvoice/cli/cosyvoice.py
@@ -89,6 +89,8 @@ class CosyVoice:
                 start_time = time.time()
 
     def inference_zero_shot(self, tts_text, prompt_text, prompt_wav, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True):
+        if self.__class__.__name__ == 'CosyVoice3' and '<|endofprompt|>' not in prompt_text + tts_text:
+            logging.warning('<|endofprompt|> not found in CosyVoice3 inference, check your input text')
         prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend)
         for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)):
             if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text):
diff --git a/cosyvoice/cli/frontend.py b/cosyvoice/cli/frontend.py
index f0c75dd..7ad6f7c 100644
--- a/cosyvoice/cli/frontend.py
+++ b/cosyvoice/cli/frontend.py
@@ -183,7 +183,7 @@ class CosyVoiceFrontEnd:
                            'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
                            'llm_embedding': embedding, 'flow_embedding': embedding}
         else:
-            model_input = self.spk2info[zero_shot_spk_id]
+            model_input = {**self.spk2info[zero_shot_spk_id]}
         model_input['text'] = tts_text_token
         model_input['text_len'] = tts_text_token_len
         return model_input
diff --git a/runtime/python/fastapi/server.py b/runtime/python/fastapi/server.py
index 74c62d8..1502f9c 100644
--- a/runtime/python/fastapi/server.py
+++ b/runtime/python/fastapi/server.py
@@ -24,7 +24,7 @@ import numpy as np
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../../..'.format(ROOT_DIR))
 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+from cosyvoice.cli.cosyvoice import AutoModel
 from cosyvoice.utils.file_utils import load_wav
 
 app = FastAPI()
@@ -88,14 +88,8 @@ if __name__ == '__main__':
                         default=50000)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='iic/CosyVoice-300M',
+                        default='iic/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
-    try:
-        cosyvoice = CosyVoice(args.model_dir)
-    except Exception:
-        try:
-            cosyvoice = CosyVoice2(args.model_dir)
-        except Exception:
-            raise TypeError('no valid model_type!')
+    cosyvoice = AutoModel(model_dir=args.model_dir)
     uvicorn.run(app, host="0.0.0.0", port=args.port)
diff --git a/runtime/python/grpc/server.py b/runtime/python/grpc/server.py
index 76827e6..28ecc19 100644
--- a/runtime/python/grpc/server.py
+++ b/runtime/python/grpc/server.py
@@ -25,7 +25,7 @@ import numpy as np
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../../..'.format(ROOT_DIR))
 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+from cosyvoice.cli.cosyvoice import AutoModel
 
 logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(levelname)s %(message)s')
@@ -33,13 +33,7 @@ logging.basicConfig(level=logging.DEBUG,
 
 class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
     def __init__(self, args):
-        try:
-            self.cosyvoice = CosyVoice(args.model_dir, trt_concurrent=args.max_conc)
-        except Exception:
-            try:
-                self.cosyvoice = CosyVoice2(args.model_dir, trt_concurrent=args.max_conc)
-            except Exception:
-                raise TypeError('no valid model_type!')
+        self.cosyvoice = AutoModel(model_dir=args.model_dir)
         logging.info('grpc service initialized')
 
     def Inference(self, request, context):
@@ -90,7 +84,7 @@ if __name__ == '__main__':
                         default=4)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='iic/CosyVoice-300M',
+                        default='iic/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
     main()
diff --git a/webui.py b/webui.py
index debf5d3..e9b3ec5 100644
--- a/webui.py
+++ b/webui.py
@@ -167,7 +167,7 @@ if __name__ == '__main__':
                         default=8000)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='pretrained_models/CosyVoice3-0.5B',
+                        default='pretrained_models/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
     cosyvoice = AutoModel(model_dir=args.model_dir)