This commit is contained in:
lyuxiang.lx
2025-12-29 10:30:54 +00:00
parent 4d7295a9a7
commit 3b44913782
5 changed files with 10 additions and 20 deletions

View File

@@ -89,6 +89,8 @@ class CosyVoice:
  start_time = time.time()
  def inference_zero_shot(self, tts_text, prompt_text, prompt_wav, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True):
+ if self.__class__.__name__ == 'CosyVoice3' and '<|endofprompt|>' not in prompt_text + tts_text:
+ logging.warning('<|endofprompt|> not found in CosyVoice3 inference, check your input text')
  prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend)
  for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)):
  if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text):

View File

@@ -183,7 +183,7 @@ class CosyVoiceFrontEnd:
  'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
  'llm_embedding': embedding, 'flow_embedding': embedding}
  else:
- model_input = self.spk2info[zero_shot_spk_id]
+ model_input = {**self.spk2info[zero_shot_spk_id]}
  model_input['text'] = tts_text_token
  model_input['text_len'] = tts_text_token_len
  return model_input

View File

@@ -24,7 +24,7 @@ import numpy as np
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
  sys.path.append('{}/../../..'.format(ROOT_DIR))
  sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
- from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+ from cosyvoice.cli.cosyvoice import AutoModel
  from cosyvoice.utils.file_utils import load_wav
  app = FastAPI()
@@ -88,14 +88,8 @@ if __name__ == '__main__':
  default=50000)
  parser.add_argument('--model_dir',
  type=str,
- default='iic/CosyVoice-300M',
+ default='iic/CosyVoice2-0.5B',
  help='local path or modelscope repo id')
  args = parser.parse_args()
- try:
- cosyvoice = CosyVoice(args.model_dir)
- except Exception:
- try:
- cosyvoice = CosyVoice2(args.model_dir)
- except Exception:
- raise TypeError('no valid model_type!')
+ cosyvoice = AutoModel(model_dir=args.model_dir)
  uvicorn.run(app, host="0.0.0.0", port=args.port)

View File

@@ -25,7 +25,7 @@ import numpy as np
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
  sys.path.append('{}/../../..'.format(ROOT_DIR))
  sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
- from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+ from cosyvoice.cli.cosyvoice import AutoModel
  logging.basicConfig(level=logging.DEBUG,
  format='%(asctime)s %(levelname)s %(message)s')
@@ -33,13 +33,7 @@ logging.basicConfig(level=logging.DEBUG,
  class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
  def __init__(self, args):
- try:
- self.cosyvoice = CosyVoice(args.model_dir, trt_concurrent=args.max_conc)
- except Exception:
- try:
- self.cosyvoice = CosyVoice2(args.model_dir, trt_concurrent=args.max_conc)
- except Exception:
- raise TypeError('no valid model_type!')
+ self.cosyvoice = AutoModel(model_dir=args.model_dir)
  logging.info('grpc service initialized')
  def Inference(self, request, context):
@@ -90,7 +84,7 @@ if __name__ == '__main__':
  default=4)
  parser.add_argument('--model_dir',
  type=str,
- default='iic/CosyVoice-300M',
+ default='iic/CosyVoice2-0.5B',
  help='local path or modelscope repo id')
  args = parser.parse_args()
  main()

View File

@@ -167,7 +167,7 @@ if __name__ == '__main__':
  default=8000)
  parser.add_argument('--model_dir',
  type=str,
- default='pretrained_models/CosyVoice3-0.5B',
+ default='pretrained_models/CosyVoice2-0.5B',
  help='local path or modelscope repo id')
  args = parser.parse_args()
  cosyvoice = AutoModel(model_dir=args.model_dir)