Mirror of https://github.com/FunAudioLLM/CosyVoice.git (synced 2026-02-04 17:39:25 +08:00)
fix bistream bug
This commit is contained in:
example.py: 12 lines changed (6 additions, 6 deletions)
@@ -15,15 +15,15 @@ def cosyvoice_example():
|
||||
torchaudio.save('sft_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
|
||||
cosyvoice = AutoModel(model_dir='pretrained_models/CosyVoice-300M')
|
||||
# zero_shot usage, <|zh|><|en|><|jp|><|yue|><|ko|> for Chinese/English/Japanese/Cantonese/Korean
|
||||
# zero_shot usage
|
||||
for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', './asset/zero_shot_prompt.wav')):
|
||||
torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
# cross_lingual usage
|
||||
# cross_lingual usage, <|zh|><|en|><|jp|><|yue|><|ko|> for Chinese/English/Japanese/Cantonese/Korean
|
||||
for i, j in enumerate(cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.',
|
||||
'./asset/cross_lingual_prompt.wav')):
|
||||
torchaudio.save('cross_lingual_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
# vc usage
|
||||
for i, j in enumerate(cosyvoice.inference_vc('./asset/zero_shot_prompt.wav', './asset/cross_lingual_prompt.wav')):
|
||||
for i, j in enumerate(cosyvoice.inference_vc('./asset/cross_lingual_prompt.wav', './asset/zero_shot_prompt.wav')):
|
||||
torchaudio.save('vc_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
|
||||
cosyvoice = AutoModel(model_dir='pretrained_models/CosyVoice-300M-Instruct')
|
||||
@@ -65,7 +65,7 @@ def cosyvoice2_example():
|
||||
yield '让我心中充满了甜蜜的快乐,'
|
||||
yield '笑容如花儿般绽放。'
|
||||
for i, j in enumerate(cosyvoice.inference_zero_shot(text_generator(), '希望你以后能够做的比我还好呦。', './asset/zero_shot_prompt.wav', stream=False)):
|
||||
torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
torchaudio.save('zero_shot_bistream_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
||||
|
||||
|
||||
def cosyvoice3_example():
|
||||
@@ -97,8 +97,8 @@ def cosyvoice3_example():
|
||||
|
||||
|
||||
def main():
|
||||
cosyvoice_example()
|
||||
cosyvoice2_example()
|
||||
# cosyvoice_example()
|
||||
# cosyvoice2_example()
|
||||
cosyvoice3_example()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user