update readme

This commit is contained in:
lyuxiang.lx
2024-07-07 16:20:13 +08:00
parent f82916a768
commit 39565cc02c

View File

@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
**Model download** **Model download**
We strongly recommand that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `CosyVoice-ttsfrd` resource. We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `CosyVoice-ttsfrd` resource.
If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step. If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
@@ -81,13 +81,13 @@ from cosyvoice.cli.cosyvoice import CosyVoice
from cosyvoice.utils.file_utils import load_wav from cosyvoice.utils.file_utils import load_wav
import torchaudio import torchaudio
cosyvoice = CosyVoice('iic/CosyVoice-300M-SFT') cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
# sft usage # sft usage
print(cosyvoice.list_avaliable_spks()) print(cosyvoice.list_avaliable_spks())
output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女') output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
torchaudio.save('sft.wav', output['tts_speech'], 22050) torchaudio.save('sft.wav', output['tts_speech'], 22050)
cosyvoice = CosyVoice('iic/CosyVoice-300M') cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M')
# zero_shot usage # zero_shot usage
prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000) prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k) output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
@@ -97,7 +97,7 @@ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k) output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050) torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
cosyvoice = CosyVoice('iic/CosyVoice-300M-Instruct') cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
# instruct usage # instruct usage
output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.') output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
torchaudio.save('instruct.wav', output['tts_speech'], 22050) torchaudio.save('instruct.wav', output['tts_speech'], 22050)
@@ -112,7 +112,7 @@ Please see the demo website for details.
``` python ``` python
# change iic/CosyVoice-300M-SFT for sft inference, or iic/CosyVoice-300M-Instruct for instruct inference # change iic/CosyVoice-300M-SFT for sft inference, or iic/CosyVoice-300M-Instruct for instruct inference
python3 webui.py --port 50000 --model_dir iic/CosyVoice-300M python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
``` ```
**Advanced Usage** **Advanced Usage**