mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
update readme
This commit is contained in:
10
README.md
10
README.md
@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
|
||||
|
||||
**Model download**
|
||||
|
||||
We strongly recommand that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `CosyVoice-ttsfrd` resource.
|
||||
We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `CosyVoice-ttsfrd` resource.
|
||||
|
||||
If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
||||
|
||||
@@ -81,13 +81,13 @@ from cosyvoice.cli.cosyvoice import CosyVoice
|
||||
from cosyvoice.utils.file_utils import load_wav
|
||||
import torchaudio
|
||||
|
||||
cosyvoice = CosyVoice('iic/CosyVoice-300M-SFT')
|
||||
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
|
||||
# sft usage
|
||||
print(cosyvoice.list_avaliable_spks())
|
||||
output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
|
||||
torchaudio.save('sft.wav', output['tts_speech'], 22050)
|
||||
|
||||
cosyvoice = CosyVoice('iic/CosyVoice-300M')
|
||||
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M')
|
||||
# zero_shot usage
|
||||
prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
|
||||
output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
|
||||
@@ -97,7 +97,7 @@ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
|
||||
output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
|
||||
torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
|
||||
|
||||
cosyvoice = CosyVoice('iic/CosyVoice-300M-Instruct')
|
||||
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
|
||||
# instruct usage
|
||||
output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
|
||||
torchaudio.save('instruct.wav', output['tts_speech'], 22050)
|
||||
@@ -112,7 +112,7 @@ Please see the demo website for details.
|
||||
|
||||
``` python
|
||||
# change to iic/CosyVoice-300M-SFT for sft inference, or to iic/CosyVoice-300M-Instruct for instruct inference
|
||||
python3 webui.py --port 50000 --model_dir iic/CosyVoice-300M
|
||||
python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
|
||||
```
|
||||
|
||||
**Advanced Usage**
|
||||
|
||||
Reference in New Issue
Block a user