diff --git a/README.md b/README.md index 259460c..0e01098 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,7 @@ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVo - [ ] 25hz llama based llm model which supports lora finetune - [ ] Support more instruction mode - - [ ] Voice conversion - [ ] Music generation - - [ ] Training script sample based on Mandarin - [ ] CosyVoice-500M trained with more multi-lingual data - [ ] More... @@ -113,7 +111,7 @@ from cosyvoice.cli.cosyvoice import CosyVoice from cosyvoice.utils.file_utils import load_wav import torchaudio -cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT') +cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT', load_jit=True, load_onnx=False, fp16=True) # sft usage print(cosyvoice.list_avaliable_spks()) # change stream=True for chunk stream inference diff --git a/cosyvoice/bin/train.py b/cosyvoice/bin/train.py index 6a65bf2..229d9ee 100644 --- a/cosyvoice/bin/train.py +++ b/cosyvoice/bin/train.py @@ -74,7 +74,7 @@ def get_args(): choices=['model_only', 'model+optimizer'], help='save model/optimizer states') parser.add_argument('--timeout', - default=30, + default=60, type=int, help='timeout (in seconds) of cosyvoice_join.') parser = deepspeed.add_config_arguments(parser) diff --git a/examples/libritts/cosyvoice/run.sh b/examples/libritts/cosyvoice/run.sh index 5dc79c5..5ed5a61 100644 --- a/examples/libritts/cosyvoice/run.sh +++ b/examples/libritts/cosyvoice/run.sh @@ -99,7 +99,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then --num_workers ${num_workers} \ --prefetch ${prefetch} \ --pin_memory \ - --timeout 300 \ --deepspeed_config ./conf/ds_stage2.json \ --deepspeed.save_states model+optimizer done