mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
add llm train
This commit is contained in:
@@ -169,7 +169,7 @@ sort: !name:cosyvoice.dataset.processor.sort
|
||||
sort_size: 500 # sort_size should be less than shuffle_size
|
||||
batch: !name:cosyvoice.dataset.processor.batch
|
||||
batch_type: 'dynamic'
|
||||
max_frames_in_batch: 2500
|
||||
max_frames_in_batch: 2000
|
||||
padding: !name:cosyvoice.dataset.processor.padding
|
||||
use_spk_embedding: False # change to True during sft
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ stop_stage=3
|
||||
|
||||
data_url=www.openslr.org/resources/60
|
||||
data_dir=/mnt/lyuxiang.lx/data/tts/openslr/libritts
|
||||
pretrained_model_dir=/mnt/lyuxiang.lx/data/tts/models/IIC/CosyVoice2-0.5B/
|
||||
pretrained_model_dir=../../../pretrained_models/CosyVoice2-0.5B
|
||||
|
||||
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
|
||||
echo "Data Download"
|
||||
@@ -86,7 +86,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
|
||||
cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
|
||||
cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
|
||||
# NOTE will update llm/hift training later
|
||||
for model in flow; do
|
||||
for model in llm flow; do
|
||||
torchrun --nnodes=1 --nproc_per_node=$num_gpus \
|
||||
--rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
|
||||
cosyvoice/bin/train.py \
|
||||
|
||||
Reference in New Issue
Block a user