mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
update dataset
This commit is contained in:
@@ -145,7 +145,11 @@ def Dataset(data_list_file,
|
|||||||
shuffle=shuffle,
|
shuffle=shuffle,
|
||||||
partition=partition)
|
partition=partition)
|
||||||
# map partial arg to padding func
|
# map partial arg to padding func
|
||||||
data_pipeline[-1] = partial(data_pipeline[-1], gan=gan, dpo=dpo)
|
for i in range(1, len(data_pipeline)):
|
||||||
|
if data_pipeline[i].func.__name__ == 'compute_fbank':
|
||||||
|
data_pipeline[i] = partial(data_pipeline[i], token_mel_ratio=0)
|
||||||
|
if data_pipeline[i].func.__name__ == 'padding':
|
||||||
|
data_pipeline[i] = partial(data_pipeline[i], gan=gan, dpo=dpo)
|
||||||
for func in data_pipeline:
|
for func in data_pipeline:
|
||||||
dataset = Processor(dataset, func, mode=mode)
|
dataset = Processor(dataset, func, mode=mode)
|
||||||
return dataset
|
return dataset
|
||||||
|
|||||||
@@ -66,7 +66,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
|
|||||||
fi
|
fi
|
||||||
cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
|
cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
|
||||||
cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
|
cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
|
||||||
# NOTE will update llm/hift training later
|
|
||||||
for model in llm flow hifigan; do
|
for model in llm flow hifigan; do
|
||||||
torchrun --nnodes=1 --nproc_per_node=$num_gpus \
|
torchrun --nnodes=1 --nproc_per_node=$num_gpus \
|
||||||
--rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
|
--rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
|
||||||
|
|||||||
@@ -68,7 +68,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
|
|||||||
fi
|
fi
|
||||||
cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
|
cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
|
||||||
cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
|
cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
|
||||||
# NOTE will update llm/hift training later
|
|
||||||
for model in llm flow hifigan; do
|
for model in llm flow hifigan; do
|
||||||
torchrun --nnodes=1 --nproc_per_node=$num_gpus \
|
torchrun --nnodes=1 --nproc_per_node=$num_gpus \
|
||||||
--rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
|
--rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
|
||||||
|
|||||||
Reference in New Issue
Block a user