From 4c19646b9af318968edf1941fc8e047d11b9a6e1 Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Mon, 29 Dec 2025 12:46:34 +0000 Subject: [PATCH] update dataset --- cosyvoice/dataset/dataset.py | 6 +++++- examples/libritts/cosyvoice2/run.sh | 1 - examples/libritts/cosyvoice3/run.sh | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cosyvoice/dataset/dataset.py b/cosyvoice/dataset/dataset.py index 6b54184..6cb400e 100644 --- a/cosyvoice/dataset/dataset.py +++ b/cosyvoice/dataset/dataset.py @@ -145,7 +145,11 @@ def Dataset(data_list_file, shuffle=shuffle, partition=partition) # map partial arg to padding func - data_pipeline[-1] = partial(data_pipeline[-1], gan=gan, dpo=dpo) + for i in range(1, len(data_pipeline)): + if data_pipeline[i].func.__name__ == 'compute_fbank': + data_pipeline[i] = partial(data_pipeline[i], token_mel_ratio=0) + if data_pipeline[i].func.__name__ == 'padding': + data_pipeline[i] = partial(data_pipeline[i], gan=gan, dpo=dpo) for func in data_pipeline: dataset = Processor(dataset, func, mode=mode) return dataset diff --git a/examples/libritts/cosyvoice2/run.sh b/examples/libritts/cosyvoice2/run.sh index ad59c0a..538c71a 100644 --- a/examples/libritts/cosyvoice2/run.sh +++ b/examples/libritts/cosyvoice2/run.sh @@ -66,7 +66,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then fi cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list - # NOTE will update llm/hift training later for model in llm flow hifigan; do torchrun --nnodes=1 --nproc_per_node=$num_gpus \ --rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \ diff --git a/examples/libritts/cosyvoice3/run.sh b/examples/libritts/cosyvoice3/run.sh index 6b86227..3eed628 100644 --- a/examples/libritts/cosyvoice3/run.sh +++ b/examples/libritts/cosyvoice3/run.sh @@ -68,7 +68,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then fi cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list - # NOTE will update llm/hift training later for model in llm flow hifigan; do torchrun --nnodes=1 --nproc_per_node=$num_gpus \ --rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \