From 6cebcb3410eff522fbdd1cfbb7041cd3c2c43d59 Mon Sep 17 00:00:00 2001
From: "lyuxiang.lx"
Date: Thu, 11 Jul 2024 13:15:34 +0800
Subject: [PATCH] move use_spk_embedding to processor

---
 cosyvoice/dataset/processor.py                              | 6 +++++-
 cosyvoice/utils/executor.py                                 | 4 ----
 examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml | 2 +-
 examples/libritts/cosyvoice/conf/cosyvoice.yaml             | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cosyvoice/dataset/processor.py b/cosyvoice/dataset/processor.py
index cb34a0c..11f31c4 100644
--- a/cosyvoice/dataset/processor.py
+++ b/cosyvoice/dataset/processor.py
@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
         logging.fatal('Unsupported batch type {}'.format(batch_type))


-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data

         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                       'tts_index': tts_index,
                       'tts_text_token': tts_text_token,
                       'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch

diff --git a/cosyvoice/utils/executor.py b/cosyvoice/utils/executor.py
index f7dfb0e..c12e52d 100644
--- a/cosyvoice/utils/executor.py
+++ b/cosyvoice/utils/executor.py
@@ -52,10 +52,6 @@ class Executor:
             info_dict["batch_idx"] = batch_idx
             if cosyvoice_join(group_join, info_dict):
                 break
-            if info_dict["use_spk_embedding"] is True:
-                batch_dict["embedding"] = batch_dict["spk_embedding"]
-            else:
-                batch_dict["embedding"] = batch_dict["utt_embedding"]

             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
diff --git a/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml b/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
index b67b528..30545ff 100644
--- a/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
diff --git a/examples/libritts/cosyvoice/conf/cosyvoice.yaml b/examples/libritts/cosyvoice/conf/cosyvoice.yaml
index 588086c..f43af16 100644
--- a/examples/libritts/cosyvoice/conf/cosyvoice.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
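
With this change, the choice between speaker-level and utterance-level embeddings is made inside the dataset pipeline rather than in the training loop: the YAML config binds use_spk_embedding into the padding partial, and every batch leaves the processor with batch["embedding"] already set. A minimal, self-contained sketch of that wiring follows; the stripped-down padding body and the fake_batches input are illustrative stand-ins (real batches carry padded torch tensors), not the actual CosyVoice code.

    from functools import partial


    def padding(data, use_spk_embedding, mode='train'):
        # Stand-in for cosyvoice.dataset.processor.padding: the real function
        # also pads speech/text tokens; only the embedding selection is shown.
        for batch in data:
            if use_spk_embedding is True:
                batch["embedding"] = batch["spk_embedding"]
            else:
                batch["embedding"] = batch["utt_embedding"]
            yield batch


    # The YAML lines
    #   padding: !name:cosyvoice.dataset.processor.padding
    #       use_spk_embedding: False # change to True during sft
    # resolve (via hyperpyyaml's !name: tag) to roughly this partial application:
    padding_fn = partial(padding, use_spk_embedding=False)

    # Hypothetical input standing in for the output of the upstream batch() stage.
    fake_batches = [{"spk_embedding": [0.1] * 4, "utt_embedding": [0.2] * 4}]
    for batch in padding_fn(fake_batches, mode='train'):
        assert batch["embedding"] == batch["utt_embedding"]

Keeping the switch in the processor means the executor's training step no longer branches on config state, and sft runs only need to flip the flag on the padding entry in the YAML.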