move use_spk_embedding to processor

2026-02-05 18:09:24 +08:00 · 2024-07-11 13:15:34 +08:00
parent 0fd15bb12b
commit 6cebcb3410
4 changed files with 7 additions and 7 deletions
--- a/cosyvoice/dataset/processor.py
+++ b/cosyvoice/dataset/processor.py
@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
            logging.fatal('Unsupported batch type {}'.format(batch_type))


-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
    """ Padding the data into training data

        Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                          'tts_index': tts_index,
                          'tts_text_token': tts_text_token,
                          'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
        yield batch
--- a/cosyvoice/utils/executor.py
+++ b/cosyvoice/utils/executor.py
@@ -52,10 +52,6 @@ class Executor:
                info_dict["batch_idx"] = batch_idx
                if cosyvoice_join(group_join, info_dict):
                    break
-                if info_dict["use_spk_embedding"] is True:
-                    batch_dict["embedding"] = batch_dict["spk_embedding"]
-                else:
-                    batch_dict["embedding"] = batch_dict["utt_embedding"]

                # Disable gradient synchronizations across DDP processes.
                # Within this context, gradients will be accumulated on module
--- a/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
    batch_type: 'dynamic'
    max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
    scheduler: warmuplr
    scheduler_conf:
        warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
    max_epoch: 200
    grad_clip: 5
    accum_grad: 2
--- a/examples/libritts/cosyvoice/conf/cosyvoice.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
    batch_type: 'dynamic'
    max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
    scheduler: warmuplr # change to constantlr during sft
    scheduler_conf:
        warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
    max_epoch: 200
    grad_clip: 5
    accum_grad: 2