From 6cebcb3410eff522fbdd1cfbb7041cd3c2c43d59 Mon Sep 17 00:00:00 2001
From: "lyuxiang.lx"
Date: Thu, 11 Jul 2024 13:15:34 +0800
Subject: [PATCH] move use_spk_embedding to processor

---
 cosyvoice/dataset/processor.py                              | 6 +++++-
 cosyvoice/utils/executor.py                                 | 4 ----
 examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml | 2 +-
 examples/libritts/cosyvoice/conf/cosyvoice.yaml             | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cosyvoice/dataset/processor.py b/cosyvoice/dataset/processor.py
index cb34a0c..11f31c4 100644
--- a/cosyvoice/dataset/processor.py
+++ b/cosyvoice/dataset/processor.py
@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
         logging.fatal('Unsupported batch type {}'.format(batch_type))


-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data

         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                       'tts_index': tts_index,
                       'tts_text_token': tts_text_token,
                       'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch

diff --git a/cosyvoice/utils/executor.py b/cosyvoice/utils/executor.py
index f7dfb0e..c12e52d 100644
--- a/cosyvoice/utils/executor.py
+++ b/cosyvoice/utils/executor.py
@@ -52,10 +52,6 @@ class Executor:
             info_dict["batch_idx"] = batch_idx
             if cosyvoice_join(group_join, info_dict):
                 break
-            if info_dict["use_spk_embedding"] is True:
-                batch_dict["embedding"] = batch_dict["spk_embedding"]
-            else:
-                batch_dict["embedding"] = batch_dict["utt_embedding"]

             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
diff --git a/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml b/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
index b67b528..30545ff 100644
--- a/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
diff --git a/examples/libritts/cosyvoice/conf/cosyvoice.yaml b/examples/libritts/cosyvoice/conf/cosyvoice.yaml
index 588086c..f43af16 100644
--- a/examples/libritts/cosyvoice/conf/cosyvoice.yaml
+++ b/examples/libritts/cosyvoice/conf/cosyvoice.yaml
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft

 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
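
With this change, the choice between speaker-level and utterance-level embeddings is made inside the dataset pipeline rather than in the training loop: the YAML config binds use_spk_embedding into the padding partial, and every batch leaves the processor with batch["embedding"] already set. A minimal, self-contained sketch of that wiring follows; the stripped-down padding body and the fake_batches input are illustrative stand-ins (real batches carry padded torch tensors), not the actual CosyVoice code.

    from functools import partial


    def padding(data, use_spk_embedding, mode='train'):
        # Stand-in for cosyvoice.dataset.processor.padding: the real function
        # also pads speech/text tokens; only the embedding selection is shown.
        for batch in data:
            if use_spk_embedding is True:
                batch["embedding"] = batch["spk_embedding"]
            else:
                batch["embedding"] = batch["utt_embedding"]
            yield batch


    # The YAML lines
    #   padding: !name:cosyvoice.dataset.processor.padding
    #       use_spk_embedding: False # change to True during sft
    # resolve (via hyperpyyaml's !name: tag) to roughly this partial application:
    padding_fn = partial(padding, use_spk_embedding=False)

    # Hypothetical input standing in for the output of the upstream batch() stage.
    fake_batches = [{"spk_embedding": [0.1] * 4, "utt_embedding": [0.2] * 4}]
    for batch in padding_fn(fake_batches, mode='train'):
        assert batch["embedding"] == batch["utt_embedding"]

Keeping the switch in the processor means the executor's training step no longer branches on config state, and sft runs only need to flip the flag on the padding entry in the YAML.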