move use_spk_embedding to processor

This commit is contained in:
lyuxiang.lx
2024-07-11 13:15:34 +08:00
parent 0fd15bb12b
commit 6cebcb3410
4 changed files with 7 additions and 7 deletions

View File

@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
 logging.fatal('Unsupported batch type {}'.format(batch_type))
-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
 """ Padding the data into training data
 Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
 'tts_index': tts_index,
 'tts_text_token': tts_text_token,
 'tts_text_token_len': tts_text_token_len})
+if use_spk_embedding is True:
+    batch["embedding"] = batch["spk_embedding"]
+else:
+    batch["embedding"] = batch["utt_embedding"]
 yield batch

View File

@@ -52,10 +52,6 @@ class Executor:
 info_dict["batch_idx"] = batch_idx
 if cosyvoice_join(group_join, info_dict):
 break
-if info_dict["use_spk_embedding"] is True:
-    batch_dict["embedding"] = batch_dict["spk_embedding"]
-else:
-    batch_dict["embedding"] = batch_dict["utt_embedding"]
 # Disable gradient synchronizations across DDP processes.
 # Within this context, gradients will be accumulated on module

View File

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
 batch_type: 'dynamic'
 max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+use_spk_embedding: False # change to True during sft
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
 scheduler: warmuplr
 scheduler_conf:
 warmup_steps: 25000
-use_spk_embedding: False # change to True during sft
 max_epoch: 200
 grad_clip: 5
 accum_grad: 2

View File

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
 batch_type: 'dynamic'
 max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+use_spk_embedding: False # change to True during sft
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
 scheduler: warmuplr # change to constantlr during sft
 scheduler_conf:
 warmup_steps: 2500
-use_spk_embedding: False # change to True during sft
 max_epoch: 200
 grad_clip: 5
 accum_grad: 2