mirror of https://github.com/FunAudioLLM/CosyVoice.git
move use_spk_embedding to processor
@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
             logging.fatal('Unsupported batch type {}'.format(batch_type))
 
 
-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data
 
         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                       'tts_index': tts_index,
                       'tts_text_token': tts_text_token,
                       'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch

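For context, a minimal runnable sketch of the selection that padding() now performs once it receives use_spk_embedding: the speaker-level embedding is used when the flag is True (the sft case), the utterance-level embedding otherwise. The helper name select_embedding and the toy tensors below are illustrative only and do not appear in the repo.

import torch

def select_embedding(batch, use_spk_embedding):
    # Same branch as the added hunk: speaker-level embedding for sft,
    # utterance-level embedding otherwise.
    if use_spk_embedding is True:
        batch["embedding"] = batch["spk_embedding"]
    else:
        batch["embedding"] = batch["utt_embedding"]
    return batch

# Toy batch; 192 is just a placeholder embedding size for illustration.
batch = {"spk_embedding": torch.zeros(2, 192), "utt_embedding": torch.ones(2, 192)}
assert select_embedding(dict(batch), use_spk_embedding=False)["embedding"][0, 0] == 1
assert select_embedding(dict(batch), use_spk_embedding=True)["embedding"][0, 0] == 0
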
@@ -52,10 +52,6 @@ class Executor:
                 info_dict["batch_idx"] = batch_idx
                 if cosyvoice_join(group_join, info_dict):
                     break
-                if info_dict["use_spk_embedding"] is True:
-                    batch_dict["embedding"] = batch_dict["spk_embedding"]
-                else:
-                    batch_dict["embedding"] = batch_dict["utt_embedding"]
 
                 # Disable gradient synchronizations across DDP processes.
                 # Within this context, gradients will be accumulated on module

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

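A rough illustration of how the YAML addition reaches the processor: with hyperpyyaml (which CosyVoice uses to load these configs), a `!name:` entry resolves to a partial application of the named callable, so adding use_spk_embedding under padding pre-binds that keyword before the dataset pipeline calls it. The snippet below approximates this wiring with stand-in data; it is not the repo's loader code.

from functools import partial

def padding(data, use_spk_embedding, mode='train'):
    # Stand-in for cosyvoice.dataset.processor.padding, reduced to the
    # embedding selection relevant to this commit.
    for batch in data:
        batch["embedding"] = batch["spk_embedding"] if use_spk_embedding else batch["utt_embedding"]
        yield batch

# Roughly what `padding: !name:cosyvoice.dataset.processor.padding` with
# `use_spk_embedding: False` resolves to:
padding_fn = partial(padding, use_spk_embedding=False)

batches = [{"spk_embedding": "spk-vec", "utt_embedding": "utt-vec"}]
print(next(padding_fn(batches))["embedding"])  # -> utt-vec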