mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-05 18:09:24 +08:00
[feature] modify pad to trim
Conflicts: cosyvoice/dataset/processor.py
This commit is contained in:
@@ -159,6 +159,7 @@ def truncate(data, truncate_length=24576, mode='train'):
|
|||||||
|
|
||||||
def compute_fbank(data,
|
def compute_fbank(data,
|
||||||
feat_extractor,
|
feat_extractor,
|
||||||
|
token_mel_ratio=0,
|
||||||
mode='train'):
|
mode='train'):
|
||||||
""" Extract fbank
|
""" Extract fbank
|
||||||
|
|
||||||
@@ -174,8 +175,13 @@ def compute_fbank(data,
|
|||||||
assert 'utt' in sample
|
assert 'utt' in sample
|
||||||
assert 'text_token' in sample
|
assert 'text_token' in sample
|
||||||
waveform = sample['speech']
|
waveform = sample['speech']
|
||||||
mat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
|
feat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
|
||||||
sample['speech_feat'] = mat
|
if token_mel_ratio != 0:
|
||||||
|
# trim to align speech_token and speech_feat
|
||||||
|
token_len = int(min(feat.shape[0] / token_mel_ratio, sample["speech_token"].shape[0]))
|
||||||
|
feat = feat[:token_mel_ratio * token_len]
|
||||||
|
sample["speech_token"] = sample["speech_token"][:token_len]
|
||||||
|
sample['speech_feat'] = feat
|
||||||
yield sample
|
yield sample
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user