[feature] modify pad to trim

Conflicts: cosyvoice/dataset/processor.py
2026-02-05 18:09:24 +08:00 · 2025-04-25 10:31:43 +08:00
parent 97f0bc61cd
commit fbab274b6a
1 changed files with 8 additions and 2 deletions
--- a/cosyvoice/dataset/processor.py
+++ b/cosyvoice/dataset/processor.py
@@ -159,6 +159,7 @@ def truncate(data, truncate_length=24576, mode='train'):
 def compute_fbank(data,
                  feat_extractor,
+                  token_mel_ratio=0,
                  mode='train'):
    """ Extract fbank
@@ -174,8 +175,13 @@ def compute_fbank(data,
        assert 'utt' in sample
        assert 'text_token' in sample
        waveform = sample['speech']
-        mat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
+        feat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
-        sample['speech_feat'] = mat
+        if token_mel_ratio != 0:
+            # trim to align speech_token and speech_feat
+            token_len = int(min(feat.shape[0] / token_mel_ratio, sample["speech_token"].shape[0]))
+            feat = feat[:token_mel_ratio * token_len]
+            sample["speech_token"] = sample["speech_token"][:token_len]
+        sample['speech_feat'] = feat
        yield sample