use wav file rather than tensor

This commit is contained in:
lyuxiang.lx
2025-12-08 08:43:09 +00:00
parent d985100326
commit 622a3a19b0
3 changed files with 31 additions and 30 deletions

View File

@@ -41,11 +41,11 @@ def read_json_lists(list_file):
return results
def load_wav(wav, target_sr, min_sr=16000):
    """Load an audio file, down-mix it, and resample it to ``target_sr``.

    Args:
        wav: Audio source passed straight to ``torchaudio.load`` (read with
            the ``soundfile`` backend).
        target_sr: Sample rate of the returned waveform.
        min_sr: Lowest source sample rate accepted when the file has to be
            resampled. Defaults to 16000.

    Returns:
        The waveform averaged over dim 0 with that dim kept (size 1), at
        ``target_sr``.

    Raises:
        AssertionError: If the file's sample rate differs from ``target_sr``
            and is below ``min_sr``.
    """
    speech, sample_rate = torchaudio.load(wav, backend='soundfile')
    # Average over dim 0 (presumably the channel axis of torchaudio's output
    # -- confirm) so multi-channel input collapses to a single row.
    speech = speech.mean(dim=0, keepdim=True)
    if sample_rate != target_sr:
        # The threshold being enforced is min_sr, so report min_sr (not
        # target_sr) in the failure message.
        assert sample_rate >= min_sr, \
            'wav sample rate {} must be greater than or equal to {}'.format(sample_rate, min_sr)
        speech = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sr)(speech)
    return speech