Use SoX when possible for loading a file with in-place resampling, ffmpeg otherwise.

This commit is contained in:
Yair Lifshitz
2024-02-15 12:25:22 -05:00
parent bf18ea6b56
commit d391f4c302

View File

@@ -122,12 +122,24 @@ class Validator():
def read_audio(path: str,
               sampling_rate: int = 16000):
    """Load an audio file as a single-channel waveform at ``sampling_rate``.

    When the ``sox`` backend is reported by
    ``torchaudio.list_available_backends()``, the file is loaded through
    ``torchaudio.sox_effects.apply_effects_file`` with ``channels 1`` and
    ``rate <sampling_rate>`` effects, so down-mixing and resampling happen
    while loading. Otherwise the file is loaded with ``torchaudio.load``,
    averaged over dim 0 when that dim has more than one entry, and resampled
    with ``torchaudio.transforms.Resample`` when the rates differ.

    Args:
        path: Path of the audio file to load.
        sampling_rate: Target sample rate (defaults to 16000).

    Returns:
        The loaded tensor with dim 0 squeezed out (``wav.squeeze(0)``).
        NOTE(review): presumably a 1-D ``(time,)`` tensor, assuming the
        loaders return ``(channels, time)`` -- confirm against torchaudio.

    Raises:
        AssertionError: If the resulting sample rate differs from
            ``sampling_rate``.
    """
    if 'sox' in torchaudio.list_available_backends():
        # Build the effect chain only on the branch that uses it, and apply
        # it exactly once -- each call decodes and resamples the whole file.
        effects = [
            ['channels', '1'],
            ['rate', str(sampling_rate)],
        ]
        wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
    else:
        wav, sr = torchaudio.load(path)
        # Down-mix by averaging over dim 0 when it holds more than one entry.
        if wav.size(0) > 1:
            wav = wav.mean(dim=0, keepdim=True)
        if sr != sampling_rate:
            transform = torchaudio.transforms.Resample(orig_freq=sr,
                                                       new_freq=sampling_rate)
            wav = transform(wav)
            sr = sampling_rate
    # Internal sanity check kept from the original: both paths must end at
    # the requested rate.
    assert sr == sampling_rate
    return wav.squeeze(0)