From d391f4c302f5b954cd727fe33c53ff075d87e854 Mon Sep 17 00:00:00 2001
From: Yair Lifshitz
Date: Thu, 15 Feb 2024 12:25:22 -0500
Subject: [PATCH] Use SoX when possible for loading a file with in-place resampling, ffmpeg otherwise.

read_audio() keeps the SoX effects chain (downmix to one channel, resample
to `sampling_rate`) when torchaudio reports a 'sox' backend. When it does
not, the file is read with torchaudio.load(), multi-channel audio is
averaged down to a single channel, and the signal is resampled with
torchaudio.transforms.Resample if its rate differs from `sampling_rate`.

The backend check uses torchaudio.list_audio_backends(), which is the
public torchaudio function for listing the available I/O backends.
---
 utils_vad.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/utils_vad.py b/utils_vad.py
index 7ae152b..9a81cef 100644
--- a/utils_vad.py
+++ b/utils_vad.py
@@ -122,12 +122,26 @@ class Validator():
 def read_audio(path: str,
                sampling_rate: int = 16000):
 
-    effects = [
-        ['channels', '1'],
-        ['rate', str(sampling_rate)]
-    ]
+    # Prefer SoX: its effects chain downmixes and resamples while loading.
+    if 'sox' in torchaudio.list_audio_backends():
+        effects = [
+            ['channels', '1'],
+            ['rate', str(sampling_rate)]
+        ]
 
-    wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
+        wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
+    else:
+        # No SoX backend: load the file as-is, then downmix and resample here.
+        wav, sr = torchaudio.load(path)
+
+        if wav.size(0) > 1:
+            wav = wav.mean(dim=0, keepdim=True)
+
+        if sr != sampling_rate:
+            transform = torchaudio.transforms.Resample(orig_freq=sr,
+                                                       new_freq=sampling_rate)
+            wav = transform(wav)
+            sr = sampling_rate
 
     assert sr == sampling_rate
     return wav.squeeze(0)