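Use SoX, when that backend is available, to load a file with downmixing and resampling applied during loading; otherwise fall back to torchaudio.load (ffmpeg) followed by explicit downmixing and resampling.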

2026-02-05 18:09:22 +08:00 · 2024-02-15 12:25:22 -05:00
parent bf18ea6b56
commit d391f4c302
1 changed files with 17 additions and 5 deletions
--- a/utils_vad.py
+++ b/utils_vad.py
@@ -122,12 +122,24 @@ class Validator():
 def read_audio(path: str,
               sampling_rate: int = 16000):
-    effects = [
+    if 'sox' in torchaudio.list_available_backends():
-        ['channels', '1'],
+        effects = [
-        ['rate', str(sampling_rate)]
+            ['channels', '1'],
-    ]
+            ['rate', str(sampling_rate)]
        ]
-    wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
+        wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
    else:
        wav, sr = torchaudio.load(path)
        if wav.size(0) > 1:
            wav = wav.mean(dim=0, keepdim=True)
        if sr != sampling_rate:
            transform = torchaudio.transforms.Resample(orig_freq=sr,
                                                       new_freq=sampling_rate)
            wav = transform(wav)
            sr = sampling_rate
    assert sr == sampling_rate
    return wav.squeeze(0)