diff --git a/files/model_micro.jit b/files/model_micro.jit
new file mode 100644
index 0000000..ec76fca
Binary files /dev/null and b/files/model_micro.jit differ
diff --git a/files/model_micro.onnx b/files/model_micro.onnx
new file mode 100644
index 0000000..1f36b09
Binary files /dev/null and b/files/model_micro.onnx differ
diff --git a/hubconf.py b/hubconf.py
index 6b8f056..5a515ca 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -29,6 +29,23 @@ def silero_vad(**kwargs):
     return model, utils
 
 
+def silero_vad_micro(**kwargs):
+    """Silero Voice Activity Detector
+    Returns a model with a set of utils
+    Please see https://github.com/snakers4/silero-vad for usage examples
+    """
+    hub_dir = torch.hub.get_dir()
+    model = init_jit_model(model_path=f'{hub_dir}/snakers4_silero-vad_master/files/model_micro.jit')
+    utils = (get_speech_ts,
+             save_audio,
+             read_audio,
+             state_generator,
+             single_audio_stream,
+             collect_chunks)
+
+    return model, utils
+
+
 def silero_number_detector(**kwargs):
     """Silero Number Detector
     Returns a model with a set of utils
diff --git a/utils_vad.py b/utils_vad.py
index 258001b..30f2408 100644
--- a/utils_vad.py
+++ b/utils_vad.py
@@ -4,6 +4,7 @@ from typing import List
 from itertools import repeat
 from collections import deque
 import torch.nn.functional as F
+import time
 
 torchaudio.set_audio_backend("soundfile") # switch backend
 
@@ -60,7 +61,7 @@ def get_speech_ts(wav: torch.Tensor,
                   batch_size: int = 200,
                   num_samples_per_window: int = 4000,
                   min_speech_samples: int = 10000, #samples
-                  min_silence_samples: int = 8000,
+                  min_silence_samples: int = 500,
                   run_function=validate,
                   visualize_probs=False):
 
@@ -308,6 +309,7 @@ def single_audio_stream(model,
     wav = read_audio(audio)
     wav_chunks = iter([wav[i:i+num_samples] for i in range(0, len(wav), num_samples)])
     for chunk in wav_chunks:
+        time.sleep(0.1)
        batch = VADiter.prepare_batch(chunk)
        outs = run_function(model, batch)
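
For review purposes, a minimal usage sketch of the new silero_vad_micro entrypoint (not part of the patch): it assumes the standard torch.hub loading flow from the repository README, a hypothetical input file example.wav, and that get_speech_ts still takes the waveform and model as its first two arguments; only parameters visible in the hunks above are relied on.

import torch

# Load the lightweight model through the new hub entrypoint added in hubconf.py.
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad_micro',
                              force_reload=True)

# utils is the 6-tuple returned by silero_vad_micro above.
(get_speech_ts, save_audio, read_audio,
 state_generator, single_audio_stream, collect_chunks) = utils

wav = read_audio('example.wav')  # hypothetical input file

# min_silence_samples now defaults to 500 (see the utils_vad.py hunk above),
# so shorter pauses end a detected speech segment sooner.
speech_timestamps = get_speech_ts(wav, model)
print(speech_timestamps)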