# Patch summary
#   hubconf.py: silero_stt() stops resolving the model URL through
#   models.yml / OmegaConf and instead downloads files/model.jit directly;
#   the exported utils tuple is replaced with the VAD helpers.
#   utils.py: adds collect_speeches(tss, wav), which slices wav by each
#   entry's 'start'/'end' and concatenates the slices with torch.cat.
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
#   leading whitespace were lost. Line structure matches the hunk counts;
#   indentation inside the hunks is reconstructed - verify against the
#   target files before applying.
# NOTE(review): 'omegaconf' is dropped from `dependencies`, but the
#   `from omegaconf import OmegaConf` line is unchanged context, so the
#   module is still imported.
# NOTE(review): **kwargs is no longer forwarded to init_jit_model.
# NOTE(review): the download target 'files/model.jit' is a relative path;
#   presumably the 'files' directory must already exist - confirm.
# NOTE(review): collect_speeches passes an empty list to torch.cat when
#   tss is empty - confirm callers never do that.
diff --git a/hubconf.py b/hubconf.py
index 6f157cd..a444dd5 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -1,11 +1,13 @@
-dependencies = ['torch', 'omegaconf', 'torchaudio']
+dependencies = ['torch', 'torchaudio', 'numpy']
 import torch
 from omegaconf import OmegaConf
-from utils import (init_jit_model,
-                   read_audio,
-                   read_batch,
-                   split_into_batches,
-                   prepare_model_input)
+from utils import (init_jit_model,
+                   get_speech_ts,
+                   save_audio,
+                   read_audio,
+                   state_generator,
+                   single_audio_stream,
+                   collect_speeches)
 
 
 def silero_stt(**kwargs):
@@ -13,16 +15,15 @@ def silero_stt(**kwargs):
     Returns a model and a set of utils
     Please see https://github.com/snakers4/silero-vad for usage examples
     """
-    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-vad/master/models.yml',
-                                   'silero_vad_models.yml',
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-vad/master/files/model.jit',
+                                   'files/model.jit',
                                    progress=False)
-    models = OmegaConf.load('silero_vad_models.yml')
-
-    model = init_jit_model(model_url=models.latest.jit,
-                           **kwargs)
-    utils = (read_batch,
-             split_into_batches,
-             read_audio,
-             prepare_model_input)
+    model = init_jit_model(model_url='files/model.jit')
+    utils = (get_speech_ts,
+             save_audio,
+             read_audio,
+             state_generator,
+             single_audio_stream,
+             collect_speeches)
 
     return model, utils
diff --git a/utils.py b/utils.py
index a5c00bb..a55d0ba 100644
--- a/utils.py
+++ b/utils.py
@@ -224,3 +224,9 @@ def single_audio_stream(model, audio, onnx=False, trig_sum=0.26,
         if state[0]:
             states.append(state[0])
         yield states
+
+def collect_speeches(tss, wav):
+    speech_chunks = []
+    for i in tss:
+        speech_chunks.append(wav[i['start']: i['end']])
+    return torch.cat(speech_chunks)