fx

2026-02-05 18:09:22 +08:00 · 2020-12-15 12:34:51 +00:00
parent 557a32ed1b
commit d5a30a8664
2 changed files with 23 additions and 16 deletions
--- a/hubconf.py
+++ b/hubconf.py
@@ -1,11 +1,13 @@
-dependencies = ['torch', 'omegaconf', 'torchaudio']
+dependencies = ['torch', 'torchaudio', 'numpy']
 import torch
 from omegaconf import OmegaConf
-from utils import (init_jit_model,
+from utils import (init_jit_model, 
-                   read_audio,
+                   get_speech_ts,
-                   read_batch,
+                   save_audio, 
-                   split_into_batches,
+                   read_audio, 
-                   prepare_model_input)
+                   state_generator, 
                   single_audio_stream,
                   collect_speeches)
 def silero_stt(**kwargs):
@@ -13,16 +15,15 @@ def silero_stt(**kwargs):
    Returns a model and a set of utils
    Please see https://github.com/snakers4/silero-vad for usage examples
    """
-    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-vad/master/models.yml',
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-vad/master/files/model.jit',
-                                   'silero_vad_models.yml',
+                                   'files/model.jit',
                                   progress=False)
-    models = OmegaConf.load('silero_vad_models.yml')
+    model = init_jit_model(model_url='files/model.jit')
-
+    utils = (get_speech_ts,
-    model = init_jit_model(model_url=models.latest.jit,
+             save_audio, 
-                           **kwargs)
+             read_audio, 
-    utils = (read_batch,
+             state_generator, 
-             split_into_batches,
+             single_audio_stream,
-             read_audio,
+             collect_speeches)
             prepare_model_input)
    return model, utils
--- a/utils.py
+++ b/utils.py
@@ -224,3 +224,9 @@ def single_audio_stream(model, audio, onnx=False, trig_sum=0.26,
        if state[0]:
            states.append(state[0])
        yield states
 def collect_speeches(tss, wav):
     """Concatenate the slices of ``wav`` selected by ``tss``.

     Args:
         tss: iterable of mappings, each with ``'start'`` and ``'end'``
             keys used as slice bounds into ``wav``.
         wav: sliceable tensor; each ``wav[start:end]`` slice is passed
             to ``torch.cat``.

     Returns:
         The selected slices joined with ``torch.cat`` in the order given,
         or an empty slice of ``wav`` when ``tss`` yields nothing.
     """
     speech_chunks = [wav[ts['start']: ts['end']] for ts in tss]
     if not speech_chunks:
         # torch.cat rejects an empty list; an empty slice keeps the
         # dtype/device of ``wav`` and lets callers handle "no speech".
         return wav[:0]
     return torch.cat(speech_chunks)