diff --git a/README.md b/README.md index aa14365..056b121 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,6 @@ **Silero VAD** - pre-trained enterprise-grade [Voice Activity Detector](https://en.wikipedia.org/wiki/Voice_activity_detection) (also see our [STT models](https://github.com/snakers4/silero-models)). -This repository also includes Number Detector and Language classifier [models](https://github.com/snakers4/silero-vad/wiki/Other-Models) -

@@ -79,7 +77,6 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc- - [Examples and Dependencies](https://github.com/snakers4/silero-vad/wiki/Examples-and-Dependencies#dependencies) - [Quality Metrics](https://github.com/snakers4/silero-vad/wiki/Quality-Metrics) - [Performance Metrics](https://github.com/snakers4/silero-vad/wiki/Performance-Metrics) -- [Number Detector and Language classifier models](https://github.com/snakers4/silero-vad/wiki/Other-Models) - [Versions and Available Models](https://github.com/snakers4/silero-vad/wiki/Version-history-and-Available-Models) - [Further reading](https://github.com/snakers4/silero-models#further-reading) - [FAQ](https://github.com/snakers4/silero-vad/wiki/FAQ) diff --git a/files/silero_vad.jit b/files/silero_vad.jit index 501cb7c..38237dc 100644 Binary files a/files/silero_vad.jit and b/files/silero_vad.jit differ diff --git a/hubconf.py b/hubconf.py index 7aa108d..1f5cefe 100644 --- a/hubconf.py +++ b/hubconf.py @@ -58,6 +58,7 @@ def silero_number_detector(onnx=False, force_onnx_cpu=False): Returns a model with a set of utils Please see https://github.com/snakers4/silero-vad for usage examples """ + raise NotImplementedError('This model has been deprecated and is not supported anymore.') if onnx: url = 'https://models.silero.ai/vad_models/number_detector.onnx' else: @@ -77,6 +78,7 @@ def silero_lang_detector(onnx=False, force_onnx_cpu=False): Returns a model with a set of utils Please see https://github.com/snakers4/silero-vad for usage examples """ + raise NotImplementedError('This model has been deprecated and is not supported anymore.') if onnx: url = 'https://models.silero.ai/vad_models/number_detector.onnx' else: @@ -93,6 +95,7 @@ def silero_lang_detector_95(onnx=False, force_onnx_cpu=False): Returns a model with a set of utils Please see https://github.com/snakers4/silero-vad for usage examples """ + raise NotImplementedError('This model has been deprecated and is not supported anymore.') if onnx: url = 'https://models.silero.ai/vad_models/lang_classifier_95.onnx' else: diff --git a/silero-vad.ipynb b/silero-vad.ipynb index d9c62e8..f3b521d 100644 --- a/silero-vad.ipynb +++ b/silero-vad.ipynb @@ -1,14 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "FpMplOCA2Fwp" - }, - "source": [ - "#VAD" - ] - }, { "cell_type": "markdown", "metadata": { @@ -170,238 +161,6 @@ "\n", "print(speech_probs[:10]) # first 10 chunks predicts" ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "id": "36jY0niD2Fww" - }, - "source": [ - "# Number detector" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "scd1DlS42Fwx" - }, - "source": [ - "## Install Dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "Kq5gQuYq2Fwx" - }, - "outputs": [], - "source": [ - "#@title Install and Import Dependencies\n", - "\n", - "# this assumes that you have a relevant version of PyTorch installed\n", - "!pip install -q torchaudio\n", - "\n", - "SAMPLING_RATE = 16000\n", - "\n", - "import torch\n", - "torch.set_num_threads(1)\n", - "\n", - "from IPython.display import Audio\n", - "from pprint import pprint\n", - "# download example\n", - "torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en_num.wav', 'en_number_example.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dPwCFHmFycUF" - }, - "outputs": [], - "source": [ - "USE_ONNX = False # change this to True if you want to test onnx model\n", - "if USE_ONNX:\n", - " !pip install -q onnxruntime\n", - " \n", - "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", - " model='silero_number_detector',\n", - " force_reload=True,\n", - " onnx=USE_ONNX)\n", - "\n", - "(get_number_ts,\n", - " save_audio,\n", - " read_audio,\n", - " collect_chunks,\n", - " drop_chunks) = utils\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "qhPa30ij2Fwy" - }, - "source": [ - "## Full audio" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "EXpau6xq2Fwy" - }, - "outputs": [], - "source": [ - "wav = read_audio('en_number_example.wav', sampling_rate=SAMPLING_RATE)\n", - "# get number timestamps from full audio file\n", - "number_timestamps = get_number_ts(wav, model)\n", - "pprint(number_timestamps)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "u-KfXRhZ2Fwy" - }, - "outputs": [], - "source": [ - "# convert ms in timestamps to samples\n", - "for timestamp in number_timestamps:\n", - " timestamp['start'] = int(timestamp['start'] * SAMPLING_RATE / 1000)\n", - " timestamp['end'] = int(timestamp['end'] * SAMPLING_RATE / 1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "iwYEC4aZ2Fwy" - }, - "outputs": [], - "source": [ - "# merge all number chunks to one audio\n", - "save_audio('only_numbers.wav',\n", - " collect_chunks(number_timestamps, wav), SAMPLING_RATE) \n", - "Audio('only_numbers.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "fHaYejX12Fwy" - }, - "outputs": [], - "source": [ - "# drop all number chunks from audio\n", - "save_audio('no_numbers.wav',\n", - " drop_chunks(number_timestamps, wav), SAMPLING_RATE) \n", - "Audio('no_numbers.wav')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "id": "PnKtJKbq2Fwz" - }, - "source": [ - "# Language detector" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "F5cAmMbP2Fwz" - }, - "source": [ - "## Install Dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "Zu9D0t6n2Fwz" - }, - "outputs": [], - "source": [ - "#@title Install and Import Dependencies\n", - "\n", - "# this assumes that you have a relevant version of PyTorch installed\n", - "!pip install -q torchaudio\n", - "\n", - "SAMPLING_RATE = 16000\n", - "\n", - "import torch\n", - "torch.set_num_threads(1)\n", - "\n", - "from IPython.display import Audio\n", - "from pprint import pprint\n", - "# download example\n", - "torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', 'en_example.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JfRKDZiRztFe" - }, - "outputs": [], - "source": [ - "USE_ONNX = False # change this to True if you want to test onnx model\n", - "if USE_ONNX:\n", - " !pip install -q onnxruntime\n", - " \n", - "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", - " model='silero_lang_detector',\n", - " force_reload=True,\n", - " onnx=USE_ONNX)\n", - "\n", - "get_language, read_audio = utils" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "iC696eMX2Fwz" - }, - "source": [ - "## Full audio" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "c8UYnYBF2Fw0" - }, - "outputs": [], - "source": [ - "wav = read_audio('en_example.wav', sampling_rate=SAMPLING_RATE)\n", - "lang = get_language(wav, model)\n", - "print(lang)" - ] } ], "metadata": { diff --git a/utils_vad.py b/utils_vad.py index 83d97e7..90c00ef 100644 --- a/utils_vad.py +++ b/utils_vad.py @@ -13,11 +13,11 @@ class OnnxWrapper(): import numpy as np global np import onnxruntime - + opts = onnxruntime.SessionOptions() opts.inter_op_num_threads = 1 opts.intra_op_num_threads = 1 - + if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers(): self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts) else: @@ -291,7 +291,7 @@ def get_speech_timestamps(audio: torch.Tensor, triggered = True current_speech['start'] = window_size_samples * i continue - + if triggered and (window_size_samples * i) - current_speech['start'] > max_speech_samples: if prev_end: current_speech['end'] = prev_end @@ -309,7 +309,6 @@ def get_speech_timestamps(audio: torch.Tensor, prev_end = next_start = temp_end = 0 triggered = False continue - if (speech_prob < neg_threshold) and triggered: if not temp_end: