diff --git a/silero-vad.ipynb b/silero-vad.ipynb index d9c62e8..f3b521d 100644 --- a/silero-vad.ipynb +++ b/silero-vad.ipynb @@ -1,14 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "FpMplOCA2Fwp" - }, - "source": [ - "#VAD" - ] - }, { "cell_type": "markdown", "metadata": { @@ -170,238 +161,6 @@ "\n", "print(speech_probs[:10]) # first 10 chunks predicts" ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "id": "36jY0niD2Fww" - }, - "source": [ - "# Number detector" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "scd1DlS42Fwx" - }, - "source": [ - "## Install Dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "Kq5gQuYq2Fwx" - }, - "outputs": [], - "source": [ - "#@title Install and Import Dependencies\n", - "\n", - "# this assumes that you have a relevant version of PyTorch installed\n", - "!pip install -q torchaudio\n", - "\n", - "SAMPLING_RATE = 16000\n", - "\n", - "import torch\n", - "torch.set_num_threads(1)\n", - "\n", - "from IPython.display import Audio\n", - "from pprint import pprint\n", - "# download example\n", - "torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en_num.wav', 'en_number_example.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dPwCFHmFycUF" - }, - "outputs": [], - "source": [ - "USE_ONNX = False # change this to True if you want to test onnx model\n", - "if USE_ONNX:\n", - " !pip install -q onnxruntime\n", - " \n", - "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", - " model='silero_number_detector',\n", - " force_reload=True,\n", - " onnx=USE_ONNX)\n", - "\n", - "(get_number_ts,\n", - " save_audio,\n", - " read_audio,\n", - " collect_chunks,\n", - " drop_chunks) = utils\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "qhPa30ij2Fwy" - }, - "source": [ - "## Full audio" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "EXpau6xq2Fwy" - }, - "outputs": [], - "source": [ - "wav = read_audio('en_number_example.wav', sampling_rate=SAMPLING_RATE)\n", - "# get number timestamps from full audio file\n", - "number_timestamps = get_number_ts(wav, model)\n", - "pprint(number_timestamps)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "u-KfXRhZ2Fwy" - }, - "outputs": [], - "source": [ - "# convert ms in timestamps to samples\n", - "for timestamp in number_timestamps:\n", - " timestamp['start'] = int(timestamp['start'] * SAMPLING_RATE / 1000)\n", - " timestamp['end'] = int(timestamp['end'] * SAMPLING_RATE / 1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "iwYEC4aZ2Fwy" - }, - "outputs": [], - "source": [ - "# merge all number chunks to one audio\n", - "save_audio('only_numbers.wav',\n", - " collect_chunks(number_timestamps, wav), SAMPLING_RATE) \n", - "Audio('only_numbers.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "fHaYejX12Fwy" - }, - "outputs": [], - "source": [ - "# drop all number chunks from audio\n", - "save_audio('no_numbers.wav',\n", - " drop_chunks(number_timestamps, wav), SAMPLING_RATE) \n", - "Audio('no_numbers.wav')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "id": "PnKtJKbq2Fwz" - }, - "source": [ - "# Language detector" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "F5cAmMbP2Fwz" - }, - "source": [ - "## Install Dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "Zu9D0t6n2Fwz" - }, - "outputs": [], - "source": [ - "#@title Install and Import Dependencies\n", - "\n", - "# this assumes that you have a relevant version of PyTorch installed\n", - "!pip install -q torchaudio\n", - "\n", - "SAMPLING_RATE = 16000\n", - "\n", - "import torch\n", - "torch.set_num_threads(1)\n", - "\n", - "from IPython.display import Audio\n", - "from pprint import pprint\n", - "# download example\n", - "torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', 'en_example.wav')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JfRKDZiRztFe" - }, - "outputs": [], - "source": [ - "USE_ONNX = False # change this to True if you want to test onnx model\n", - "if USE_ONNX:\n", - " !pip install -q onnxruntime\n", - " \n", - "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", - " model='silero_lang_detector',\n", - " force_reload=True,\n", - " onnx=USE_ONNX)\n", - "\n", - "get_language, read_audio = utils" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true, - "hidden": true, - "id": "iC696eMX2Fwz" - }, - "source": [ - "## Full audio" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, - "id": "c8UYnYBF2Fw0" - }, - "outputs": [], - "source": [ - "wav = read_audio('en_example.wav', sampling_rate=SAMPLING_RATE)\n", - "lang = get_language(wav, model)\n", - "print(lang)" - ] } ], "metadata": {