From fbbbf1249777bef14aaddbc2a104a37cd61783a9 Mon Sep 17 00:00:00 2001 From: adamnsandle Date: Tue, 15 Dec 2020 13:11:11 +0000 Subject: [PATCH] fx notebook --- silero-vad.ipynb | 204 ++++++++++++++++++++--------------------------- 1 file changed, 87 insertions(+), 117 deletions(-) diff --git a/silero-vad.ipynb b/silero-vad.ipynb index 0bfec50..b0af378 100644 --- a/silero-vad.ipynb +++ b/silero-vad.ipynb @@ -7,26 +7,42 @@ "# Jit example" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q ipython # For jupyter audio display" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:25.940761Z", - "start_time": "2020-12-15T11:54:25.933842Z" + "end_time": "2020-12-15T13:09:54.623434Z", + "start_time": "2020-12-15T13:09:54.241855Z" } }, "outputs": [], "source": [ - "# imports\n", + "# dependencies\n", "import glob\n", "import torch\n", - "from IPython.display import Audio\n", "torch.set_num_threads(1)\n", + "from IPython.display import Audio\n", "\n", - "from utils import (init_jit_model, get_speech_ts,\n", - " save_audio, read_audio, \n", - " state_generator, single_audio_stream)" + "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", + " model='silero_vad')\n", + "\n", + "\n", + "(get_speech_ts,\n", + " save_audio,\n", + " read_audio,\n", + " state_generator,\n", + " single_audio_stream,\n", + " collect_speeches) = utils" ] }, { @@ -41,40 +57,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:27.939388Z", - "start_time": "2020-12-15T11:54:27.936636Z" - } - }, - "outputs": [], - "source": [ - "def collect_speeches(tss, wav):\n", - " speech_chunks = []\n", - " for i in tss:\n", - " speech_chunks.append(wav[i['start']: i['end']])\n", - " return torch.cat(speech_chunks)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-15T11:54:28.415177Z", - "start_time": "2020-12-15T11:54:28.231677Z" - } - }, - "outputs": [], - "source": [ - "model = init_jit_model('files/model.jit', 'cpu')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-15T11:54:28.560822Z", - "start_time": "2020-12-15T11:54:28.549811Z" + "end_time": "2020-12-15T13:09:56.879818Z", + "start_time": "2020-12-15T13:09:56.864765Z" } }, "outputs": [], @@ -87,8 +71,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:30.088721Z", - "start_time": "2020-12-15T11:54:29.019358Z" + "end_time": "2020-12-15T13:09:58.876034Z", + "start_time": "2020-12-15T13:09:57.139254Z" } }, "outputs": [], @@ -101,8 +85,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:30.198484Z", - "start_time": "2020-12-15T11:54:30.188311Z" + "end_time": "2020-12-15T13:09:58.885802Z", + "start_time": "2020-12-15T13:09:58.877327Z" } }, "outputs": [], @@ -115,8 +99,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:30.816893Z", - "start_time": "2020-12-15T11:54:30.782667Z" + "end_time": "2020-12-15T13:09:58.941063Z", + "start_time": "2020-12-15T13:09:58.887006Z" } }, "outputs": [], @@ -137,13 +121,12 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:31.886189Z", - "start_time": "2020-12-15T11:54:31.572194Z" + "end_time": "2020-12-15T13:09:59.199321Z", + "start_time": "2020-12-15T13:09:59.196823Z" } }, "outputs": [], "source": [ - "model = init_jit_model('files/model.jit', 'cpu')\n", "wav = 'files/en.wav'" ] }, @@ -152,15 +135,15 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:54:35.624279Z", - "start_time": "2020-12-15T11:54:32.049532Z" + "end_time": "2020-12-15T13:10:03.585644Z", + "start_time": "2020-12-15T13:09:59.429757Z" } }, "outputs": [], "source": [ - "for i in single_audio_stream(model, wav):\n", - " if i:\n", - " print(i)" + "for batch in single_audio_stream(model, wav):\n", + " if batch:\n", + " print(batch)" ] }, { @@ -175,22 +158,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:40:13.406225Z", - "start_time": "2020-12-15T11:40:13.206354Z" - } - }, - "outputs": [], - "source": [ - "model = init_jit_model('files/model.jit', 'cpu')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-15T11:41:08.470917Z", - "start_time": "2020-12-15T11:41:08.467369Z" + "end_time": "2020-12-15T13:10:03.590358Z", + "start_time": "2020-12-15T13:10:03.587071Z" } }, "outputs": [], @@ -204,24 +173,17 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:41:25.685356Z", - "start_time": "2020-12-15T11:41:16.222672Z" + "end_time": "2020-12-15T13:10:15.762491Z", + "start_time": "2020-12-15T13:10:03.591388Z" } }, "outputs": [], "source": [ - "for i in state_generator(model, audios_for_stream, audios_in_stream=2): # 2 audio stream\n", - " if i:\n", - " print(i)" + "for batch in state_generator(model, audios_for_stream, audios_in_stream=2): # 2 audio stream\n", + " if batch:\n", + " print(batch)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -229,18 +191,28 @@ "# Onnx example" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q ipython # For jupyter audio display\n", + "!pip install -q onnxruntime" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:55:45.597504Z", - "start_time": "2020-12-15T11:55:45.582356Z" + "end_time": "2020-12-15T13:09:05.932256Z", + "start_time": "2020-12-15T13:09:05.043659Z" } }, "outputs": [], "source": [ - "# imports\n", + "# dependencies\n", "import glob\n", "import torch\n", "from IPython.display import Audio\n", @@ -248,7 +220,7 @@ "import onnxruntime\n", "\n", "from utils import (get_speech_ts, save_audio, read_audio, \n", - " state_generator, single_audio_stream)\n", + " state_generator, single_audio_stream, collect_speeches)\n", "\n", "def init_onnx_model(model_path: str):\n", " return onnxruntime.InferenceSession(model_path)\n", @@ -273,8 +245,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:55:56.874376Z", - "start_time": "2020-12-15T11:55:56.782230Z" + "end_time": "2020-12-15T13:09:06.643812Z", + "start_time": "2020-12-15T13:09:06.473386Z" } }, "outputs": [], @@ -288,8 +260,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:56:12.159463Z", - "start_time": "2020-12-15T11:56:11.446991Z" + "end_time": "2020-12-15T13:09:08.094414Z", + "start_time": "2020-12-15T13:09:07.073253Z" } }, "outputs": [], @@ -302,8 +274,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:56:20.488863Z", - "start_time": "2020-12-15T11:56:20.485485Z" + "end_time": "2020-12-15T13:09:08.107584Z", + "start_time": "2020-12-15T13:09:08.096550Z" } }, "outputs": [], @@ -316,8 +288,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:56:27.908128Z", - "start_time": "2020-12-15T11:56:27.870978Z" + "end_time": "2020-12-15T13:09:08.862421Z", + "start_time": "2020-12-15T13:09:08.820014Z" } }, "outputs": [], @@ -338,8 +310,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:58:09.012892Z", - "start_time": "2020-12-15T11:58:08.940907Z" + "end_time": "2020-12-15T13:09:09.606031Z", + "start_time": "2020-12-15T13:09:09.504239Z" } }, "outputs": [], @@ -353,15 +325,15 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:58:11.562186Z", - "start_time": "2020-12-15T11:58:09.949825Z" + "end_time": "2020-12-15T13:09:11.453171Z", + "start_time": "2020-12-15T13:09:09.633435Z" } }, "outputs": [], "source": [ - "for i in single_audio_stream(model, wav, run_function=validate_onnx):\n", - " if i:\n", - " print(i)" + "for batch in single_audio_stream(model, wav, run_function=validate_onnx):\n", + " if batch:\n", + " print(batch)" ] }, { @@ -374,7 +346,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-15T13:09:11.540423Z", + "start_time": "2020-12-15T13:09:11.455706Z" + } + }, "outputs": [], "source": [ "model = init_onnx_model('files/model.onnx')" @@ -385,8 +362,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:59:09.381687Z", - "start_time": "2020-12-15T11:59:09.378552Z" + "end_time": "2020-12-15T13:09:11.550815Z", + "start_time": "2020-12-15T13:09:11.542954Z" } }, "outputs": [], @@ -400,23 +377,16 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-15T11:59:27.712905Z", - "start_time": "2020-12-15T11:59:21.608435Z" + "end_time": "2020-12-15T13:09:19.565434Z", + "start_time": "2020-12-15T13:09:11.552097Z" } }, "outputs": [], "source": [ - "for i in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx): # 2 audio stream\n", - " if i:\n", - " print(i)" + "for batch in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx): # 2 audio stream\n", + " if batch:\n", + " print(batch)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {