{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Jit example" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:25.940761Z", "start_time": "2020-12-15T11:54:25.933842Z" } }, "outputs": [], "source": [ "# imports\n", "import glob\n", "import torch\n", "from IPython.display import Audio\n", "torch.set_num_threads(1)\n", "\n", "from utils import (init_jit_model, get_speech_ts,\n", " save_audio, read_audio, \n", " state_generator, single_audio_stream)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Full audio" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:27.939388Z", "start_time": "2020-12-15T11:54:27.936636Z" } }, "outputs": [], "source": [ "def collect_speeches(tss, wav):\n", " speech_chunks = []\n", " for i in tss:\n", " speech_chunks.append(wav[i['start']: i['end']])\n", " return torch.cat(speech_chunks)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:28.415177Z", "start_time": "2020-12-15T11:54:28.231677Z" } }, "outputs": [], "source": [ "model = init_jit_model('files/model.jit', 'cpu')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:28.560822Z", "start_time": "2020-12-15T11:54:28.549811Z" } }, "outputs": [], "source": [ "wav = read_audio('files/en.wav')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:30.088721Z", "start_time": "2020-12-15T11:54:29.019358Z" } }, "outputs": [], "source": [ "speech_timestamps = get_speech_ts(wav, model, num_steps=4) # get speech timestamps from full audio file" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:30.198484Z", "start_time": "2020-12-15T11:54:30.188311Z" } }, "outputs": [], "source": [ "speech_timestamps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:30.816893Z", "start_time": "2020-12-15T11:54:30.782667Z" } }, "outputs": [], "source": [ "save_audio('only_speech.wav', collect_speeches(speech_timestamps, wav), 16000) # merge all speech chunks to one audio\n", "Audio('only_speech.wav')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Single audio stream" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:31.886189Z", "start_time": "2020-12-15T11:54:31.572194Z" } }, "outputs": [], "source": [ "model = init_jit_model('files/model.jit', 'cpu')\n", "wav = 'files/en.wav'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:54:35.624279Z", "start_time": "2020-12-15T11:54:32.049532Z" } }, "outputs": [], "source": [ "for i in single_audio_stream(model, wav):\n", " if i:\n", " print(i)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multiple audio stream" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:40:13.406225Z", "start_time": "2020-12-15T11:40:13.206354Z" } }, "outputs": [], "source": [ "model = init_jit_model('files/model.jit', 'cpu')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:41:08.470917Z", "start_time": "2020-12-15T11:41:08.467369Z" } }, "outputs": [], "source": [ "audios_for_stream = glob.glob('files/*.wav')\n", "len(audios_for_stream) # total 4 audios" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:41:25.685356Z", "start_time": "2020-12-15T11:41:16.222672Z" } }, "outputs": [], "source": [ "for i in state_generator(model, audios_for_stream, audios_in_stream=2): # 2 audio stream\n", " if i:\n", " print(i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Onnx example" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:55:45.597504Z", "start_time": "2020-12-15T11:55:45.582356Z" } }, "outputs": [], "source": [ "# imports\n", "import glob\n", "import torch\n", "from IPython.display import Audio\n", "torch.set_num_threads(1)\n", "import onnxruntime\n", "\n", "from utils import (get_speech_ts, save_audio, read_audio, \n", " state_generator, single_audio_stream)\n", "\n", "def init_onnx_model(model_path: str):\n", " return onnxruntime.InferenceSession(model_path)\n", "\n", "def validate_onnx(model, inputs):\n", " with torch.no_grad():\n", " ort_inputs = {'input': inputs.cpu().numpy()}\n", " outs = model.run(None, ort_inputs)\n", " outs = [torch.Tensor(x) for x in outs]\n", " return outs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Full audio" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:55:56.874376Z", "start_time": "2020-12-15T11:55:56.782230Z" } }, "outputs": [], "source": [ "model = init_onnx_model('files/model.onnx')\n", "wav = read_audio('files/en.wav')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:56:12.159463Z", "start_time": "2020-12-15T11:56:11.446991Z" } }, "outputs": [], "source": [ "speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) # get speech timestamps from full audio file" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:56:20.488863Z", "start_time": "2020-12-15T11:56:20.485485Z" } }, "outputs": [], "source": [ "speech_timestamps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:56:27.908128Z", "start_time": "2020-12-15T11:56:27.870978Z" } }, "outputs": [], "source": [ "save_audio('only_speech.wav', collect_speeches(speech_timestamps, wav), 16000) # merge all speech chunks to one audio\n", "Audio('only_speech.wav')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Single audio stream" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:58:09.012892Z", "start_time": "2020-12-15T11:58:08.940907Z" } }, "outputs": [], "source": [ "model = init_onnx_model('files/model.onnx')\n", "wav = 'files/en.wav'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:58:11.562186Z", "start_time": "2020-12-15T11:58:09.949825Z" } }, "outputs": [], "source": [ "for i in single_audio_stream(model, wav, run_function=validate_onnx):\n", " if i:\n", " print(i)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multiple audio stream" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = init_onnx_model('files/model.onnx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:59:09.381687Z", "start_time": "2020-12-15T11:59:09.378552Z" } }, "outputs": [], "source": [ "audios_for_stream = glob.glob('files/*.wav')\n", "len(audios_for_stream) # total 4 audios" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-15T11:59:27.712905Z", "start_time": "2020-12-15T11:59:21.608435Z" } }, "outputs": [], "source": [ "for i in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx): # 2 audio stream\n", " if i:\n", " print(i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }