{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:24.487521Z", "start_time": "2020-12-14T13:43:23.780570Z" } }, "outputs": [], "source": [ "import glob\n", "import torch\n", "from IPython.display import Audio\n", "torch.set_num_threads(1)\n", "from utils import (init_jit_model, get_speech_ts, \n", " save_audio, read_audio, \n", " state_generator, single_audio_stream, init_onnx_model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Full audio example" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:24.492506Z", "start_time": "2020-12-14T13:43:24.489440Z" } }, "outputs": [], "source": [ "def collect_speeches(tss, wav):\n", "    \"\"\"Concatenate the speech segments of `wav` into a single tensor.\n", "\n", "    Args:\n", "        tss: iterable of dicts whose 'start' and 'end' values are used\n", "            as slice bounds into `wav`.\n", "        wav: tensor to slice along its first dimension.\n", "\n", "    Returns:\n", "        The slices joined with `torch.cat`, or the empty slice `wav[:0]`\n", "        when `tss` is empty (`torch.cat` raises on an empty list).\n", "    \"\"\"\n", "    speech_chunks = [wav[ts['start']: ts['end']] for ts in tss]\n", "    if not speech_chunks:\n", "        # Nothing to join: hand back an empty slice instead of crashing.\n", "        return wav[:0]\n", "    return torch.cat(speech_chunks)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:24.760714Z", "start_time": "2020-12-14T13:43:24.493992Z" } }, "outputs": [], "source": [ "# NOTE: `model` is rebound on the second line, so the ONNX model is the one\n", "# passed to get_speech_ts below; comment out the loader you do not need.\n", "model = init_jit_model('files/joint_VAD_just_RU_jit_cut_q.pth.tar', 'cpu') # from yml file\n", "model = init_onnx_model('files/joint_VAD_just_RU.onnx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:24.793384Z", "start_time": "2020-12-14T13:43:24.762311Z" } }, "outputs": [], "source": [ "wav = read_audio('files/test_audio_2.wav')\n", "# Audio(...) is the cell's last expression so that the player is rendered.\n", "Audio('files/test_audio_2.wav')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:25.320324Z", "start_time": "2020-12-14T13:43:24.808594Z" } }, "outputs": [], "source": [ "speech_timestamps = get_speech_ts(wav, model, num_steps=4) # kill extractor" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:25.324901Z", "start_time": 
"2020-12-14T13:43:25.321759Z" } }, "outputs": [], "source": [ "speech_timestamps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:25.344065Z", "start_time": "2020-12-14T13:43:25.326162Z" } }, "outputs": [], "source": [ "# Join the detected segments, write them to disk, then play the result.\n", "only_speech = collect_speeches(speech_timestamps, wav)\n", "save_audio('only_speech.wav', only_speech, 16000)\n", "Audio('only_speech.wav')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Single stream example" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:25.778585Z", "start_time": "2020-12-14T13:43:25.496583Z" } }, "outputs": [], "source": [ "audio = 'files/test_audio_6.wav'\n", "model = init_jit_model('files/joint_VAD_just_RU_jit_cut_q.pth.tar', 'cpu') # from yml file\n", "#model = init_onnx_model('files/joint_VAD_just_RU.onnx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:29.402604Z", "start_time": "2020-12-14T13:43:25.780037Z" } }, "outputs": [], "source": [ "# Print only the truthy items yielded by the stream.\n", "for result in single_audio_stream(model, audio):\n", "    if result:\n", "        print(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Multiple stream example" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:29.674262Z", "start_time": "2020-12-14T13:43:29.403972Z" } }, "outputs": [], "source": [ "# NOTE: `model` is rebound on the second line, so the ONNX model is the one\n", "# the following cells receive.\n", "model = init_jit_model('files/joint_VAD_just_RU_jit_cut_q.pth.tar', 'cpu') # from yml file\n", "model = init_onnx_model('files/joint_VAD_just_RU.onnx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:43:29.678449Z", "start_time": "2020-12-14T13:43:29.675519Z" } }, "outputs": [], "source": [ "# Every wav under files/ whose name starts with 'test'.\n", "audios_for_stream = glob.glob('files/test*.wav')\n", "len(audios_for_stream)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": 
"2020-12-14T13:43:40.236387Z", "start_time": "2020-12-14T13:43:29.679274Z" } }, "outputs": [], "source": [ "# Print only the truthy items yielded while streaming the files.\n", "for result in state_generator(model, audios_for_stream, audios_in_stream=2):\n", "    if result:\n", "        print(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-14T13:46:49.812052Z", "start_time": "2020-12-14T13:46:49.586637Z" } }, "outputs": [], "source": [ "# Copies the JIT model file that init_jit_model loads into files/; requires\n", "# ../silero-models-research to exist locally. On a fresh setup, run this\n", "# before the model-loading cells.\n", "!cp ../silero-models-research/model_saves/joint_VAD_just_RU_jit_cut_q.pth.tar files/" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }