diff --git a/files/joint_VAD_just_RU_jit_cut_q.pth.tar b/files/joint_VAD_just_RU_jit_cut_q.pth.tar
deleted file mode 100644
index 67be4fb..0000000
Binary files a/files/joint_VAD_just_RU_jit_cut_q.pth.tar and /dev/null differ
diff --git a/silero-vad.ipynb b/silero-vad.ipynb
index 77acd3e..12561fb 100644
--- a/silero-vad.ipynb
+++ b/silero-vad.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2020-12-15T14:00:15.701867Z",
@@ -29,24 +29,12 @@
     "cellView": "form",
     "id": "rllMjjsekbjt"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Downloading: \"https://github.com/snakers4/silero-vad/archive/master.zip\" to /home/keras/.cache/torch/hub/master.zip\n",
-      "/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:53: UserWarning: \"sox\" backend is being deprecated. The default backend will be changed to \"sox_io\" backend in 0.8.0 and \"sox\" backend will be removed in 0.9.0. Please migrate to \"sox_io\" backend. Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
-      "  warnings.warn(\n",
-      "/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:63: UserWarning: The interface of \"soundfile\" backend is planned to change in 0.8.0 to match that of \"sox_io\" backend and the current interface will be removed in 0.9.0. To use the new interface, do `torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False` before setting the backend to \"soundfile\". Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#@title Install and Import Dependencies\n",
     "\n",
     "# this assumes that you have a relevant version of PyTorch installed\n",
-    "#!pip install -q torchaudio soundfile\n",
+    "!pip install -q torchaudio soundfile\n",
     "\n",
     "import glob\n",
     "import torch\n",
@@ -64,32 +52,9 @@
     " read_audio,\n",
     " state_generator,\n",
     " single_audio_stream,\n",
-    " collect_speeches) = utils"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-12-15T14:03:38.006309Z",
-     "start_time": "2020-12-15T14:03:38.002613Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'/home/keras/.cache/torch/hub'"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "torch.hub.get_dir()"
+    " collect_speeches) = utils\n",
+    "\n",
+    "files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'"
    ]
   },
   {
@@ -113,7 +78,7 @@
    },
    "outputs": [],
    "source": [
-    "wav = read_audio('files/en.wav')\n",
+    "wav = read_audio(f'{files_dir}/en.wav')\n",
     "# get speech timestamps from full audio file\n",
     "speech_timestamps = get_speech_ts(wav, model,\n",
     "                                  num_steps=4)\n",
@@ -159,7 +124,7 @@
    },
    "outputs": [],
    "source": [
-    "wav = 'files/en.wav'\n",
+    "wav = f'{files_dir}/en.wav'\n",
     "\n",
     "for batch in single_audio_stream(model, wav):\n",
     "    if batch:\n",
@@ -187,7 +152,7 @@
    },
    "outputs": [],
    "source": [
-    "audios_for_stream = glob.glob('files/*.wav')\n",
+    "audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
     "len(audios_for_stream) # total 4 audios"
    ]
   },
@@ -230,6 +195,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
+    "cellView": "form",
     "id": "Q4QIfSpprnkI"
    },
    "outputs": [],
@@ -255,7 +221,9 @@
     " single_audio_stream,\n",
     " collect_speeches) = utils\n",
     "\n",
-    " def init_onnx_model(model_path: str):\n",
+    "files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'\n",
+    "\n",
+    "def init_onnx_model(model_path: str):\n",
     "    return onnxruntime.InferenceSession(model_path)\n",
     "\n",
     "def validate_onnx(model, inputs):\n",
@@ -287,8 +255,8 @@
    },
    "outputs": [],
    "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
-    "wav = read_audio('files/en.wav')\n",
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
+    "wav = read_audio(f'{files_dir}/en.wav')\n",
     "\n",
     "# get speech timestamps from full audio file\n",
     "speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
@@ -333,8 +301,8 @@
    },
    "outputs": [],
    "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
-    "wav = 'files/en.wav'"
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
+    "wav = f'{files_dir}/en.wav'"
    ]
   },
   {
@@ -375,8 +343,8 @@
    },
    "outputs": [],
    "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
-    "audios_for_stream = glob.glob('files/*.wav')\n",
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
+    "audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
     "print(len(audios_for_stream)) # total 4 audios"
    ]
   },
@@ -435,5 +403,5 @@
   }
  },
 "nbformat": 4,
-"nbformat_minor": 1
+"nbformat_minor": 0
 }
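
For reference, every path change in this patch follows the same pattern: the example audio and ONNX model are resolved from the torch.hub cache checkout instead of a relative files/ directory. A minimal standalone sketch of that pattern, assuming the default hub cache layout (snakers4_silero-vad_master) and the utility tuple order used in the notebook's install cell; the final print is illustrative only:

    import torch

    # Load the Silero VAD model and its helper utilities, as in the notebook's install cell.
    model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                                  model='silero_vad',
                                  force_reload=True)
    (get_speech_ts, save_audio, read_audio,
     state_generator, single_audio_stream, collect_speeches) = utils

    # The bundled example wavs live inside the hub cache checkout, not in ./files.
    files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'

    # Same full-audio usage as in the notebook, now reading from the cache path.
    wav = read_audio(f'{files_dir}/en.wav')
    speech_timestamps = get_speech_ts(wav, model, num_steps=4)
    print(speech_timestamps)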