Merge branch 'master' of github.com:snakers4/silero-vad into master

2026-02-05 18:09:22 +08:00 · 2020-12-15 14:51:39 +00:00
parent 8996d5e519 b7914cfe7c
commit cc95d20b15
2 changed files with 20 additions and 52 deletions
--- a/files/joint_VAD_just_RU_jit_cut_q.pth.tar
+++ b/files/joint_VAD_just_RU_jit_cut_q.pth.tar
--- a/silero-vad.ipynb
+++ b/silero-vad.ipynb
@@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-15T14:00:15.701867Z",
@@ -29,24 +29,12 @@
    "cellView": "form",
    "id": "rllMjjsekbjt"
   },
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading: \"https://github.com/snakers4/silero-vad/archive/master.zip\" to /home/keras/.cache/torch/hub/master.zip\n",
      "/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:53: UserWarning: \"sox\" backend is being deprecated. The default backend will be changed to \"sox_io\" backend in 0.8.0 and \"sox\" backend will be removed in 0.9.0. Please migrate to \"sox_io\" backend. Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
      "  warnings.warn(\n",
      "/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:63: UserWarning: The interface of \"soundfile\" backend is planned to change in 0.8.0 to match that of \"sox_io\" backend and the current interface will be removed in 0.9.0. To use the new interface, do `torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False` before setting the backend to \"soundfile\". Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "#@title Install and Import Dependencies\n",
    "\n",
    "# this assumes that you have a relevant version of PyTorch installed\n",
-    "#!pip install -q torchaudio soundfile\n",
+    "!pip install -q torchaudio soundfile\n",
    "\n",
    "import glob\n",
    "import torch\n",
@@ -64,32 +52,9 @@
    " read_audio,\n",
    " state_generator,\n",
    " single_audio_stream,\n",
-    " collect_speeches) = utils"
+    " collect_speeches) = utils\n",
-   ]
+    "\n",
-  },
+    "files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'"
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-15T14:03:38.006309Z",
     "start_time": "2020-12-15T14:03:38.002613Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home/keras/.cache/torch/hub'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.hub.get_dir()"
   ]
  },
  {
@@ -113,7 +78,7 @@
   },
   "outputs": [],
   "source": [
-    "wav = read_audio('files/en.wav')\n",
+    "wav = read_audio(f'{files_dir}/en.wav')\n",
    "# get speech timestamps from full audio file\n",
    "speech_timestamps = get_speech_ts(wav, model,\n",
    "                                  num_steps=4)\n",
@@ -159,7 +124,7 @@
   },
   "outputs": [],
   "source": [
-    "wav = 'files/en.wav'\n",
+    "wav = f'{files_dir}/en.wav'\n",
    "\n",
    "for batch in single_audio_stream(model, wav):\n",
    "    if batch:\n",
@@ -187,7 +152,7 @@
   },
   "outputs": [],
   "source": [
-    "audios_for_stream = glob.glob('files/*.wav')\n",
+    "audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
    "len(audios_for_stream) # total 4 audios"
   ]
  },
@@ -230,6 +195,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "Q4QIfSpprnkI"
   },
   "outputs": [],
@@ -255,7 +221,9 @@
    " single_audio_stream,\n",
    " collect_speeches) = utils\n",
    "\n",
-    " def init_onnx_model(model_path: str):\n",
+    "files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'\n",
    "\n",
    "def init_onnx_model(model_path: str):\n",
    "    return onnxruntime.InferenceSession(model_path)\n",
    "\n",
    "def validate_onnx(model, inputs):\n",
@@ -287,8 +255,8 @@
   },
   "outputs": [],
   "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
-    "wav = read_audio('files/en.wav')\n",
+    "wav = read_audio(f'{files_dir}/en.wav')\n",
    "\n",
    "# get speech timestamps from full audio file\n",
    "speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
@@ -333,8 +301,8 @@
   },
   "outputs": [],
   "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
-    "wav = 'files/en.wav'"
+    "wav = f'{files_dir}/en.wav'"
   ]
  },
  {
@@ -375,8 +343,8 @@
   },
   "outputs": [],
   "source": [
-    "model = init_onnx_model('files/model.onnx')\n",
+    "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
-    "audios_for_stream = glob.glob('files/*.wav')\n",
+    "audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
    "print(len(audios_for_stream)) # total 4 audios"
   ]
  },
@@ -435,5 +403,5 @@
  }
 },
 "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 0
 }