This commit is contained in:
adamnsandle
2020-12-15 15:30:39 +00:00
parent b7914cfe7c
commit 3153a3c62f

View File

@@ -27,6 +27,7 @@
"start_time": "2020-12-15T14:00:09.512876Z" "start_time": "2020-12-15T14:00:09.512876Z"
}, },
"cellView": "form", "cellView": "form",
"collapsed": true,
"id": "rllMjjsekbjt" "id": "rllMjjsekbjt"
}, },
"outputs": [], "outputs": [],
@@ -41,7 +42,7 @@
"torch.set_num_threads(1)\n", "torch.set_num_threads(1)\n",
"\n", "\n",
"from IPython.display import Audio\n", "from IPython.display import Audio\n",
"\n", "from pprint import pprint\n",
"\n", "\n",
"model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n", "model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n",
" model='silero_vad',\n", " model='silero_vad',\n",
@@ -63,7 +64,7 @@
"id": "fXbbaUO3jsrw" "id": "fXbbaUO3jsrw"
}, },
"source": [ "source": [
"## Full audio" "## Full Audio"
] ]
}, },
{ {
@@ -82,7 +83,7 @@
"# get speech timestamps from full audio file\n", "# get speech timestamps from full audio file\n",
"speech_timestamps = get_speech_ts(wav, model,\n", "speech_timestamps = get_speech_ts(wav, model,\n",
" num_steps=4)\n", " num_steps=4)\n",
"print(speech_timestamps)" "pprint(speech_timestamps)"
] ]
}, },
{ {
@@ -170,7 +171,7 @@
"source": [ "source": [
"for batch in state_generator(model, audios_for_stream, audios_in_stream=2): # 2 audio stream\n", "for batch in state_generator(model, audios_for_stream, audios_in_stream=2): # 2 audio stream\n",
" if batch:\n", " if batch:\n",
" print(batch)" " pprint(batch)"
] ]
}, },
{ {
@@ -207,6 +208,7 @@
"\n", "\n",
"import glob\n", "import glob\n",
"import onnxruntime\n", "import onnxruntime\n",
"from pprint import pprint\n",
"\n", "\n",
"from IPython.display import Audio\n", "from IPython.display import Audio\n",
"\n", "\n",
@@ -240,7 +242,7 @@
"id": "5JHErdB7jsr0" "id": "5JHErdB7jsr0"
}, },
"source": [ "source": [
"## Full audio" "## Full Audio"
] ]
}, },
{ {
@@ -260,7 +262,7 @@
"\n", "\n",
"# get speech timestamps from full audio file\n", "# get speech timestamps from full audio file\n",
"speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n", "speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
"print(speech_timestamps)" "pprint(speech_timestamps)"
] ]
}, },
{ {
@@ -286,7 +288,7 @@
"id": "Rio9W50gjsr1" "id": "Rio9W50gjsr1"
}, },
"source": [ "source": [
"## Single audio stream" "## Single Audio Stream"
] ]
}, },
{ {
@@ -319,7 +321,7 @@
"source": [ "source": [
"for batch in single_audio_stream(model, wav, run_function=validate_onnx):\n", "for batch in single_audio_stream(model, wav, run_function=validate_onnx):\n",
" if batch:\n", " if batch:\n",
" print(batch)" " pprint(batch)"
] ]
}, },
{ {
@@ -328,7 +330,7 @@
"id": "WNZ42u0ajsr1" "id": "WNZ42u0ajsr1"
}, },
"source": [ "source": [
"## Multiple audio stream" "## Multiple Audio Streams"
] ]
}, },
{ {
@@ -345,7 +347,7 @@
"source": [ "source": [
"model = init_onnx_model(f'{files_dir}/model.onnx')\n", "model = init_onnx_model(f'{files_dir}/model.onnx')\n",
"audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n", "audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
"print(len(audios_for_stream)) # total 4 audios" "pprint(len(audios_for_stream)) # total 4 audios"
] ]
}, },
{ {
@@ -362,7 +364,7 @@
"source": [ "source": [
"for batch in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx): # 2 audio stream\n", "for batch in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx): # 2 audio stream\n",
" if batch:\n", " if batch:\n",
" print(batch)" " pprint(batch)"
] ]
} }
], ],