mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-05 18:09:22 +08:00
Merge branch 'master' of github.com:snakers4/silero-vad into master
This commit is contained in:
Binary file not shown.
@@ -20,7 +20,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
"end_time": "2020-12-15T14:00:15.701867Z",
|
"end_time": "2020-12-15T14:00:15.701867Z",
|
||||||
@@ -29,24 +29,12 @@
|
|||||||
"cellView": "form",
|
"cellView": "form",
|
||||||
"id": "rllMjjsekbjt"
|
"id": "rllMjjsekbjt"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Downloading: \"https://github.com/snakers4/silero-vad/archive/master.zip\" to /home/keras/.cache/torch/hub/master.zip\n",
|
|
||||||
"/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:53: UserWarning: \"sox\" backend is being deprecated. The default backend will be changed to \"sox_io\" backend in 0.8.0 and \"sox\" backend will be removed in 0.9.0. Please migrate to \"sox_io\" backend. Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:63: UserWarning: The interface of \"soundfile\" backend is planned to change in 0.8.0 to match that of \"sox_io\" backend and the current interface will be removed in 0.9.0. To use the new interface, do `torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False` before setting the backend to \"soundfile\". Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
|
|
||||||
" warnings.warn(\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"#@title Install and Import Dependencies\n",
|
"#@title Install and Import Dependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this assumes that you have a relevant version of PyTorch installed\n",
|
"# this assumes that you have a relevant version of PyTorch installed\n",
|
||||||
"#!pip install -q torchaudio soundfile\n",
|
"!pip install -q torchaudio soundfile\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import glob\n",
|
"import glob\n",
|
||||||
"import torch\n",
|
"import torch\n",
|
||||||
@@ -64,32 +52,9 @@
|
|||||||
" read_audio,\n",
|
" read_audio,\n",
|
||||||
" state_generator,\n",
|
" state_generator,\n",
|
||||||
" single_audio_stream,\n",
|
" single_audio_stream,\n",
|
||||||
" collect_speeches) = utils"
|
" collect_speeches) = utils\n",
|
||||||
]
|
"\n",
|
||||||
},
|
"files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'"
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {
|
|
||||||
"ExecuteTime": {
|
|
||||||
"end_time": "2020-12-15T14:03:38.006309Z",
|
|
||||||
"start_time": "2020-12-15T14:03:38.002613Z"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'/home/keras/.cache/torch/hub'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"torch.hub.get_dir()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -113,7 +78,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"wav = read_audio('files/en.wav')\n",
|
"wav = read_audio(f'{files_dir}/en.wav')\n",
|
||||||
"# get speech timestamps from full audio file\n",
|
"# get speech timestamps from full audio file\n",
|
||||||
"speech_timestamps = get_speech_ts(wav, model,\n",
|
"speech_timestamps = get_speech_ts(wav, model,\n",
|
||||||
" num_steps=4)\n",
|
" num_steps=4)\n",
|
||||||
@@ -159,7 +124,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"wav = 'files/en.wav'\n",
|
"wav = f'{files_dir}/en.wav'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for batch in single_audio_stream(model, wav):\n",
|
"for batch in single_audio_stream(model, wav):\n",
|
||||||
" if batch:\n",
|
" if batch:\n",
|
||||||
@@ -187,7 +152,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"audios_for_stream = glob.glob('files/*.wav')\n",
|
"audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
|
||||||
"len(audios_for_stream) # total 4 audios"
|
"len(audios_for_stream) # total 4 audios"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -230,6 +195,7 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"cellView": "form",
|
||||||
"id": "Q4QIfSpprnkI"
|
"id": "Q4QIfSpprnkI"
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -255,7 +221,9 @@
|
|||||||
" single_audio_stream,\n",
|
" single_audio_stream,\n",
|
||||||
" collect_speeches) = utils\n",
|
" collect_speeches) = utils\n",
|
||||||
"\n",
|
"\n",
|
||||||
" def init_onnx_model(model_path: str):\n",
|
"files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'\n",
|
||||||
|
"\n",
|
||||||
|
"def init_onnx_model(model_path: str):\n",
|
||||||
" return onnxruntime.InferenceSession(model_path)\n",
|
" return onnxruntime.InferenceSession(model_path)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def validate_onnx(model, inputs):\n",
|
"def validate_onnx(model, inputs):\n",
|
||||||
@@ -287,8 +255,8 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"model = init_onnx_model('files/model.onnx')\n",
|
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||||
"wav = read_audio('files/en.wav')\n",
|
"wav = read_audio(f'{files_dir}/en.wav')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# get speech timestamps from full audio file\n",
|
"# get speech timestamps from full audio file\n",
|
||||||
"speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
|
"speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
|
||||||
@@ -333,8 +301,8 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"model = init_onnx_model('files/model.onnx')\n",
|
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||||
"wav = 'files/en.wav'"
|
"wav = f'{files_dir}/en.wav'"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -375,8 +343,8 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"model = init_onnx_model('files/model.onnx')\n",
|
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||||
"audios_for_stream = glob.glob('files/*.wav')\n",
|
"audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
|
||||||
"print(len(audios_for_stream)) # total 4 audios"
|
"print(len(audios_for_stream)) # total 4 audios"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -435,5 +403,5 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 1
|
"nbformat_minor": 0
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user