mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-04 17:39:22 +08:00
fx
This commit is contained in:
Binary file not shown.
@@ -20,7 +20,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-12-15T14:00:15.701867Z",
|
||||
@@ -29,24 +29,12 @@
|
||||
"cellView": "form",
|
||||
"id": "rllMjjsekbjt"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading: \"https://github.com/snakers4/silero-vad/archive/master.zip\" to /home/keras/.cache/torch/hub/master.zip\n",
|
||||
"/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:53: UserWarning: \"sox\" backend is being deprecated. The default backend will be changed to \"sox_io\" backend in 0.8.0 and \"sox\" backend will be removed in 0.9.0. Please migrate to \"sox_io\" backend. Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
|
||||
" warnings.warn(\n",
|
||||
"/opt/conda/lib/python3.8/site-packages/torchaudio/backend/utils.py:63: UserWarning: The interface of \"soundfile\" backend is planned to change in 0.8.0 to match that of \"sox_io\" backend and the current interface will be removed in 0.9.0. To use the new interface, do `torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False` before setting the backend to \"soundfile\". Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title Install and Import Dependencies\n",
|
||||
"\n",
|
||||
"# this assumes that you have a relevant version of PyTorch installed\n",
|
||||
"#!pip install -q torchaudio soundfile\n",
|
||||
"!pip install -q torchaudio soundfile\n",
|
||||
"\n",
|
||||
"import glob\n",
|
||||
"import torch\n",
|
||||
@@ -64,32 +52,9 @@
|
||||
" read_audio,\n",
|
||||
" state_generator,\n",
|
||||
" single_audio_stream,\n",
|
||||
" collect_speeches) = utils"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-12-15T14:03:38.006309Z",
|
||||
"start_time": "2020-12-15T14:03:38.002613Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'/home/keras/.cache/torch/hub'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"torch.hub.get_dir()"
|
||||
" collect_speeches) = utils\n",
|
||||
"\n",
|
||||
"files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -113,7 +78,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wav = read_audio('files/en.wav')\n",
|
||||
"wav = read_audio(f'{files_dir}/en.wav')\n",
|
||||
"# get speech timestamps from full audio file\n",
|
||||
"speech_timestamps = get_speech_ts(wav, model,\n",
|
||||
" num_steps=4)\n",
|
||||
@@ -159,7 +124,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wav = 'files/en.wav'\n",
|
||||
"wav = f'{files_dir}/en.wav'\n",
|
||||
"\n",
|
||||
"for batch in single_audio_stream(model, wav):\n",
|
||||
" if batch:\n",
|
||||
@@ -187,7 +152,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"audios_for_stream = glob.glob('files/*.wav')\n",
|
||||
"audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
|
||||
"len(audios_for_stream) # total 4 audios"
|
||||
]
|
||||
},
|
||||
@@ -230,6 +195,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"cellView": "form",
|
||||
"id": "Q4QIfSpprnkI"
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -255,7 +221,9 @@
|
||||
" single_audio_stream,\n",
|
||||
" collect_speeches) = utils\n",
|
||||
"\n",
|
||||
" def init_onnx_model(model_path: str):\n",
|
||||
"files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'\n",
|
||||
"\n",
|
||||
"def init_onnx_model(model_path: str):\n",
|
||||
" return onnxruntime.InferenceSession(model_path)\n",
|
||||
"\n",
|
||||
"def validate_onnx(model, inputs):\n",
|
||||
@@ -287,8 +255,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = init_onnx_model('files/model.onnx')\n",
|
||||
"wav = read_audio('files/en.wav')\n",
|
||||
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||
"wav = read_audio(f'{files_dir}/en.wav')\n",
|
||||
"\n",
|
||||
"# get speech timestamps from full audio file\n",
|
||||
"speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx) \n",
|
||||
@@ -333,8 +301,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = init_onnx_model('files/model.onnx')\n",
|
||||
"wav = 'files/en.wav'"
|
||||
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||
"wav = f'{files_dir}/en.wav'"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -375,8 +343,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = init_onnx_model('files/model.onnx')\n",
|
||||
"audios_for_stream = glob.glob('files/*.wav')\n",
|
||||
"model = init_onnx_model(f'{files_dir}/model.onnx')\n",
|
||||
"audios_for_stream = glob.glob(f'{files_dir}/*.wav')\n",
|
||||
"print(len(audios_for_stream)) # total 4 audios"
|
||||
]
|
||||
},
|
||||
@@ -435,5 +403,5 @@
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user