mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-05 18:09:22 +08:00
fix old examples
This commit is contained in:
@@ -17,6 +17,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"#!apt install ffmpeg\n",
|
||||||
"!pip -q install pydub\n",
|
"!pip -q install pydub\n",
|
||||||
"from google.colab import output\n",
|
"from google.colab import output\n",
|
||||||
"from base64 import b64decode, b64encode\n",
|
"from base64 import b64decode, b64encode\n",
|
||||||
@@ -37,13 +38,12 @@
|
|||||||
" model='silero_vad',\n",
|
" model='silero_vad',\n",
|
||||||
" force_reload=True)\n",
|
" force_reload=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def int2float(sound):\n",
|
"def int2float(audio):\n",
|
||||||
" abs_max = np.abs(sound).max()\n",
|
" samples = audio.get_array_of_samples()\n",
|
||||||
" sound = sound.astype('float32')\n",
|
" new_sound = audio._spawn(samples)\n",
|
||||||
" if abs_max > 0:\n",
|
" arr = np.array(samples).astype(np.float32)\n",
|
||||||
" sound *= 1/32768\n",
|
" arr = arr / np.abs(arr).max()\n",
|
||||||
" sound = sound.squeeze()\n",
|
" return arr\n",
|
||||||
" return sound\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"AUDIO_HTML = \"\"\"\n",
|
"AUDIO_HTML = \"\"\"\n",
|
||||||
"<script>\n",
|
"<script>\n",
|
||||||
@@ -68,10 +68,10 @@
|
|||||||
" //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
|
" //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
|
||||||
" mimeType : 'audio/webm;codecs=opus'\n",
|
" mimeType : 'audio/webm;codecs=opus'\n",
|
||||||
" //mimeType : 'audio/webm;codecs=pcm'\n",
|
" //mimeType : 'audio/webm;codecs=pcm'\n",
|
||||||
" }; \n",
|
" };\n",
|
||||||
" //recorder = new MediaRecorder(stream, options);\n",
|
" //recorder = new MediaRecorder(stream, options);\n",
|
||||||
" recorder = new MediaRecorder(stream);\n",
|
" recorder = new MediaRecorder(stream);\n",
|
||||||
" recorder.ondataavailable = function(e) { \n",
|
" recorder.ondataavailable = function(e) {\n",
|
||||||
" var url = URL.createObjectURL(e.data);\n",
|
" var url = URL.createObjectURL(e.data);\n",
|
||||||
" // var preview = document.createElement('audio');\n",
|
" // var preview = document.createElement('audio');\n",
|
||||||
" // preview.controls = true;\n",
|
" // preview.controls = true;\n",
|
||||||
@@ -79,7 +79,7 @@
|
|||||||
" // document.body.appendChild(preview);\n",
|
" // document.body.appendChild(preview);\n",
|
||||||
"\n",
|
"\n",
|
||||||
" reader = new FileReader();\n",
|
" reader = new FileReader();\n",
|
||||||
" reader.readAsDataURL(e.data); \n",
|
" reader.readAsDataURL(e.data);\n",
|
||||||
" reader.onloadend = function() {\n",
|
" reader.onloadend = function() {\n",
|
||||||
" base64data = reader.result;\n",
|
" base64data = reader.result;\n",
|
||||||
" //console.log(\"Inside FileReader:\" + base64data);\n",
|
" //console.log(\"Inside FileReader:\" + base64data);\n",
|
||||||
@@ -121,7 +121,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"});\n",
|
"});\n",
|
||||||
" \n",
|
"\n",
|
||||||
"</script>\n",
|
"</script>\n",
|
||||||
"\"\"\"\n",
|
"\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -133,8 +133,8 @@
|
|||||||
" audio.export('test.mp3', format='mp3')\n",
|
" audio.export('test.mp3', format='mp3')\n",
|
||||||
" audio = audio.set_channels(1)\n",
|
" audio = audio.set_channels(1)\n",
|
||||||
" audio = audio.set_frame_rate(16000)\n",
|
" audio = audio.set_frame_rate(16000)\n",
|
||||||
" audio_float = int2float(np.array(audio.get_array_of_samples()))\n",
|
" audio_float = int2float(audio)\n",
|
||||||
" audio_tens = torch.tensor(audio_float )\n",
|
" audio_tens = torch.tensor(audio_float)\n",
|
||||||
" return audio_tens\n",
|
" return audio_tens\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def make_animation(probs, audio_duration, interval=40):\n",
|
"def make_animation(probs, audio_duration, interval=40):\n",
|
||||||
@@ -154,19 +154,18 @@
|
|||||||
" def animate(i):\n",
|
" def animate(i):\n",
|
||||||
" x = i * interval / 1000 - 0.04\n",
|
" x = i * interval / 1000 - 0.04\n",
|
||||||
" y = np.linspace(0, 1.02, 2)\n",
|
" y = np.linspace(0, 1.02, 2)\n",
|
||||||
" \n",
|
"\n",
|
||||||
" line.set_data(x, y)\n",
|
" line.set_data(x, y)\n",
|
||||||
" line.set_color('#990000')\n",
|
" line.set_color('#990000')\n",
|
||||||
" return line,\n",
|
" return line,\n",
|
||||||
|
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=int(audio_duration / (interval / 1000)))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=audio_duration / (interval / 1000))\n",
|
" f = r\"animation.mp4\"\n",
|
||||||
"\n",
|
" writervideo = FFMpegWriter(fps=1000/interval)\n",
|
||||||
" f = r\"animation.mp4\" \n",
|
|
||||||
" writervideo = FFMpegWriter(fps=1000/interval) \n",
|
|
||||||
" anim.save(f, writer=writervideo)\n",
|
" anim.save(f, writer=writervideo)\n",
|
||||||
" plt.close('all')\n",
|
" plt.close('all')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def combine_audio(vidname, audname, outname, fps=25): \n",
|
"def combine_audio(vidname, audname, outname, fps=25):\n",
|
||||||
" my_clip = mpe.VideoFileClip(vidname, verbose=False)\n",
|
" my_clip = mpe.VideoFileClip(vidname, verbose=False)\n",
|
||||||
" audio_background = mpe.AudioFileClip(audname)\n",
|
" audio_background = mpe.AudioFileClip(audname)\n",
|
||||||
" final_clip = my_clip.set_audio(audio_background)\n",
|
" final_clip = my_clip.set_audio(audio_background)\n",
|
||||||
@@ -174,15 +173,10 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"def record_make_animation():\n",
|
"def record_make_animation():\n",
|
||||||
" tensor = record()\n",
|
" tensor = record()\n",
|
||||||
"\n",
|
|
||||||
" print('Calculating probabilities...')\n",
|
" print('Calculating probabilities...')\n",
|
||||||
" speech_probs = []\n",
|
" speech_probs = []\n",
|
||||||
" window_size_samples = 512\n",
|
" window_size_samples = 512\n",
|
||||||
" for i in range(0, len(tensor), window_size_samples):\n",
|
" speech_probs = model.audio_forward(tensor, sr=16000)[0].tolist()\n",
|
||||||
" if len(tensor[i: i+ window_size_samples]) < window_size_samples:\n",
|
|
||||||
" break\n",
|
|
||||||
" speech_prob = model(tensor[i: i+ window_size_samples], 16000).item()\n",
|
|
||||||
" speech_probs.append(speech_prob)\n",
|
|
||||||
" model.reset_states()\n",
|
" model.reset_states()\n",
|
||||||
" print('Making animation...')\n",
|
" print('Making animation...')\n",
|
||||||
" make_animation(speech_probs, len(tensor) / 16000)\n",
|
" make_animation(speech_probs, len(tensor) / 16000)\n",
|
||||||
@@ -196,7 +190,9 @@
|
|||||||
" <video width=800 controls>\n",
|
" <video width=800 controls>\n",
|
||||||
" <source src=\"%s\" type=\"video/mp4\">\n",
|
" <source src=\"%s\" type=\"video/mp4\">\n",
|
||||||
" </video>\n",
|
" </video>\n",
|
||||||
" \"\"\" % data_url))"
|
" \"\"\" % data_url))\n",
|
||||||
|
"\n",
|
||||||
|
" return speech_probs"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -216,7 +212,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"record_make_animation()"
|
"speech_probs = record_make_animation()"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user