fx old examples

This commit is contained in:
adamnsandle
2024-09-24 12:01:47 +00:00
parent 1d48167271
commit fa8036ae1c

View File

@@ -17,6 +17,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"#!apt install ffmpeg\n",
"!pip -q install pydub\n", "!pip -q install pydub\n",
"from google.colab import output\n", "from google.colab import output\n",
"from base64 import b64decode, b64encode\n", "from base64 import b64decode, b64encode\n",
@@ -37,13 +38,12 @@
" model='silero_vad',\n", " model='silero_vad',\n",
" force_reload=True)\n", " force_reload=True)\n",
"\n", "\n",
"def int2float(sound):\n", "def int2float(audio):\n",
" abs_max = np.abs(sound).max()\n", " samples = audio.get_array_of_samples()\n",
" sound = sound.astype('float32')\n", " new_sound = audio._spawn(samples)\n",
" if abs_max > 0:\n", " arr = np.array(samples).astype(np.float32)\n",
" sound *= 1/32768\n", " arr = arr / np.abs(arr).max()\n",
" sound = sound.squeeze()\n", " return arr\n",
" return sound\n",
"\n", "\n",
"AUDIO_HTML = \"\"\"\n", "AUDIO_HTML = \"\"\"\n",
"<script>\n", "<script>\n",
@@ -68,10 +68,10 @@
" //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n", " //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
" mimeType : 'audio/webm;codecs=opus'\n", " mimeType : 'audio/webm;codecs=opus'\n",
" //mimeType : 'audio/webm;codecs=pcm'\n", " //mimeType : 'audio/webm;codecs=pcm'\n",
" }; \n", " };\n",
" //recorder = new MediaRecorder(stream, options);\n", " //recorder = new MediaRecorder(stream, options);\n",
" recorder = new MediaRecorder(stream);\n", " recorder = new MediaRecorder(stream);\n",
" recorder.ondataavailable = function(e) { \n", " recorder.ondataavailable = function(e) {\n",
" var url = URL.createObjectURL(e.data);\n", " var url = URL.createObjectURL(e.data);\n",
" // var preview = document.createElement('audio');\n", " // var preview = document.createElement('audio');\n",
" // preview.controls = true;\n", " // preview.controls = true;\n",
@@ -79,7 +79,7 @@
" // document.body.appendChild(preview);\n", " // document.body.appendChild(preview);\n",
"\n", "\n",
" reader = new FileReader();\n", " reader = new FileReader();\n",
" reader.readAsDataURL(e.data); \n", " reader.readAsDataURL(e.data);\n",
" reader.onloadend = function() {\n", " reader.onloadend = function() {\n",
" base64data = reader.result;\n", " base64data = reader.result;\n",
" //console.log(\"Inside FileReader:\" + base64data);\n", " //console.log(\"Inside FileReader:\" + base64data);\n",
@@ -121,7 +121,7 @@
"\n", "\n",
"}\n", "}\n",
"});\n", "});\n",
" \n", "\n",
"</script>\n", "</script>\n",
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
@@ -133,8 +133,8 @@
" audio.export('test.mp3', format='mp3')\n", " audio.export('test.mp3', format='mp3')\n",
" audio = audio.set_channels(1)\n", " audio = audio.set_channels(1)\n",
" audio = audio.set_frame_rate(16000)\n", " audio = audio.set_frame_rate(16000)\n",
" audio_float = int2float(np.array(audio.get_array_of_samples()))\n", " audio_float = int2float(audio)\n",
" audio_tens = torch.tensor(audio_float )\n", " audio_tens = torch.tensor(audio_float)\n",
" return audio_tens\n", " return audio_tens\n",
"\n", "\n",
"def make_animation(probs, audio_duration, interval=40):\n", "def make_animation(probs, audio_duration, interval=40):\n",
@@ -154,19 +154,18 @@
" def animate(i):\n", " def animate(i):\n",
" x = i * interval / 1000 - 0.04\n", " x = i * interval / 1000 - 0.04\n",
" y = np.linspace(0, 1.02, 2)\n", " y = np.linspace(0, 1.02, 2)\n",
" \n", "\n",
" line.set_data(x, y)\n", " line.set_data(x, y)\n",
" line.set_color('#990000')\n", " line.set_color('#990000')\n",
" return line,\n", " return line,\n",
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=int(audio_duration / (interval / 1000)))\n",
"\n", "\n",
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=audio_duration / (interval / 1000))\n", " f = r\"animation.mp4\"\n",
"\n", " writervideo = FFMpegWriter(fps=1000/interval)\n",
" f = r\"animation.mp4\" \n",
" writervideo = FFMpegWriter(fps=1000/interval) \n",
" anim.save(f, writer=writervideo)\n", " anim.save(f, writer=writervideo)\n",
" plt.close('all')\n", " plt.close('all')\n",
"\n", "\n",
"def combine_audio(vidname, audname, outname, fps=25): \n", "def combine_audio(vidname, audname, outname, fps=25):\n",
" my_clip = mpe.VideoFileClip(vidname, verbose=False)\n", " my_clip = mpe.VideoFileClip(vidname, verbose=False)\n",
" audio_background = mpe.AudioFileClip(audname)\n", " audio_background = mpe.AudioFileClip(audname)\n",
" final_clip = my_clip.set_audio(audio_background)\n", " final_clip = my_clip.set_audio(audio_background)\n",
@@ -174,15 +173,10 @@
"\n", "\n",
"def record_make_animation():\n", "def record_make_animation():\n",
" tensor = record()\n", " tensor = record()\n",
"\n",
" print('Calculating probabilities...')\n", " print('Calculating probabilities...')\n",
" speech_probs = []\n", " speech_probs = []\n",
" window_size_samples = 512\n", " window_size_samples = 512\n",
" for i in range(0, len(tensor), window_size_samples):\n", " speech_probs = model.audio_forward(tensor, sr=16000)[0].tolist()\n",
" if len(tensor[i: i+ window_size_samples]) < window_size_samples:\n",
" break\n",
" speech_prob = model(tensor[i: i+ window_size_samples], 16000).item()\n",
" speech_probs.append(speech_prob)\n",
" model.reset_states()\n", " model.reset_states()\n",
" print('Making animation...')\n", " print('Making animation...')\n",
" make_animation(speech_probs, len(tensor) / 16000)\n", " make_animation(speech_probs, len(tensor) / 16000)\n",
@@ -196,7 +190,9 @@
" <video width=800 controls>\n", " <video width=800 controls>\n",
" <source src=\"%s\" type=\"video/mp4\">\n", " <source src=\"%s\" type=\"video/mp4\">\n",
" </video>\n", " </video>\n",
" \"\"\" % data_url))" " \"\"\" % data_url))\n",
"\n",
" return speech_probs"
] ]
}, },
{ {
@@ -216,7 +212,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"record_make_animation()" "speech_probs = record_make_animation()"
] ]
} }
], ],