From fa8036ae1c901c865eb82eb5235466c35c804e89 Mon Sep 17 00:00:00 2001 From: adamnsandle Date: Tue, 24 Sep 2024 12:01:47 +0000 Subject: [PATCH] fx old examples --- examples/colab_record_example.ipynb | 50 +++++++++++++---------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/examples/colab_record_example.ipynb b/examples/colab_record_example.ipynb index 9b11d5e..4de7e26 100644 --- a/examples/colab_record_example.ipynb +++ b/examples/colab_record_example.ipynb @@ -17,6 +17,7 @@ }, "outputs": [], "source": [ + "#!apt install ffmpeg\n", "!pip -q install pydub\n", "from google.colab import output\n", "from base64 import b64decode, b64encode\n", @@ -37,13 +38,12 @@ " model='silero_vad',\n", " force_reload=True)\n", "\n", - "def int2float(sound):\n", - " abs_max = np.abs(sound).max()\n", - " sound = sound.astype('float32')\n", - " if abs_max > 0:\n", - " sound *= 1/32768\n", - " sound = sound.squeeze()\n", - " return sound\n", + "def int2float(audio):\n", + " samples = audio.get_array_of_samples()\n", + " new_sound = audio._spawn(samples)\n", + " arr = np.array(samples).astype(np.float32)\n", + " arr = arr / np.abs(arr).max()\n", + " return arr\n", "\n", "AUDIO_HTML = \"\"\"\n", "\n", "\"\"\"\n", "\n", @@ -133,8 +133,8 @@ " audio.export('test.mp3', format='mp3')\n", " audio = audio.set_channels(1)\n", " audio = audio.set_frame_rate(16000)\n", - " audio_float = int2float(np.array(audio.get_array_of_samples()))\n", - " audio_tens = torch.tensor(audio_float )\n", + " audio_float = int2float(audio)\n", + " audio_tens = torch.tensor(audio_float)\n", " return audio_tens\n", "\n", "def make_animation(probs, audio_duration, interval=40):\n", @@ -154,19 +154,18 @@ " def animate(i):\n", " x = i * interval / 1000 - 0.04\n", " y = np.linspace(0, 1.02, 2)\n", - " \n", + "\n", " line.set_data(x, y)\n", " line.set_color('#990000')\n", " return line,\n", + " anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=int(audio_duration / (interval / 1000)))\n", "\n", - " anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=audio_duration / (interval / 1000))\n", - "\n", - " f = r\"animation.mp4\" \n", - " writervideo = FFMpegWriter(fps=1000/interval) \n", + " f = r\"animation.mp4\"\n", + " writervideo = FFMpegWriter(fps=1000/interval)\n", " anim.save(f, writer=writervideo)\n", " plt.close('all')\n", "\n", - "def combine_audio(vidname, audname, outname, fps=25): \n", + "def combine_audio(vidname, audname, outname, fps=25):\n", " my_clip = mpe.VideoFileClip(vidname, verbose=False)\n", " audio_background = mpe.AudioFileClip(audname)\n", " final_clip = my_clip.set_audio(audio_background)\n", @@ -174,15 +173,10 @@ "\n", "def record_make_animation():\n", " tensor = record()\n", - "\n", " print('Calculating probabilities...')\n", " speech_probs = []\n", " window_size_samples = 512\n", - " for i in range(0, len(tensor), window_size_samples):\n", - " if len(tensor[i: i+ window_size_samples]) < window_size_samples:\n", - " break\n", - " speech_prob = model(tensor[i: i+ window_size_samples], 16000).item()\n", - " speech_probs.append(speech_prob)\n", + " speech_probs = model.audio_forward(tensor, sr=16000)[0].tolist()\n", " model.reset_states()\n", " print('Making animation...')\n", " make_animation(speech_probs, len(tensor) / 16000)\n", @@ -196,7 +190,9 @@ " \n", - " \"\"\" % data_url))" + " \"\"\" % data_url))\n", + "\n", + " return speech_probs" ] }, { @@ -216,7 +212,7 @@ }, "outputs": [], "source": [ - "record_make_animation()" + "speech_probs = record_make_animation()" ] } ],