18 Commits

Author SHA1 Message Date
Alexander Veysov
6478567951 Update pyproject.toml 2024-10-09 12:49:27 +03:00
Alexander Veysov
35d601adc6 Update pyproject.toml 2024-10-09 12:47:08 +03:00
Dimitrii Voronin
032ca21a70 Merge pull request #549 from snakers4/adamnsandle
Adamnsandle
2024-10-09 12:32:09 +03:00
adamnsandle
001d57d6ff fx dependencies 2024-10-09 09:26:39 +00:00
adamnsandle
6e6da04e7a fix pyaudio streaming example 2024-10-09 08:49:39 +00:00
Alexander Veysov
9c1eff9169 Delete files/real_time_example.mp4 2024-10-09 10:10:03 +03:00
Alexander Veysov
36b759d053 Add files via upload 2024-10-09 10:02:04 +03:00
Dimitrii Voronin
1a7499607a Merge pull request #543 from snakers4/adamnsandle
Adamnsandle
2024-09-24 15:19:30 +03:00
Alexander Veysov
87451b059f Update README.md 2024-09-24 15:16:18 +03:00
Alexander Veysov
becc7770c7 Update README.md 2024-09-24 15:15:10 +03:00
Alexander Veysov
3f2eff0303 Merge pull request #542 from snakers4/snakers4-patch-1
Update README.md
2024-09-24 15:14:18 +03:00
Alexander Veysov
3a25110cf9 Update README.md 2024-09-24 15:13:34 +03:00
adamnsandle
d23867da10 fx parallel example 2024-09-24 12:03:07 +00:00
adamnsandle
2043282182 Merge branch 'master' of github.com:snakers4/silero-vad into adamnsandle 2024-09-24 12:02:00 +00:00
adamnsandle
fa8036ae1c fx old examples 2024-09-24 12:01:47 +00:00
Dimitrii Voronin
2fff4b8ce8 Merge pull request #541 from snakers4/adamnsandle-1
Update README.md
2024-09-24 14:48:51 +03:00
Dimitrii Voronin
64b863d2ff Update README.md 2024-09-24 14:48:35 +03:00
Dimitrii Voronin
8a3600665b Merge pull request #540 from snakers4/adamnsandle-patch-2
Update README.md
2024-09-24 13:45:31 +03:00
6 changed files with 123 additions and 78 deletions

View File

@@ -31,19 +31,30 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc-
<details> <details>
<summary>Dependencies</summary> <summary>Dependencies</summary>
**Silero VAD uses torchaudio library for audio file I/O functionalities, which are torchaudio.info, torchaudio.load, and torchaudio.save, so a proper audio backend is required:**
- Option №1 - [**FFmpeg**](https://www.ffmpeg.org/) backend. `conda install -c conda-forge 'ffmpeg<7'`
- Option №2 - [**sox_io**](https://pypi.org/project/sox/) backend. `apt-get install sox`, TorchAudio is tested on libsox 14.4.2.
- Option №3 - [**soundfile**](https://pypi.org/project/soundfile/) backend. `pip install soundfile`
**Additional dependencies:** System requirements to run python examples on `x86-64` systems:
- **torch>=1.12.0**
- **torchaudio>=0.12.0** (for I/O functionalities only)
- **onnxruntime>=1.16.1** (for ONNX model usage)
- `python 3.8+`;
- 1G+ RAM;
- A modern CPU with AVX, AVX2, AVX-512 or AMX instruction sets.
Dependencies:
- `torch>=1.12.0`;
- `torchaudio>=0.12.0` (for I/O only);
- `onnxruntime>=1.16.1` (for ONNX model usage).
Silero VAD uses torchaudio library for audio I/O (`torchaudio.info`, `torchaudio.load`, and `torchaudio.save`), so a proper audio backend is required:
- Option №1 - [**FFmpeg**](https://www.ffmpeg.org/) backend. `conda install -c conda-forge 'ffmpeg<7'`;
- Option №2 - [**sox_io**](https://pypi.org/project/sox/) backend. `apt-get install sox`, TorchAudio is tested on libsox 14.4.2;
- Option №3 - [**soundfile**](https://pypi.org/project/soundfile/) backend. `pip install soundfile`.
If you are planning to run the VAD using solely the `onnx-runtime`, it will run on any other system architectures where onnx-runtime is [supported](https://onnxruntime.ai/getting-started). In this case please note that:
- You will have to implement the I/O;
- You will have to adapt the existing wrappers / examples / post-processing for your use-case.
</details> </details>
**Using pip**: **Using pip**:

View File

@@ -17,6 +17,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"#!apt install ffmpeg\n",
"!pip -q install pydub\n", "!pip -q install pydub\n",
"from google.colab import output\n", "from google.colab import output\n",
"from base64 import b64decode, b64encode\n", "from base64 import b64decode, b64encode\n",
@@ -37,13 +38,12 @@
" model='silero_vad',\n", " model='silero_vad',\n",
" force_reload=True)\n", " force_reload=True)\n",
"\n", "\n",
"def int2float(sound):\n", "def int2float(audio):\n",
" abs_max = np.abs(sound).max()\n", " samples = audio.get_array_of_samples()\n",
" sound = sound.astype('float32')\n", " new_sound = audio._spawn(samples)\n",
" if abs_max > 0:\n", " arr = np.array(samples).astype(np.float32)\n",
" sound *= 1/32768\n", " arr = arr / np.abs(arr).max()\n",
" sound = sound.squeeze()\n", " return arr\n",
" return sound\n",
"\n", "\n",
"AUDIO_HTML = \"\"\"\n", "AUDIO_HTML = \"\"\"\n",
"<script>\n", "<script>\n",
@@ -68,10 +68,10 @@
" //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n", " //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
" mimeType : 'audio/webm;codecs=opus'\n", " mimeType : 'audio/webm;codecs=opus'\n",
" //mimeType : 'audio/webm;codecs=pcm'\n", " //mimeType : 'audio/webm;codecs=pcm'\n",
" }; \n", " };\n",
" //recorder = new MediaRecorder(stream, options);\n", " //recorder = new MediaRecorder(stream, options);\n",
" recorder = new MediaRecorder(stream);\n", " recorder = new MediaRecorder(stream);\n",
" recorder.ondataavailable = function(e) { \n", " recorder.ondataavailable = function(e) {\n",
" var url = URL.createObjectURL(e.data);\n", " var url = URL.createObjectURL(e.data);\n",
" // var preview = document.createElement('audio');\n", " // var preview = document.createElement('audio');\n",
" // preview.controls = true;\n", " // preview.controls = true;\n",
@@ -79,7 +79,7 @@
" // document.body.appendChild(preview);\n", " // document.body.appendChild(preview);\n",
"\n", "\n",
" reader = new FileReader();\n", " reader = new FileReader();\n",
" reader.readAsDataURL(e.data); \n", " reader.readAsDataURL(e.data);\n",
" reader.onloadend = function() {\n", " reader.onloadend = function() {\n",
" base64data = reader.result;\n", " base64data = reader.result;\n",
" //console.log(\"Inside FileReader:\" + base64data);\n", " //console.log(\"Inside FileReader:\" + base64data);\n",
@@ -121,7 +121,7 @@
"\n", "\n",
"}\n", "}\n",
"});\n", "});\n",
" \n", "\n",
"</script>\n", "</script>\n",
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
@@ -133,8 +133,8 @@
" audio.export('test.mp3', format='mp3')\n", " audio.export('test.mp3', format='mp3')\n",
" audio = audio.set_channels(1)\n", " audio = audio.set_channels(1)\n",
" audio = audio.set_frame_rate(16000)\n", " audio = audio.set_frame_rate(16000)\n",
" audio_float = int2float(np.array(audio.get_array_of_samples()))\n", " audio_float = int2float(audio)\n",
" audio_tens = torch.tensor(audio_float )\n", " audio_tens = torch.tensor(audio_float)\n",
" return audio_tens\n", " return audio_tens\n",
"\n", "\n",
"def make_animation(probs, audio_duration, interval=40):\n", "def make_animation(probs, audio_duration, interval=40):\n",
@@ -154,19 +154,18 @@
" def animate(i):\n", " def animate(i):\n",
" x = i * interval / 1000 - 0.04\n", " x = i * interval / 1000 - 0.04\n",
" y = np.linspace(0, 1.02, 2)\n", " y = np.linspace(0, 1.02, 2)\n",
" \n", "\n",
" line.set_data(x, y)\n", " line.set_data(x, y)\n",
" line.set_color('#990000')\n", " line.set_color('#990000')\n",
" return line,\n", " return line,\n",
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=int(audio_duration / (interval / 1000)))\n",
"\n", "\n",
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=audio_duration / (interval / 1000))\n", " f = r\"animation.mp4\"\n",
"\n", " writervideo = FFMpegWriter(fps=1000/interval)\n",
" f = r\"animation.mp4\" \n",
" writervideo = FFMpegWriter(fps=1000/interval) \n",
" anim.save(f, writer=writervideo)\n", " anim.save(f, writer=writervideo)\n",
" plt.close('all')\n", " plt.close('all')\n",
"\n", "\n",
"def combine_audio(vidname, audname, outname, fps=25): \n", "def combine_audio(vidname, audname, outname, fps=25):\n",
" my_clip = mpe.VideoFileClip(vidname, verbose=False)\n", " my_clip = mpe.VideoFileClip(vidname, verbose=False)\n",
" audio_background = mpe.AudioFileClip(audname)\n", " audio_background = mpe.AudioFileClip(audname)\n",
" final_clip = my_clip.set_audio(audio_background)\n", " final_clip = my_clip.set_audio(audio_background)\n",
@@ -174,15 +173,10 @@
"\n", "\n",
"def record_make_animation():\n", "def record_make_animation():\n",
" tensor = record()\n", " tensor = record()\n",
"\n",
" print('Calculating probabilities...')\n", " print('Calculating probabilities...')\n",
" speech_probs = []\n", " speech_probs = []\n",
" window_size_samples = 512\n", " window_size_samples = 512\n",
" for i in range(0, len(tensor), window_size_samples):\n", " speech_probs = model.audio_forward(tensor, sr=16000)[0].tolist()\n",
" if len(tensor[i: i+ window_size_samples]) < window_size_samples:\n",
" break\n",
" speech_prob = model(tensor[i: i+ window_size_samples], 16000).item()\n",
" speech_probs.append(speech_prob)\n",
" model.reset_states()\n", " model.reset_states()\n",
" print('Making animation...')\n", " print('Making animation...')\n",
" make_animation(speech_probs, len(tensor) / 16000)\n", " make_animation(speech_probs, len(tensor) / 16000)\n",
@@ -196,7 +190,9 @@
" <video width=800 controls>\n", " <video width=800 controls>\n",
" <source src=\"%s\" type=\"video/mp4\">\n", " <source src=\"%s\" type=\"video/mp4\">\n",
" </video>\n", " </video>\n",
" \"\"\" % data_url))" " \"\"\" % data_url))\n",
"\n",
" return speech_probs"
] ]
}, },
{ {
@@ -216,7 +212,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"record_make_animation()" "speech_probs = record_make_animation()"
] ]
} }
], ],

View File

@@ -1,7 +1,6 @@
{ {
"cells": [ "cells": [
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
@@ -18,17 +17,19 @@
"SAMPLING_RATE = 16000\n", "SAMPLING_RATE = 16000\n",
"import torch\n", "import torch\n",
"from pprint import pprint\n", "from pprint import pprint\n",
"import time\n",
"import shutil\n",
"\n", "\n",
"torch.set_num_threads(1)\n", "torch.set_num_threads(1)\n",
"NUM_PROCESS=4 # set to the number of CPU cores in the machine\n", "NUM_PROCESS=4 # set to the number of CPU cores in the machine\n",
"NUM_COPIES=8\n", "NUM_COPIES=8\n",
"# download wav files, make multiple copies\n", "# download wav files, make multiple copies\n",
"for idx in range(NUM_COPIES):\n", "torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', f\"en_example0.wav\")\n",
" torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', f\"en_example{idx}.wav\")\n" "for idx in range(NUM_COPIES-1):\n",
" shutil.copy(f\"en_example0.wav\", f\"en_example{idx+1}.wav\")"
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
@@ -54,7 +55,6 @@
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
@@ -99,7 +99,6 @@
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
@@ -127,7 +126,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "diarization", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -141,7 +140,20 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.15" "version": "3.10.14"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -7,6 +7,8 @@ It has been designed as a low-level example for binary real-time streaming using
Currently, the notebook consists of two examples: Currently, the notebook consists of two examples:
- One that records audio of a predefined length from the microphone, processes it with Silero-VAD, and plots it afterwards. - One that records audio of a predefined length from the microphone, processes it with Silero-VAD, and plots it afterwards.
- The other one plots the speech probabilities in real-time (using jupyterplot) and records the audio until you press enter. - The other one plots the speech probabilities in real-time (using jupyterplot) and records the audio until you press enter.
This example does not work in google colab! For local usage only.
## Example Video for the Real-Time Visualization ## Example Video for the Real-Time Visualization

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "62a0cccb", "id": "76aa55ba",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Pyaudio Microphone Streaming Examples\n", "# Pyaudio Microphone Streaming Examples\n",
@@ -12,12 +12,14 @@
"I created it as an example on how binary data from a stream could be fed into Silero VAD.\n", "I created it as an example on how binary data from a stream could be fed into Silero VAD.\n",
"\n", "\n",
"\n", "\n",
"Has been tested on Ubuntu 21.04 (x86). After you installed the dependencies below, no additional setup is required." "Has been tested on Ubuntu 21.04 (x86). After you installed the dependencies below, no additional setup is required.\n",
"\n",
"This notebook does not work in google colab! For local usage only."
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "64cbe1eb", "id": "4a4e15c2",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Dependencies\n", "## Dependencies\n",
@@ -26,22 +28,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "57bc2aac", "id": "24205cce",
"metadata": {}, "metadata": {
"ExecuteTime": {
"end_time": "2024-10-09T08:47:34.056898Z",
"start_time": "2024-10-09T08:47:34.053418Z"
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#!pip install numpy==2.0.2\n", "#!pip install numpy>=1.24.0\n",
"#!pip install torch==2.4.1\n", "#!pip install torch>=1.12.0\n",
"#!pip install matplotlib==3.9.2\n", "#!pip install matplotlib>=3.6.0\n",
"#!pip install torchaudio==2.4.1\n", "#!pip install torchaudio>=0.12.0\n",
"#!pip install soundfile==0.12.1\n", "#!pip install soundfile==0.12.1\n",
"#!pip install pyaudio==0.2.11" "#!apt install python3-pyaudio (linux) or pip install pyaudio (windows)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "110de761", "id": "cd22818f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Imports" "## Imports"
@@ -49,10 +56,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"id": "5a647d8d", "id": "994d7f3a",
"metadata": {}, "metadata": {
"outputs": [], "ExecuteTime": {
"end_time": "2024-10-09T08:47:39.005032Z",
"start_time": "2024-10-09T08:47:36.489952Z"
}
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'pyaudio'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpylab\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyaudio\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyaudio'"
]
}
],
"source": [ "source": [
"import io\n", "import io\n",
"import numpy as np\n", "import numpy as np\n",
@@ -67,7 +91,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "725d7066", "id": "ac5c52f7",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -79,7 +103,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1c0b2ea7", "id": "ad5919dc",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -92,7 +116,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "f9112603", "id": "784d1ab6",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Helper Methods" "### Helper Methods"
@@ -101,7 +125,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "5abc6330", "id": "af4bca64",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -124,7 +148,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "5124095e", "id": "ca13e514",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Pyaudio Set-up" "## Pyaudio Set-up"
@@ -133,7 +157,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "a845356e", "id": "75f99022",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -147,7 +171,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "0b910c99", "id": "4da7d2ef",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Simple Example\n", "## Simple Example\n",
@@ -157,7 +181,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "9d3d2c10", "id": "6fe77661",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -167,7 +191,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3cb44a4a", "id": "23f4da3e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -207,7 +231,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "a3dda982", "id": "fd243e8f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Real Time Visualization\n", "## Real Time Visualization\n",
@@ -220,7 +244,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "05ef4100", "id": "d36980c2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -230,7 +254,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "d1d4cdd6", "id": "5607b616",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -287,7 +311,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1e398009", "id": "dc4f0108",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -311,7 +335,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.10" "version": "3.10.14"
}, },
"toc": { "toc": {
"base_numbering": 1, "base_numbering": 1,

View File

@@ -3,7 +3,7 @@ requires = ["hatchling"]
build-backend = "hatchling.build" build-backend = "hatchling.build"
[project] [project]
name = "silero-vad" name = "silero-vad"
version = "5.1" version = "5.1.2"
authors = [ authors = [
{name="Silero Team", email="hello@silero.ai"}, {name="Silero Team", email="hello@silero.ai"},
] ]
@@ -32,4 +32,4 @@ dependencies = [
[project.urls] [project.urls]
Homepage = "https://github.com/snakers4/silero-vad" Homepage = "https://github.com/snakers4/silero-vad"
Issues = "https://github.com/snakers4/silero-vad/issues" Issues = "https://github.com/snakers4/silero-vad/issues"