mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-05 18:09:22 +08:00
@@ -8,6 +8,8 @@ Currently, the notebook consists of two examples:
|
|||||||
- One that records audio of a predefined length from the microphone, processes it with Silero-VAD, and plots it afterwards.
|
- One that records audio of a predefined length from the microphone, processes it with Silero-VAD, and plots it afterwards.
|
||||||
- The other one plots the speech probabilities in real-time (using jupyterplot) and records the audio until you press enter.
|
- The other one plots the speech probabilities in real-time (using jupyterplot) and records the audio until you press enter.
|
||||||
|
|
||||||
|
This example does not work in Google Colab! For local usage only.
|
||||||
|
|
||||||
## Example Video for the Real-Time Visualization
|
## Example Video for the Real-Time Visualization
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "62a0cccb",
|
"id": "76aa55ba",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Pyaudio Microphone Streaming Examples\n",
|
"# Pyaudio Microphone Streaming Examples\n",
|
||||||
@@ -12,12 +12,14 @@
|
|||||||
"I created it as an example of how binary data from a stream could be fed into Silero VAD.\n",
|
"I created it as an example of how binary data from a stream could be fed into Silero VAD.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Has been tested on Ubuntu 21.04 (x86). After you have installed the dependencies below, no additional setup is required."
|
"Has been tested on Ubuntu 21.04 (x86). After you have installed the dependencies below, no additional setup is required.\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook does not work in Google Colab! For local usage only."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "64cbe1eb",
|
"id": "4a4e15c2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Dependencies\n",
|
"## Dependencies\n",
|
||||||
@@ -26,22 +28,27 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 1,
|
||||||
"id": "57bc2aac",
|
"id": "24205cce",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-10-09T08:47:34.056898Z",
|
||||||
|
"start_time": "2024-10-09T08:47:34.053418Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip install numpy==2.0.2\n",
|
"#!pip install numpy>=1.24.0\n",
|
||||||
"#!pip install torch==2.4.1\n",
|
"#!pip install torch>=1.12.0\n",
|
||||||
"#!pip install matplotlib==3.9.2\n",
|
"#!pip install matplotlib>=3.6.0\n",
|
||||||
"#!pip install torchaudio==2.4.1\n",
|
"#!pip install torchaudio>=0.12.0\n",
|
||||||
"#!pip install soundfile==0.12.1\n",
|
"#!pip install soundfile==0.12.1\n",
|
||||||
"#!pip install pyaudio==0.2.11"
|
"#!apt install python3-pyaudio (linux) or pip install pyaudio (windows)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "110de761",
|
"id": "cd22818f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Imports"
|
"## Imports"
|
||||||
@@ -49,10 +56,27 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 2,
|
||||||
"id": "5a647d8d",
|
"id": "994d7f3a",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
"outputs": [],
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-10-09T08:47:39.005032Z",
|
||||||
|
"start_time": "2024-10-09T08:47:36.489952Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "ModuleNotFoundError",
|
||||||
|
"evalue": "No module named 'pyaudio'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpylab\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyaudio\u001b[39;00m\n",
|
||||||
|
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyaudio'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import io\n",
|
"import io\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
@@ -67,7 +91,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "725d7066",
|
"id": "ac5c52f7",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -79,7 +103,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "1c0b2ea7",
|
"id": "ad5919dc",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -92,7 +116,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "f9112603",
|
"id": "784d1ab6",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Helper Methods"
|
"### Helper Methods"
|
||||||
@@ -101,7 +125,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "5abc6330",
|
"id": "af4bca64",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -124,7 +148,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "5124095e",
|
"id": "ca13e514",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Pyaudio Set-up"
|
"## Pyaudio Set-up"
|
||||||
@@ -133,7 +157,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "a845356e",
|
"id": "75f99022",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -147,7 +171,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "0b910c99",
|
"id": "4da7d2ef",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Simple Example\n",
|
"## Simple Example\n",
|
||||||
@@ -157,7 +181,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "9d3d2c10",
|
"id": "6fe77661",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -167,7 +191,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "3cb44a4a",
|
"id": "23f4da3e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -207,7 +231,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "a3dda982",
|
"id": "fd243e8f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Real Time Visualization\n",
|
"## Real Time Visualization\n",
|
||||||
@@ -220,7 +244,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "05ef4100",
|
"id": "d36980c2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -230,7 +254,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "d1d4cdd6",
|
"id": "5607b616",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -287,7 +311,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "1e398009",
|
"id": "dc4f0108",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -311,7 +335,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.10"
|
"version": "3.10.14"
|
||||||
},
|
},
|
||||||
"toc": {
|
"toc": {
|
||||||
"base_numbering": 1,
|
"base_numbering": 1,
|
||||||
|
|||||||
Reference in New Issue
Block a user