mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-04 09:29:22 +08:00
First commit
This commit is contained in:
52
silero-vad/.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
52
silero-vad/.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: Bug report - [X]
|
||||
labels: bug
|
||||
assignees: snakers4
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug
|
||||
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
|
||||
## To Reproduce
|
||||
|
||||
Steps to reproduce the behavior:
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
|
||||
|
||||
## Expected behavior
|
||||
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
## Environment
|
||||
|
||||
Please copy and paste the output from this
|
||||
[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
|
||||
(or fill out the checklist below manually).
|
||||
|
||||
You can get the script and run it with:
|
||||
```
|
||||
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
|
||||
# For security purposes, please check the contents of collect_env.py before running it.
|
||||
python collect_env.py
|
||||
```
|
||||
|
||||
- PyTorch Version (e.g., 1.0):
|
||||
- OS (e.g., Linux):
|
||||
- How you installed PyTorch (`conda`, `pip`, source):
|
||||
- Build command you used (if compiling from source):
|
||||
- Python version:
|
||||
- CUDA/cuDNN version:
|
||||
- GPU models and configuration:
|
||||
- Any other relevant information:
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context about the problem here. -->
|
||||
27
silero-vad/.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
27
silero-vad/.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: Feature request - [X]
|
||||
labels: enhancement
|
||||
assignees: snakers4
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Feature
|
||||
<!-- A clear and concise description of the feature proposal -->
|
||||
|
||||
## Motivation
|
||||
|
||||
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
|
||||
|
||||
## Pitch
|
||||
|
||||
<!-- A clear and concise description of what you want to happen. -->
|
||||
|
||||
## Alternatives
|
||||
|
||||
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context or screenshots about the feature request here. -->
|
||||
12
silero-vad/.github/ISSUE_TEMPLATE/questions---help---support.md
vendored
Normal file
12
silero-vad/.github/ISSUE_TEMPLATE/questions---help---support.md
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
---
|
||||
name: Questions / Help / Support
|
||||
about: Ask for help, support or ask a question
|
||||
title: "❓ Questions / Help / Support"
|
||||
labels: help wanted
|
||||
assignees: snakers4
|
||||
|
||||
---
|
||||
|
||||
## ❓ Questions and Help
|
||||
|
||||
We have a [wiki](https://github.com/snakers4/silero-models/wiki) available for our users. Please make sure you have checked it out first.
|
||||
76
silero-vad/CODE_OF_CONDUCT.md
Normal file
76
silero-vad/CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
In the interest of fostering an open and welcoming environment, we as
|
||||
contributors and maintainers pledge to making participation in our project and
|
||||
our community a harassment-free experience for everyone, regardless of age, body
|
||||
size, disability, ethnicity, sex characteristics, gender identity and expression,
|
||||
level of experience, education, socio-economic status, nationality, personal
|
||||
appearance, race, religion, or sexual identity and orientation.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to creating a positive environment
|
||||
include:
|
||||
|
||||
* Using welcoming and inclusive language
|
||||
* Being respectful of differing viewpoints and experiences
|
||||
* Gracefully accepting constructive criticism
|
||||
* Focusing on what is best for the community
|
||||
* Showing empathy towards other community members
|
||||
|
||||
Examples of unacceptable behavior by participants include:
|
||||
|
||||
* The use of sexualized language or imagery and unwelcome sexual attention or
|
||||
advances
|
||||
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or electronic
|
||||
address, without explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Our Responsibilities
|
||||
|
||||
Project maintainers are responsible for clarifying the standards of acceptable
|
||||
behavior and are expected to take appropriate and fair corrective action in
|
||||
response to any instances of unacceptable behavior.
|
||||
|
||||
Project maintainers have the right and responsibility to remove, edit, or
|
||||
reject comments, commits, code, wiki edits, issues, and other contributions
|
||||
that are not aligned to this Code of Conduct, or to ban temporarily or
|
||||
permanently any contributor for other behaviors that they deem inappropriate,
|
||||
threatening, offensive, or harmful.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies both within project spaces and in public spaces
|
||||
when an individual is representing the project or its community. Examples of
|
||||
representing a project or community include using an official project e-mail
|
||||
address, posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event. Representation of a project may be
|
||||
further defined and clarified by project maintainers.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported by contacting the project team at aveysov@gmail.com. All
|
||||
complaints will be reviewed and investigated and will result in a response that
|
||||
is deemed necessary and appropriate to the circumstances. The project team is
|
||||
obligated to maintain confidentiality with regard to the reporter of an incident.
|
||||
Further details of specific enforcement policies may be posted separately.
|
||||
|
||||
Project maintainers who do not follow or enforce the Code of Conduct in good
|
||||
faith may face temporary or permanent repercussions as determined by other
|
||||
members of the project's leadership.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
||||
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
For answers to common questions about this code of conduct, see
|
||||
https://www.contributor-covenant.org/faq
|
||||
154
silero-vad/README.md
Normal file
154
silero-vad/README.md
Normal file
@@ -0,0 +1,154 @@
|
||||
[](mailto:hello@silero.ai) [](https://t.me/joinchat/Bv9tjhpdXTI22OUgpOIIDg) [](https://github.com/snakers4/silero-models/blob/master/LICENSE)
|
||||
|
||||
[](https://pytorch.org/hub/snakers4_silero-models_stt/) [](https://tfhub.dev/silero/collections/silero-stt/1)
|
||||
|
||||
[](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb)
|
||||
|
||||

|
||||
|
||||
- [Silero VAD](#silero-vad)
|
||||
- [Getting Started](#getting-started)
|
||||
- [PyTorch](#pytorch)
|
||||
- [ONNX](#onnx)
|
||||
- [Metrics](#metrics)
|
||||
- [Performance Metrics](#performance-metrics)
|
||||
- [Quality Metrics](#quality-metrics)
|
||||
- [Contact](#contact)
|
||||
- [Get in Touch](#get-in-touch)
|
||||
- [Commercial Inquiries](#commercial-inquiries)
|
||||
|
||||
|
||||
# Silero VAD
|
||||
|
||||
Silero VAD: pre-trained enterprise-grade Voice Activity and Number Detector.
|
||||
Enterprise-grade Speech Products made refreshingly simple (also see our [STT](https://github.com/snakers4/silero-models)).
|
||||
|
||||
Currently, there are hardly any high quality / modern / free / public voice activity detectors except for WebRTC Voice Activity Detector ([link](https://github.com/wiseman/py-webrtcvad)).
|
||||
|
||||
Also in enterprise it is crucial to be able to anonymize large-scale spoken corpora (i.e. remove personal data). Typically personal data is considered to be private / sensitive if it contains (i) a name (ii) some private ID. Name recognition is highly subjective and would depend on location, but Voice Activity and Number detections are quite general tasks.
|
||||
|
||||
**Key advantages:**
|
||||
|
||||
- Modern, portable;
|
||||
- Small memory footprint (?);
|
||||
- Trained on huge spoken corpora and noise / sound libraries;
|
||||
- Slower than WebRTC, but sufficiently fast for IOT / edge / mobile applications;
|
||||
|
||||
**Typical use cases:**
|
||||
|
||||
- Spoken corpora anonymization;
|
||||
- Voice detection for IOT / edge / mobile use cases;
|
||||
- Data cleaning and preparation, number and voice detection in general;
|
||||
|
||||
|
||||
Key features / differences:
|
||||
|
||||
## Getting Started
|
||||
|
||||
All of the provided models are listed in the [models.yml](https://github.com/snakers4/silero-models/blob/master/models.yml) file.
|
||||
Any meta-data and newer versions will be added there.
|
||||
|
||||
Currently we provide the following checkpoints:
|
||||
|
||||
| | PyTorch | ONNX | Quantization | Languages | Colab |
|
||||
|-----------------|--------------------|--------------------|--------------|---------|-------|
|
||||
| VAD v1 (vad_v1) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | `ru`, `en`, `de`, `es` | [](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb) |
|
||||
|
||||
|
||||
### PyTorch
|
||||
|
||||
[](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb)
|
||||
|
||||
[](https://pytorch.org/hub/snakers4_silero-models_stt/)
|
||||
|
||||
```python
|
||||
import torch
|
||||
import zipfile
|
||||
import torchaudio
|
||||
from glob import glob
|
||||
|
||||
device = torch.device('cpu') # gpu also works, but our models are fast enough for CPU
|
||||
model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models',
|
||||
model='silero_stt',
|
||||
language='en', # also available 'de', 'es'
|
||||
device=device)
|
||||
(read_batch, split_into_batches,
|
||||
read_audio, prepare_model_input) = utils # see function signature for details
|
||||
|
||||
# download a single file, any format compatible with TorchAudio (soundfile backend)
|
||||
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
|
||||
dst ='speech_orig.wav', progress=True)
|
||||
test_files = glob('speech_orig.wav')
|
||||
batches = split_into_batches(test_files, batch_size=10)
|
||||
input = prepare_model_input(read_batch(batches[0]),
|
||||
device=device)
|
||||
|
||||
output = model(input)
|
||||
for example in output:
|
||||
print(decoder(example.cpu()))
|
||||
```
|
||||
|
||||
### ONNX
|
||||
|
||||
[](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb)
|
||||
|
||||
You can run our model anywhere you can import an ONNX model or run the ONNX runtime.
|
||||
|
||||
```python
|
||||
import onnx
|
||||
import torch
|
||||
import onnxruntime
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
language = 'en' # also available 'de', 'es'
|
||||
|
||||
# load provided utils
|
||||
_, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_stt', language=language)
|
||||
(read_batch, split_into_batches,
|
||||
read_audio, prepare_model_input) = utils
|
||||
|
||||
# see available models
|
||||
torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml', 'models.yml')
|
||||
models = OmegaConf.load('models.yml')
|
||||
available_languages = list(models.stt_models.keys())
|
||||
assert language in available_languages
|
||||
|
||||
# load the actual ONNX model
|
||||
torch.hub.download_url_to_file(models.stt_models.en.latest.onnx, 'model.onnx', progress=True)
|
||||
onnx_model = onnx.load('model.onnx')
|
||||
onnx.checker.check_model(onnx_model)
|
||||
ort_session = onnxruntime.InferenceSession('model.onnx')
|
||||
|
||||
# download a single file, any format compatible with TorchAudio (soundfile backend)
|
||||
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav', dst ='speech_orig.wav', progress=True)
|
||||
test_files = ['speech_orig.wav']
|
||||
batches = split_into_batches(test_files, batch_size=10)
|
||||
input = prepare_model_input(read_batch(batches[0]))
|
||||
|
||||
# actual onnx inference and decoding
|
||||
onnx_input = input.detach().cpu().numpy()
|
||||
ort_inputs = {'input': onnx_input}
|
||||
ort_outs = ort_session.run(None, ort_inputs)
|
||||
decoded = decoder(torch.Tensor(ort_outs[0])[0])
|
||||
print(decoded)
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
### Performance Metrics
|
||||
|
||||
Speed metrics here.
|
||||
|
||||
### Quality Metrics
|
||||
|
||||
Quality metrics here.
|
||||
|
||||
## Contact
|
||||
|
||||
### Get in Touch
|
||||
|
||||
Try our models, create an [issue](https://github.com/snakers4/silero-models/issues/new), join our [chat](https://t.me/joinchat/Bv9tjhpdXTI22OUgpOIIDg), [email](mailto:hello@silero.ai) us.
|
||||
|
||||
### Commercial Inquiries
|
||||
|
||||
Please see our [wiki](https://github.com/snakers4/silero-models/wiki) and [tiers](https://github.com/snakers4/silero-models/wiki/Licensing-and-Tiers) for relevant information and [email](mailto:hello@silero.ai) us.
|
||||
BIN
silero-vad/files/silero_logo.jpg
Normal file
BIN
silero-vad/files/silero_logo.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 23 KiB |
28
silero-vad/hubconf.py
Normal file
28
silero-vad/hubconf.py
Normal file
@@ -0,0 +1,28 @@
|
||||
dependencies = ['torch', 'omegaconf', 'torchaudio']
|
||||
import torch
|
||||
from omegaconf import OmegaConf
|
||||
from utils import (init_jit_model,
|
||||
read_audio,
|
||||
read_batch,
|
||||
split_into_batches,
|
||||
prepare_model_input)
|
||||
|
||||
|
||||
def silero_stt(**kwargs):
    """Silero Voice Activity and Number Detector Models.

    Downloads the latest ``models.yml`` manifest from the silero-vad
    repository, loads the TorchScript model it points at, and returns it
    together with a tuple of helper utilities.

    Args:
        **kwargs: forwarded to ``init_jit_model`` (e.g. ``device``).

    Returns:
        tuple: ``(model, utils)`` where *utils* is
        ``(read_batch, split_into_batches, read_audio, prepare_model_input)``.

    Please see https://github.com/snakers4/silero-vad for usage examples.
    """
    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-vad/master/models.yml',
                                   'silero_vad_models.yml',
                                   progress=False)
    models = OmegaConf.load('silero_vad_models.yml')

    # BUGFIX: the manifest nests ``latest`` under the ``stt_models`` key
    # (see models.yml in this repo), so ``models.latest`` would raise a
    # missing-key error.
    model = init_jit_model(model_url=models.stt_models.latest.jit,
                           **kwargs)
    utils = (read_batch,
             split_into_batches,
             read_audio,
             prepare_model_input)

    return model, utils
|
||||
14
silero-vad/models.yml
Normal file
14
silero-vad/models.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Pre-trained Voice Activity Detector and Number Detector
|
||||
stt_models:
|
||||
latest:
|
||||
meta:
|
||||
name: "vad_v1"
|
||||
languages: ['ru', 'en', 'de', 'es']
|
||||
samples:
|
||||
en: ""
|
||||
de: ""
|
||||
es: ""
|
||||
ru: ""
|
||||
jit: "https://silero-models.ams3.cdn.digitaloceanspaces.com/models/vad/vad_v1_jit.model"
|
||||
jit_q: "https://silero-models.ams3.cdn.digitaloceanspaces.com/models/vad/vad_v1_jit_q.model"
|
||||
onnx: "https://silero-models.ams3.cdn.digitaloceanspaces.com/models/vad/vad_v1.onnx"
|
||||
60
silero-vad/utils.py
Normal file
60
silero-vad/utils.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import torch
|
||||
import tempfile
|
||||
import torchaudio
|
||||
from typing import List
|
||||
|
||||
torchaudio.set_audio_backend("soundfile") # switch backend
|
||||
|
||||
|
||||
def read_batch(audio_paths: List[str]):
    """Load every file in *audio_paths* via ``read_audio`` and return the waveforms as a list."""
    waveforms = []
    for path in audio_paths:
        waveforms.append(read_audio(path))
    return waveforms
|
||||
|
||||
|
||||
def split_into_batches(lst: List[str],
                       batch_size: int = 10):
    """Chunk *lst* into consecutive sublists of at most *batch_size* items.

    The final batch may be shorter; an empty input yields an empty list.
    """
    batches = []
    for start in range(0, len(lst), batch_size):
        batches.append(lst[start:start + batch_size])
    return batches
|
||||
|
||||
|
||||
def read_audio(path: str,
               target_sr: int = 16000):
    """Load an audio file as a mono waveform resampled to *target_sr*.

    Returns a 1-D tensor of samples. Requires the 'soundfile' torchaudio
    backend (set at module import time).
    """
    assert torchaudio.get_audio_backend() == 'soundfile'
    wav, sr = torchaudio.load(path)

    # Downmix multi-channel audio to a single averaged channel.
    if wav.size(0) > 1:
        wav = wav.mean(dim=0, keepdim=True)

    # Resample only when the file's native rate differs from the target.
    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(orig_freq=sr,
                                                   new_freq=target_sr)
        wav = resampler(wav)
        sr = target_sr

    assert sr == target_sr
    return wav.squeeze(0)
|
||||
|
||||
|
||||
def prepare_model_input(batch: List[torch.Tensor],
                        device=torch.device('cpu')):
    """Zero-pad a batch of 1-D waveforms to a common length and stack them.

    Rows are padded on the right with zeros up to the longest waveform in
    the batch, but never shorter than 12800 samples. Returns a tensor of
    shape ``(len(batch), pad_to)`` on *device*.
    """
    # Enforce the 12800-sample floor on the padded length.
    pad_to = max(12800, max(len(wav) for wav in batch))
    padded = torch.zeros(len(batch), pad_to)
    for row, wav in enumerate(batch):
        padded[row, :len(wav)].copy_(wav)
    return padded.to(device)
|
||||
|
||||
|
||||
def init_jit_model(model_url: str,
                   device: torch.device = torch.device('cpu')):
    """Download a TorchScript model from *model_url* and return it in eval mode.

    The file is fetched into a temporary path, loaded onto *device*, and the
    temporary file is removed when the context exits.
    """
    # Inference-only usage: this disables autograd globally for the process.
    torch.set_grad_enabled(False)
    with tempfile.NamedTemporaryFile('wb', suffix='.model') as tmp:
        torch.hub.download_url_to_file(model_url,
                                       tmp.name,
                                       progress=True)
        model = torch.jit.load(tmp.name, map_location=device)
        model.eval()
    return model
|
||||
Reference in New Issue
Block a user