commit 56860ee673881e211ff2bda6f560cf9bc496575e Author: snakers41 Date: Mon Nov 23 10:28:37 2020 +0000 First commit diff --git a/silero-vad/.github/ISSUE_TEMPLATE/bug_report.md b/silero-vad/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..af3202d --- /dev/null +++ b/silero-vad/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,52 @@ +--- +name: Bug report +about: Create a report to help us improve +title: Bug report - [X] +labels: bug +assignees: snakers4 + +--- + +## 🐛 Bug + + + +## To Reproduce + +Steps to reproduce the behavior: + +1. +2. +3. + + + +## Expected behavior + + + +## Environment + +Please copy and paste the output from this +[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py) +(or fill out the checklist below manually). + +You can get the script and run it with: +``` +wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py +# For security purposes, please check the contents of collect_env.py before running it. 
+python collect_env.py +``` + + - PyTorch Version (e.g., 1.0): + - OS (e.g., Linux): + - How you installed PyTorch (`conda`, `pip`, source): + - Build command you used (if compiling from source): + - Python version: + - CUDA/cuDNN version: + - GPU models and configuration: + - Any other relevant information: + +## Additional context + + diff --git a/silero-vad/.github/ISSUE_TEMPLATE/feature_request.md b/silero-vad/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bfe42a2 --- /dev/null +++ b/silero-vad/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,27 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: Feature request - [X] +labels: enhancement +assignees: snakers4 + +--- + +## 🚀 Feature + + +## Motivation + + + +## Pitch + + + +## Alternatives + + + +## Additional context + + diff --git a/silero-vad/.github/ISSUE_TEMPLATE/questions---help---support.md b/silero-vad/.github/ISSUE_TEMPLATE/questions---help---support.md new file mode 100644 index 0000000..1eed38e --- /dev/null +++ b/silero-vad/.github/ISSUE_TEMPLATE/questions---help---support.md @@ -0,0 +1,12 @@ +--- +name: Questions / Help / Support +about: Ask for help, support or ask a question +title: "❓ Questions / Help / Support" +labels: help wanted +assignees: snakers4 + +--- + +## ❓ Questions and Help + +We have a [wiki](https://github.com/snakers4/silero-models/wiki) available for our users. Please make sure you have checked it out first. 
diff --git a/silero-vad/CODE_OF_CONDUCT.md b/silero-vad/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..c69125e --- /dev/null +++ b/silero-vad/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at aveysov@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/silero-vad/README.md b/silero-vad/README.md new file mode 100644 index 0000000..eac8179 --- /dev/null +++ b/silero-vad/README.md @@ -0,0 +1,154 @@ + [![Mailing list : test](http://img.shields.io/badge/Email-gray.svg?style=for-the-badge&logo=gmail)](mailto:hello@silero.ai) [![Mailing list : test](http://img.shields.io/badge/Telegram-blue.svg?style=for-the-badge&logo=telegram)](https://t.me/joinchat/Bv9tjhpdXTI22OUgpOIIDg) [![License: CC BY-NC 4.0](https://img.shields.io/badge/License-GNU%20AGPL%203.0-lightgrey.svg?style=for-the-badge)](https://github.com/snakers4/silero-models/blob/master/LICENSE) + + [![Open on Torch Hub](https://img.shields.io/badge/Torch-Hub-red?logo=pytorch&style=for-the-badge)](https://pytorch.org/hub/snakers4_silero-models_stt/) [![Open on TF Hub](https://img.shields.io/badge/TF-Hub-yellow?logo=tensorflow&style=for-the-badge)](https://tfhub.dev/silero/collections/silero-stt/1) + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb) + +![header)](https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png) + +- [Silero VAD](#silero-vad) + - [Getting Started](#getting-started) + - [PyTorch](#pytorch) + - [ONNX](#onnx) + - [Metrics](#metrics) + - [Performance Metrics](#performance-metrics) + - [Quality Metrics](#quality-metrics) + - [Contact](#contact) + - [Get in Touch](#get-in-touch) + - [Commercial Inquiries](#commercial-inquiries) + + +# Silero VAD + +Silero VAD: pre-trained enterprise-grade Voice Activity and Number Detector. 
+Enterprise-grade Speech Products made refreshingly simple (all see our [STT](https://github.com/snakers4/silero-models)). + +Currently, there are hardly any high quality / modern / free / public voice activity detectors except for WebRTC Voice Activity Detector ([link](https://github.com/wiseman/py-webrtcvad)). + +Also in enterprise it is crucial to be able to anonymize large-scale spoken corpora (i.e. remove personal data). Typically personal data is considered to be private / sensitive if it contains (i) a name (ii) some private ID. Name recognition is highly subjective and would depend on location, but Voice Activity and Number detections are quite general tasks. + +**Key advantages:** + +- Modern, portable; +- Small memory footprint (?); +- Trained on huge spoken corpora and noise / sound libraries; +- Slower than WebRTC, but sufficiently fast for IOT / edge / mobile applications; + +**Typical use cases:** + +- Spoken corpora anonymization; +- Voice detection for IOT / edge / mobile use cases; +- Data cleaning and preparation, number and voice detection in general; + + +Key features / differences: + +## Getting Started + +All of the provided models are listed in the [models.yml](https://github.com/snakers4/silero-models/blob/master/models.yml) file. +Any meta-data and newer versions will be added there. 
+ +Currently we provide the following checkpoints: + +| | PyTorch | ONNX | Quantization | Languages | Colab | +|-----------------|--------------------|--------------------|--------------|---------|-------| +| VAD v1 (vad_v1) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | `ru`, `en`, `de`, `es` | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb) | + + +### PyTorch + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb) + +[![Open on Torch Hub](https://img.shields.io/badge/Torch-Hub-red?logo=pytorch&style=for-the-badge)](https://pytorch.org/hub/snakers4_silero-models_stt/) + +```python +import torch +import zipfile +import torchaudio +from glob import glob + +device = torch.device('cpu') # gpu also works, but our models are fast enough for CPU +model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models', + model='silero_stt', + language='en', # also available 'de', 'es' + device=device) +(read_batch, split_into_batches, + read_audio, prepare_model_input) = utils # see function signature for details + +# download a single file, any format compatible with TorchAudio (soundfile backend) +torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav', + dst ='speech_orig.wav', progress=True) +test_files = glob('speech_orig.wav') +batches = split_into_batches(test_files, batch_size=10) +input = prepare_model_input(read_batch(batches[0]), + device=device) + +output = model(input) +for example in output: + print(decoder(example.cpu())) +``` + +### ONNX + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples.ipynb) + +You can run our model everywhere, where you can import 
the ONNX model or run ONNX runtime. + +```python +import onnx +import torch +import onnxruntime +from omegaconf import OmegaConf + +language = 'en' # also available 'de', 'es' + +# load provided utils +_, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_stt', language=language) +(read_batch, split_into_batches, + read_audio, prepare_model_input) = utils + +# see available models +torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml', 'models.yml') +models = OmegaConf.load('models.yml') +available_languages = list(models.stt_models.keys()) +assert language in available_languages + +# load the actual ONNX model +torch.hub.download_url_to_file(models.stt_models.en.latest.onnx, 'model.onnx', progress=True) +onnx_model = onnx.load('model.onnx') +onnx.checker.check_model(onnx_model) +ort_session = onnxruntime.InferenceSession('model.onnx') + +# download a single file, any format compatible with TorchAudio (soundfile backend) +torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav', dst ='speech_orig.wav', progress=True) +test_files = ['speech_orig.wav'] +batches = split_into_batches(test_files, batch_size=10) +input = prepare_model_input(read_batch(batches[0])) + +# actual onnx inference and decoding +onnx_input = input.detach().cpu().numpy() +ort_inputs = {'input': onnx_input} +ort_outs = ort_session.run(None, ort_inputs) +decoded = decoder(torch.Tensor(ort_outs[0])[0]) +print(decoded) +``` + +## Metrics + +### Performance Metrics + +Speed metrics here. + +### Quality Metrics + +Quality metrics here. + +## Contact + +### Get in Touch + +Try our models, create an [issue](https://github.com/snakers4/silero-models/issues/new), join our [chat](https://t.me/joinchat/Bv9tjhpdXTI22OUgpOIIDg), [email](mailto:hello@silero.ai) us. 
dependencies = ['torch', 'omegaconf', 'torchaudio']
import torch
from omegaconf import OmegaConf
from utils import (init_jit_model,
                   read_audio,
                   read_batch,
                   split_into_batches,
                   prepare_model_input)


def silero_stt(**kwargs):
    """torch.hub entry point for the Silero Voice Activity / Number Detector.

    Downloads the model registry (``models.yml``) from the repository,
    loads the latest TorchScript VAD checkpoint and returns it together
    with the audio-handling utilities.

    Keyword args:
        **kwargs: forwarded verbatim to ``init_jit_model``
            (e.g. ``device=torch.device('cuda')``).

    Returns:
        tuple: ``(model, utils)`` where ``utils`` is the tuple
        ``(read_batch, split_into_batches, read_audio, prepare_model_input)``.

    Please see https://github.com/snakers4/silero-vad for usage examples.
    """
    torch.hub.download_url_to_file(
        'https://raw.githubusercontent.com/snakers4/silero-vad/master/models.yml',
        'silero_vad_models.yml',
        progress=False)
    models = OmegaConf.load('silero_vad_models.yml')

    # BUG FIX: models.yml nests the checkpoints under a top-level
    # `stt_models` key (stt_models -> latest -> jit), so the previous
    # `models.latest.jit` lookup raised at runtime; resolve the URL
    # through `stt_models.latest` instead.
    model = init_jit_model(model_url=models.stt_models.latest.jit,
                           **kwargs)
    utils = (read_batch,
             split_into_batches,
             read_audio,
             prepare_model_input)

    return model, utils
"https://silero-models.ams3.cdn.digitaloceanspaces.com/models/vad/vad_v1.onnx" diff --git a/silero-vad/utils.py b/silero-vad/utils.py new file mode 100644 index 0000000..23019df --- /dev/null +++ b/silero-vad/utils.py @@ -0,0 +1,60 @@ +import torch +import tempfile +import torchaudio +from typing import List + +torchaudio.set_audio_backend("soundfile") # switch backend + + +def read_batch(audio_paths: List[str]): + return [read_audio(audio_path) + for audio_path + in audio_paths] + + +def split_into_batches(lst: List[str], + batch_size: int = 10): + return [lst[i:i + batch_size] + for i in + range(0, len(lst), batch_size)] + + +def read_audio(path: str, + target_sr: int = 16000): + + assert torchaudio.get_audio_backend() == 'soundfile' + wav, sr = torchaudio.load(path) + + if wav.size(0) > 1: + wav = wav.mean(dim=0, keepdim=True) + + if sr != target_sr: + transform = torchaudio.transforms.Resample(orig_freq=sr, + new_freq=target_sr) + wav = transform(wav) + sr = target_sr + + assert sr == target_sr + return wav.squeeze(0) + + +def prepare_model_input(batch: List[torch.Tensor], + device=torch.device('cpu')): + max_seqlength = max(max([len(_) for _ in batch]), 12800) + inputs = torch.zeros(len(batch), max_seqlength) + for i, wav in enumerate(batch): + inputs[i, :len(wav)].copy_(wav) + inputs = inputs.to(device) + return inputs + + +def init_jit_model(model_url: str, + device: torch.device = torch.device('cpu')): + torch.set_grad_enabled(False) + with tempfile.NamedTemporaryFile('wb', suffix='.model') as f: + torch.hub.download_url_to_file(model_url, + f.name, + progress=True) + model = torch.jit.load(f.name, map_location=device) + model.eval() + return model