diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 03bd2b7..7cda633 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # python code formatting - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.9.1 hooks: - id: black args: [--line-length, "120"] @@ -32,14 +32,14 @@ repos: # python upgrading syntax to newer version - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.14.0 hooks: - id: pyupgrade args: [--py38-plus] # python check (PEP8), programming errors and code complexity - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 args: @@ -54,6 +54,6 @@ repos: # pylint - repo: https://github.com/pycqa/pylint - rev: v2.8.2 + rev: v3.0.0 hooks: - id: pylint diff --git a/.pylintrc b/.pylintrc index 11aeb8d..7ab186a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -82,16 +82,6 @@ disable=missing-docstring, no-name-in-module, no-member, unsubscriptable-object, - print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, raw-checker-failed, bad-inline-option, locally-disabled, @@ -106,67 +96,6 @@ disable=missing-docstring, too-many-arguments, too-many-locals, too-many-statements, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, duplicate-code, not-callable, import-outside-toplevel, diff --git a/README.md b/README.md index 2534da0..e33084d 100644 --- a/README.md +++ b/README.md @@ -26,13 +26,13 @@ We propose 🍵 Matcha-TTS, a new approach to non-autoregressive neural TTS, tha - Sounds highly natural - Is very fast to synthesise from -Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS) and read [our arXiv preprint](https://arxiv.org/abs/2309.03199) for more details. +Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS) and read [our ICASSP 2024 paper](https://arxiv.org/abs/2309.03199) for more details. [Pre-trained models](https://drive.google.com/drive/folders/17C_gYgEHOxI5ZypcfE_k1piKCtyR0isJ?usp=sharing) will be automatically downloaded with the CLI or gradio interface. -[Try 🍵 Matcha-TTS on HuggingFace 🤗 spaces!](https://huggingface.co/spaces/shivammehta25/Matcha-TTS) +You can also [try 🍵 Matcha-TTS in your browser on HuggingFace 🤗 spaces](https://huggingface.co/spaces/shivammehta25/Matcha-TTS). -## Watch the teaser +## Teaser video [![Watch the video](https://img.youtube.com/vi/xmvJkz3bqw0/hqdefault.jpg)](https://youtu.be/xmvJkz3bqw0) @@ -257,11 +257,11 @@ This will write `.wav` audio files to the output directory. If you use our code or otherwise find this work useful, please cite our paper: ```text -@article{mehta2023matcha, - title={Matcha-TTS: A fast TTS architecture with conditional flow matching}, +@inproceedings{mehta2024matcha, + title={Matcha-{TTS}: A fast {TTS} architecture with conditional flow matching}, author={Mehta, Shivam and Tu, Ruibo and Beskow, Jonas and Sz{\'e}kely, {\'E}va and Henter, Gustav Eje}, - journal={arXiv preprint arXiv:2309.03199}, - year={2023} + booktitle={Proc. ICASSP}, + year={2024} } ``` @@ -269,7 +269,7 @@ If you use our code or otherwise find this work useful, please cite our paper: Since this code uses [Lightning-Hydra-Template](https://github.com/ashleve/lightning-hydra-template), you have all the powers that come with it. -Other source code I would like to acknowledge: +Other source code we would like to acknowledge: - [Coqui-TTS](https://github.com/coqui-ai/TTS/tree/dev): For helping me figure out how to make cython binaries pip installable and encouragement - [Hugging Face Diffusers](https://huggingface.co/): For their awesome diffusers library and its components diff --git a/matcha/cli.py b/matcha/cli.py index 9e3f7fb..f3c29a7 100644 --- a/matcha/cli.py +++ b/matcha/cli.py @@ -18,8 +18,8 @@ from matcha.text import sequence_to_text, text_to_sequence from matcha.utils.utils import assert_model_downloaded, get_user_data_dir, intersperse MATCHA_URLS = { - "matcha_ljspeech": "https://drive.google.com/file/d/1BBzmMU7k3a_WetDfaFblMoN18GqQeHCg/view?usp=drive_link", - "matcha_vctk": "https://drive.google.com/file/d/1enuxmfslZciWGAl63WGh2ekVo00FYuQ9/view?usp=drive_link", + "matcha_ljspeech": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/matcha_ljspeech.ckpt", + "matcha_vctk": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/matcha_vctk.ckpt", } VOCODER_URLS = { @@ -63,7 +63,7 @@ def get_texts(args): if args.text: texts = [args.text] else: - with open(args.file) as f: + with open(args.file, encoding="utf-8") as f: texts = f.readlines() return texts @@ -140,7 +140,7 @@ def validate_args(args): if args.checkpoint_path is None: # When using pretrained models - if args.model in SINGLESPEAKER_MODEL.keys(): + if args.model in SINGLESPEAKER_MODEL: args = validate_args_for_single_speaker_model(args) if args.model in MULTISPEAKER_MODEL: