diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 03bd2b7..e695f11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: # list of supported hooks: https://pre-commit.com/hooks.html - id: trailing-whitespace @@ -18,28 +18,28 @@ repos: # python code formatting - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.12.1 hooks: - id: black args: [--line-length, "120"] # python import sorting - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort args: ["--profile", "black", "--filter-files"] # python upgrading syntax to newer version - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.15.0 hooks: - id: pyupgrade args: [--py38-plus] # python check (PEP8), programming errors and code complexity - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.0.0 hooks: - id: flake8 args: @@ -54,6 +54,6 @@ repos: # pylint - repo: https://github.com/pycqa/pylint - rev: v2.8.2 + rev: v3.0.3 hooks: - id: pylint diff --git a/.pylintrc b/.pylintrc index 11aeb8d..9628641 100644 --- a/.pylintrc +++ b/.pylintrc @@ -82,16 +82,6 @@ disable=missing-docstring, no-name-in-module, no-member, unsubscriptable-object, - print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, raw-checker-failed, bad-inline-option, locally-disabled, @@ -106,67 +96,6 @@ disable=missing-docstring, too-many-arguments, too-many-locals, too-many-statements, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, duplicate-code, not-callable, import-outside-toplevel, @@ -363,13 +292,6 @@ max-line-length=120 # Maximum number of lines in a module. max-module-lines=1000 -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator - # Allow the body of a class to be on the same line as the declaration if body # contains single statement. single-line-class-stmt=no @@ -599,5 +521,5 @@ min-public-methods=2 # Exceptions that will emit a warning when being caught. Defaults to # "BaseException, Exception". -overgeneral-exceptions=BaseException, - Exception +overgeneral-exceptions=builtins.BaseException, + builtins.Exception diff --git a/README.md b/README.md index 2534da0..ebc6b7c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ -> This is the official code implementation of 🍵 Matcha-TTS. +> This is the official code implementation of 🍵 Matcha-TTS [ICASSP 2024]. We propose 🍵 Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses [conditional flow matching](https://arxiv.org/abs/2210.02747) (similar to [rectified flows](https://arxiv.org/abs/2209.03003)) to speed up ODE-based speech synthesis. Our method: @@ -26,13 +26,13 @@ We propose 🍵 Matcha-TTS, a new approach to non-autoregressive neural TTS, tha - Sounds highly natural - Is very fast to synthesise from -Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS) and read [our arXiv preprint](https://arxiv.org/abs/2309.03199) for more details. +Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS) and read [our ICASSP 2024 paper](https://arxiv.org/abs/2309.03199) for more details. [Pre-trained models](https://drive.google.com/drive/folders/17C_gYgEHOxI5ZypcfE_k1piKCtyR0isJ?usp=sharing) will be automatically downloaded with the CLI or gradio interface. -[Try 🍵 Matcha-TTS on HuggingFace 🤗 spaces!](https://huggingface.co/spaces/shivammehta25/Matcha-TTS) +You can also [try 🍵 Matcha-TTS in your browser on HuggingFace 🤗 spaces](https://huggingface.co/spaces/shivammehta25/Matcha-TTS). -## Watch the teaser +## Teaser video [![Watch the video](https://img.youtube.com/vi/xmvJkz3bqw0/hqdefault.jpg)](https://youtu.be/xmvJkz3bqw0) @@ -257,11 +257,11 @@ This will write `.wav` audio files to the output directory. If you use our code or otherwise find this work useful, please cite our paper: ```text -@article{mehta2023matcha, - title={Matcha-TTS: A fast TTS architecture with conditional flow matching}, +@inproceedings{mehta2024matcha, + title={Matcha-{TTS}: A fast {TTS} architecture with conditional flow matching}, author={Mehta, Shivam and Tu, Ruibo and Beskow, Jonas and Sz{\'e}kely, {\'E}va and Henter, Gustav Eje}, - journal={arXiv preprint arXiv:2309.03199}, - year={2023} + booktitle={Proc. ICASSP}, + year={2024} } ``` @@ -269,7 +269,7 @@ If you use our code or otherwise find this work useful, please cite our paper: Since this code uses [Lightning-Hydra-Template](https://github.com/ashleve/lightning-hydra-template), you have all the powers that come with it. -Other source code I would like to acknowledge: +Other source code we would like to acknowledge: - [Coqui-TTS](https://github.com/coqui-ai/TTS/tree/dev): For helping me figure out how to make cython binaries pip installable and encouragement - [Hugging Face Diffusers](https://huggingface.co/): For their awesome diffusers library and its components diff --git a/matcha/VERSION b/matcha/VERSION index 81340c7..442b113 100644 --- a/matcha/VERSION +++ b/matcha/VERSION @@ -1 +1 @@ -0.0.4 +0.0.5.1 diff --git a/matcha/app.py b/matcha/app.py index 16e8077..d68fbaa 100644 --- a/matcha/app.py +++ b/matcha/app.py @@ -29,8 +29,15 @@ args = Namespace( CURRENTLY_LOADED_MODEL = args.model -MATCHA_TTS_LOC = lambda x: LOCATION / f"{x}.ckpt" # noqa: E731 -VOCODER_LOC = lambda x: LOCATION / f"{x}" # noqa: E731 + +def MATCHA_TTS_LOC(x): + return LOCATION / f"{x}.ckpt" + + +def VOCODER_LOC(x): + return LOCATION / f"{x}" + + LOGO_URL = "https://shivammehta25.github.io/Matcha-TTS/images/logo.png" RADIO_OPTIONS = { "Multi Speaker (VCTK)": { diff --git a/matcha/cli.py b/matcha/cli.py index 9e3f7fb..579d7d6 100644 --- a/matcha/cli.py +++ b/matcha/cli.py @@ -18,13 +18,13 @@ from matcha.text import sequence_to_text, text_to_sequence from matcha.utils.utils import assert_model_downloaded, get_user_data_dir, intersperse MATCHA_URLS = { - "matcha_ljspeech": "https://drive.google.com/file/d/1BBzmMU7k3a_WetDfaFblMoN18GqQeHCg/view?usp=drive_link", - "matcha_vctk": "https://drive.google.com/file/d/1enuxmfslZciWGAl63WGh2ekVo00FYuQ9/view?usp=drive_link", + "matcha_ljspeech": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/matcha_ljspeech.ckpt", + "matcha_vctk": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/matcha_vctk.ckpt", } VOCODER_URLS = { - "hifigan_T2_v1": "https://drive.google.com/file/d/14NENd4equCBLyyCSke114Mv6YR_j_uFs/view?usp=drive_link", - "hifigan_univ_v1": "https://drive.google.com/file/d/1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW/view?usp=drive_link", + "hifigan_T2_v1": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/generator_v1", # Old url: https://drive.google.com/file/d/14NENd4equCBLyyCSke114Mv6YR_j_uFs/view?usp=drive_link + "hifigan_univ_v1": "https://github.com/shivammehta25/Matcha-TTS-checkpoints/releases/download/v1.0/g_02500000", # Old url: https://drive.google.com/file/d/1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW/view?usp=drive_link } MULTISPEAKER_MODEL = { @@ -63,7 +63,7 @@ def get_texts(args): if args.text: texts = [args.text] else: - with open(args.file) as f: + with open(args.file, encoding="utf-8") as f: texts = f.readlines() return texts @@ -140,7 +140,7 @@ def validate_args(args): if args.checkpoint_path is None: # When using pretrained models - if args.model in SINGLESPEAKER_MODEL.keys(): + if args.model in SINGLESPEAKER_MODEL: args = validate_args_for_single_speaker_model(args) if args.model in MULTISPEAKER_MODEL: diff --git a/matcha/utils/utils.py b/matcha/utils/utils.py index 5f8162d..af65e09 100644 --- a/matcha/utils/utils.py +++ b/matcha/utils/utils.py @@ -115,7 +115,7 @@ def get_metric_value(metric_dict: Dict[str, Any], metric_name: str) -> float: return None if metric_name not in metric_dict: - raise Exception( + raise ValueError( f"Metric value not found! \n" "Make sure metric name logged in LightningModule is correct!\n" "Make sure `optimized_metric` name in `hparams_search` config is correct!" @@ -205,11 +205,13 @@ def get_user_data_dir(appname="matcha_tts"): return final_path -def assert_model_downloaded(checkpoint_path, url, use_wget=False): +def assert_model_downloaded(checkpoint_path, url, use_wget=True): if Path(checkpoint_path).exists(): log.debug(f"[+] Model already present at {checkpoint_path}!") + print(f"[+] Model already present at {checkpoint_path}!") return log.info(f"[-] Model not found at {checkpoint_path}! Will download it") + print(f"[-] Model not found at {checkpoint_path}! Will download it") checkpoint_path = str(checkpoint_path) if not use_wget: gdown.download(url=url, output=checkpoint_path, quiet=False, fuzzy=True) diff --git a/requirements.txt b/requirements.txt index f657dc1..0a7e14c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,7 +35,7 @@ torchaudio matplotlib pandas conformer==0.3.2 -diffusers==0.21.3 +diffusers==0.25.0 notebook ipywidgets gradio