mirror of
https://github.com/shivammehta25/Matcha-TTS.git
synced 2026-02-04 09:49:21 +08:00
Adding versioning in a file
This commit is contained in:
@@ -3,6 +3,7 @@ include LICENSE.txt
|
|||||||
include requirements.*.txt
|
include requirements.*.txt
|
||||||
include *.cff
|
include *.cff
|
||||||
include requirements.txt
|
include requirements.txt
|
||||||
|
include matcha/VERSION
|
||||||
recursive-include matcha *.json
|
recursive-include matcha *.json
|
||||||
recursive-include matcha *.html
|
recursive-include matcha *.html
|
||||||
recursive-include matcha *.png
|
recursive-include matcha *.png
|
||||||
|
|||||||
24
README.md
24
README.md
@@ -28,6 +28,8 @@ We propose 🍵 Matcha-TTS, a new approach to non-autoregressive neural TTS, tha
|
|||||||
|
|
||||||
Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS). Read our [arXiv preprint for more details](https://arxiv.org/abs/2309.03199).
|
Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS). Read our [arXiv preprint for more details](https://arxiv.org/abs/2309.03199).
|
||||||
|
|
||||||
|
[Pretrained models](https://drive.google.com/drive/folders/17C_gYgEHOxI5ZypcfE_k1piKCtyR0isJ?usp=sharing) will be downloaded automatically by the CLI or Gradio interface.
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
@@ -35,8 +37,8 @@ Check out our [demo page](https://shivammehta25.github.io/Matcha-TTS). Read our
|
|||||||
1. Create an environment (suggested but optional)
|
1. Create an environment (suggested but optional)
|
||||||
|
|
||||||
```
|
```
|
||||||
conda create -n matcha_tts python=3.10 -y
|
conda create -n matcha-tts python=3.10 -y
|
||||||
conda activate matcha_tts
|
conda activate matcha-tts
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install Matcha TTS using pip from source
|
2. Install Matcha TTS using pip from source
|
||||||
@@ -50,13 +52,13 @@ pip install git+https://github.com/shivammehta25/Matcha-TTS.git
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# This will download the required models
|
# This will download the required models
|
||||||
match_tts --text "<INPUT TEXT>"
|
matcha-tts --text "<INPUT TEXT>"
|
||||||
```
|
```
|
||||||
|
|
||||||
or
|
or
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
matcha_tts_app
|
matcha-tts_app
|
||||||
```
|
```
|
||||||
|
|
||||||
or open `synthesis.ipynb` in Jupyter Notebook
|
or open `synthesis.ipynb` in Jupyter Notebook
|
||||||
@@ -66,19 +68,19 @@ or open `synthesis.ipynb` on jupyter notebook
|
|||||||
- To synthesise from given text, run:
|
- To synthesise from given text, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --text "<INPUT TEXT>"
|
matcha-tts --text "<INPUT TEXT>"
|
||||||
```
|
```
|
||||||
|
|
||||||
- To synthesise from a file, run:
|
- To synthesise from a file, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --file <PATH TO FILE>
|
matcha-tts --file <PATH TO FILE>
|
||||||
```
|
```
|
||||||
|
|
||||||
- To batch synthesise from a file, run:
|
- To batch synthesise from a file, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --file <PATH TO FILE> --batched
|
matcha-tts --file <PATH TO FILE> --batched
|
||||||
```
|
```
|
||||||
|
|
||||||
Additional arguments
|
Additional arguments
|
||||||
@@ -86,19 +88,19 @@ Additional arguments
|
|||||||
- Speaking rate
|
- Speaking rate
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --text "<INPUT TEXT>" --speaking_rate 1.0
|
matcha-tts --text "<INPUT TEXT>" --speaking_rate 1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
- Sampling temperature
|
- Sampling temperature
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --text "<INPUT TEXT>" --temperature 0.667
|
matcha-tts --text "<INPUT TEXT>" --temperature 0.667
|
||||||
```
|
```
|
||||||
|
|
||||||
- Euler ODE solver steps
|
- Euler ODE solver steps
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
match_tts --text "<INPUT TEXT>" --steps 10
|
matcha-tts --text "<INPUT TEXT>" --steps 10
|
||||||
```
|
```
|
||||||
|
|
||||||
## Citation information
|
## Citation information
|
||||||
@@ -185,7 +187,7 @@ python matcha/train.py experiment=ljspeech trainer.devices=[0,1]
|
|||||||
6. Synthesise from the custom trained model
|
6. Synthesise from the custom trained model
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
matcha_tts --text "<INPUT TEXT>" --checkpoint_path <PATH TO CHECKPOINT>
|
matcha-tts --text "<INPUT TEXT>" --checkpoint_path <PATH TO CHECKPOINT>
|
||||||
```
|
```
|
||||||
|
|
||||||
## Acknowledgements
|
## Acknowledgements
|
||||||
|
|||||||
1
matcha/VERSION
Normal file
1
matcha/VERSION
Normal file
@@ -0,0 +1 @@
|
|||||||
|
0.0.1.dev0
|
||||||
@@ -30,7 +30,7 @@ args = Namespace(
|
|||||||
MATCHA_TTS_LOC = LOCATION / f"{args.model}.ckpt"
|
MATCHA_TTS_LOC = LOCATION / f"{args.model}.ckpt"
|
||||||
VOCODER_LOC = LOCATION / f"{args.vocoder}"
|
VOCODER_LOC = LOCATION / f"{args.vocoder}"
|
||||||
LOGO_URL = "https://shivammehta25.github.io/Matcha-TTS/images/logo.png"
|
LOGO_URL = "https://shivammehta25.github.io/Matcha-TTS/images/logo.png"
|
||||||
assert_model_downloaded(MATCHA_TTS_LOC, MATCHA_URLS[args.model], use_wget=True)
|
assert_model_downloaded(MATCHA_TTS_LOC, MATCHA_URLS[args.model])
|
||||||
assert_model_downloaded(VOCODER_LOC, VOCODER_URL[args.vocoder])
|
assert_model_downloaded(VOCODER_LOC, VOCODER_URL[args.vocoder])
|
||||||
device = get_device(args)
|
device = get_device(args)
|
||||||
|
|
||||||
|
|||||||
@@ -16,14 +16,14 @@ from matcha.models.matcha_tts import MatchaTTS
|
|||||||
from matcha.text import sequence_to_text, text_to_sequence
|
from matcha.text import sequence_to_text, text_to_sequence
|
||||||
from matcha.utils.utils import assert_model_downloaded, get_user_data_dir, intersperse
|
from matcha.utils.utils import assert_model_downloaded, get_user_data_dir, intersperse
|
||||||
|
|
||||||
MATCHA_URLS = {"matcha_ljspeech": ""} # , "matcha_vctk": ""} # Coming soon
|
MATCHA_URLS = {
|
||||||
|
"matcha_ljspeech": "https://drive.google.com/file/d/1BBzmMU7k3a_WetDfaFblMoN18GqQeHCg/view?usp=drive_link"
|
||||||
|
} # , "matcha_vctk": ""} # Coming soon
|
||||||
|
|
||||||
MULTISPEAKER_MODEL = {"matcha_vctk"}
|
MULTISPEAKER_MODEL = {"matcha_vctk"}
|
||||||
SINGLESPEAKER_MODEL = {"matcha_ljspeech"}
|
SINGLESPEAKER_MODEL = {"matcha_ljspeech"}
|
||||||
|
|
||||||
VOCODER_URL = {
|
VOCODER_URL = {"hifigan_T2_v1": "https://drive.google.com/file/d/14NENd4equCBLyyCSke114Mv6YR_j_uFs/view?usp=drive_link"}
|
||||||
"hifigan_T2_v1": "https://drive.google.com/file/d/14NENd4equCBLyyCSke114Mv6YR_j_uFs/view?usp=drive_link",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def plot_spectrogram_to_numpy(spectrogram, filename):
|
def plot_spectrogram_to_numpy(spectrogram, filename):
|
||||||
@@ -64,7 +64,7 @@ def assert_required_models_available(args):
|
|||||||
save_dir = get_user_data_dir()
|
save_dir = get_user_data_dir()
|
||||||
model_path = save_dir / f"{args.model}.ckpt"
|
model_path = save_dir / f"{args.model}.ckpt"
|
||||||
vocoder_path = save_dir / f"{args.vocoder}"
|
vocoder_path = save_dir / f"{args.vocoder}"
|
||||||
assert_model_downloaded(model_path, MATCHA_URLS[args.model], use_wget=True)
|
assert_model_downloaded(model_path, MATCHA_URLS[args.model])
|
||||||
assert_model_downloaded(vocoder_path, VOCODER_URL[args.vocoder])
|
assert_model_downloaded(vocoder_path, VOCODER_URL[args.vocoder])
|
||||||
return {"matcha": model_path, "vocoder": vocoder_path}
|
return {"matcha": model_path, "vocoder": vocoder_path}
|
||||||
|
|
||||||
|
|||||||
5
setup.py
5
setup.py
@@ -15,10 +15,13 @@ exts = [
|
|||||||
with open("README.md", encoding="utf-8") as readme_file:
|
with open("README.md", encoding="utf-8") as readme_file:
|
||||||
README = readme_file.read()
|
README = readme_file.read()
|
||||||
|
|
||||||
|
cwd = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
with open(os.path.join(cwd, "matcha", "VERSION")) as fin:
|
||||||
|
version = fin.read().strip()
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="matcha-tts",
|
name="matcha-tts",
|
||||||
version="0.0.0.1.dev0",
|
version=version,
|
||||||
description="🍵 Matcha-TTS: A fast TTS architecture with conditional flow matching",
|
description="🍵 Matcha-TTS: A fast TTS architecture with conditional flow matching",
|
||||||
long_description=README,
|
long_description=README,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
Reference in New Issue
Block a user