diff --git a/configs/data/hi-fi_en-US_female.yaml b/configs/data/hi-fi_en-US_female.yaml
new file mode 100644
index 0000000..1269f9b
--- /dev/null
+++ b/configs/data/hi-fi_en-US_female.yaml
@@ -0,0 +1,14 @@
+defaults:
+  - ljspeech
+  - _self_
+
+# Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
+_target_: matcha.data.text_mel_datamodule.TextMelDataModule
+name: hi-fi_en-US_female
+train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
+valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
+batch_size: 32
+cleaners: [english_cleaners_piper]
+data_statistics:  # Computed for this dataset
+  mel_mean: -6.38385
+  mel_std: 2.541796
diff --git a/configs/experiment/hifi_dataset_piper_phonemizer.yaml b/configs/experiment/hifi_dataset_piper_phonemizer.yaml
new file mode 100644
index 0000000..7e6c57a
--- /dev/null
+++ b/configs/experiment/hifi_dataset_piper_phonemizer.yaml
@@ -0,0 +1,14 @@
+# @package _global_
+
+# to execute this experiment run:
+# python train.py experiment=hifi_dataset_piper_phonemizer
+
+defaults:
+  - override /data: hi-fi_en-US_female.yaml
+
+# all parameters below will be merged with parameters from default configurations set above
+# this allows you to overwrite only specified parameters
+
+tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]
+
+run_name: hi-fi_en-US_female_piper_phonemizer
diff --git a/matcha/text/cleaners.py b/matcha/text/cleaners.py
index 26b91d7..5e8d96b 100644
--- a/matcha/text/cleaners.py
+++ b/matcha/text/cleaners.py
@@ -15,6 +15,7 @@ import logging
 import re
 
 import phonemizer
+import piper_phonemize
 from unidecode import unidecode
 
 # To avoid excessive logging we set the log level of the phonemizer package to Critical
@@ -103,3 +104,21 @@ def english_cleaners2(text):
     phonemes = global_phonemizer.phonemize([text], strip=True, njobs=1)[0]
     phonemes = collapse_whitespace(phonemes)
     return phonemes
+
+
+def english_cleaners_piper(text):
+    """Pipeline for English text: ASCII conversion, lowercasing, abbreviation
+    expansion, then espeak phonemization (punctuation + stress) via piper_phonemize.
+
+    The phonemizer output is treated as one phoneme list per sentence; all
+    sentences are joined with a single space into the returned string.
+    """
+    text = convert_to_ascii(text)
+    text = lowercase(text)
+    text = expand_abbreviations(text)
+    sentences = piper_phonemize.phonemize_espeak(text=text, voice="en-US")
+    # Keep every sentence rather than only index 0, so multi-sentence input is
+    # not truncated and an empty result gives "" instead of an IndexError.
+    phonemes = " ".join("".join(sentence) for sentence in sentences)
+    phonemes = collapse_whitespace(phonemes)
+    return phonemes
diff --git a/requirements.txt b/requirements.txt
index c1be781..f657dc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,3 +42,4 @@ gradio
 gdown
 wget
 seaborn
+piper_phonemize