mirror of
https://github.com/shivammehta25/Matcha-TTS.git
synced 2026-02-04 09:49:21 +08:00
add a cleaner for the mmconv data
Different versions of espeak represent things differently, it seems (also, there are some distinctions none of our speakers make, so normalising those away reduces perplexity a tiny amount).
This commit is contained in:
@@ -105,6 +105,20 @@ def english_cleaners2(text):
|
||||
return phonemes
|
||||
|
||||
|
||||
def mmconv_ipa_simplify(text):
    """Normalise an IPA phoneme string by applying a fixed set of substitutions.

    Each (source, target) pair below is applied to the whole string, in the
    listed order, and the result is then passed through `collapse_whitespace`.

    Args:
        text: The phoneme string to simplify.

    Returns:
        The value returned by `collapse_whitespace` for the substituted string.
    """
    # Order matters: "ɐ" is rewritten to "ə" first, so the following rule
    # (which drops the stress mark before "ə") also applies to an original "ˈɐ".
    substitutions = (
        ("ɐ", "ə"),
        ("ˈə", "ə"),
        ("ʤ", "dʒ"),
        ("ʧ", "tʃ"),
        ("ᵻ", "ɪ"),
    )
    simplified = text
    for source, target in substitutions:
        simplified = simplified.replace(source, target)
    return collapse_whitespace(simplified)
|
||||
|
||||
|
||||
# I am removing this due to incompatibility with several versions of Python
|
||||
# However, if you want to use it, you can uncomment it
|
||||
# and install piper-phonemize with the following command:
|
||||
|
||||
Reference in New Issue
Block a user