From 4bc541705ae182909ce165fc238e3a49a6f8e28d Mon Sep 17 00:00:00 2001
From: Jim O'Regan
Date: Thu, 3 Oct 2024 17:18:58 +0000
Subject: [PATCH 1/2] add a cleaner for the mmconv data

Different versions of espeak represent things differently, it seems
(also, there are some distinctions none of our speakers make, so
normalising those away reduces perplexity a tiny amount).
---
 matcha/text/cleaners.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/matcha/text/cleaners.py b/matcha/text/cleaners.py
index 36776e3..386257b 100644
--- a/matcha/text/cleaners.py
+++ b/matcha/text/cleaners.py
@@ -105,6 +105,20 @@ def english_cleaners2(text):
     return phonemes
 
 
+def mmconv_ipa_simplify(text):
+    replacements = [
+        ("ɐ", "ə"),
+        ("ˈə", "ə"),
+        ("ʤ", "dʒ"),
+        ("ʧ", "tʃ"),
+        ("ᵻ", "ɪ"),
+    ]
+    for replacement in replacements:
+        text = text.replace(replacement[0], replacement[1])
+    phonemes = collapse_whitespace(text)
+    return phonemes
+
+
 # I am removing this due to incompatibility with several version of python
 # However, if you want to use it, you can uncomment it
 # and install piper-phonemize with the following command:

From 863bfbdd8b3dc3df20fd7e15f8f2f0353e61486a Mon Sep 17 00:00:00 2001
From: Jim O'Regan
Date: Thu, 3 Oct 2024 18:51:47 +0000
Subject: [PATCH 2/2] rename method, it's more generic than the previous name suggested

---
 matcha/text/cleaners.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matcha/text/cleaners.py b/matcha/text/cleaners.py
index 386257b..6b2dff4 100644
--- a/matcha/text/cleaners.py
+++ b/matcha/text/cleaners.py
@@ -105,7 +105,7 @@ def english_cleaners2(text):
     return phonemes
 
 
-def mmconv_ipa_simplify(text):
+def ipa_simplifier(text):
     replacements = [
         ("ɐ", "ə"),
         ("ˈə", "ə"),