Adding piper phonemizer with different dataset

This commit is contained in:
Shivam Mehta
2023-12-01 12:06:26 +00:00
parent df896301ca
commit 263d5c4d4e
4 changed files with 39 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ import logging
import re
import phonemizer
import piper_phonemize
from unidecode import unidecode
# To avoid excessive logging we set the log level of the phonemizer package to Critical
@@ -103,3 +104,13 @@ def english_cleaners2(text):
phonemes = global_phonemizer.phonemize([text], strip=True, njobs=1)[0]
phonemes = collapse_whitespace(phonemes)
return phonemes
def english_cleaners_piper(text):
    """Pipeline for English text that phonemizes with piper_phonemize (espeak, en-US).

    The text is converted to ASCII, lowercased and has its abbreviations
    expanded before being phonemized; whitespace in the phoneme string is
    collapsed afterwards.

    Args:
        text: Raw input text.

    Returns:
        The phoneme string covering every sentence found in ``text``. It is
        empty when the phonemizer yields no sentences.
    """
    text = convert_to_ascii(text)
    text = lowercase(text)
    text = expand_abbreviations(text)
    # phonemize_espeak returns one list of phoneme strings per sentence.
    # Indexing only [0] would drop every sentence after the first and fail
    # on input that yields no sentences, so all sentences are consumed here.
    sentences = piper_phonemize.phonemize_espeak(text=text, voice="en-US")
    # Sentences are joined with a space so adjacent sentences do not fuse;
    # collapse_whitespace below handles any whitespace runs this creates.
    phonemes = " ".join("".join(sentence) for sentence in sentences)
    phonemes = collapse_whitespace(phonemes)
    return phonemes