mirror of
https://github.com/HumanAIGC/lite-avatar.git
synced 2026-02-05 18:09:20 +08:00
add files
This commit is contained in:
77
funasr_local/text/korean_cleaner.py
Normal file
77
funasr_local/text/korean_cleaner.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Referenced from https://github.com/hccho2/Tacotron-Wavenet-Vocoder-Korean
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class KoreanCleaner:
|
||||
@classmethod
|
||||
def _normalize_numbers(cls, text):
|
||||
number_to_kor = {
|
||||
"0": "영",
|
||||
"1": "일",
|
||||
"2": "이",
|
||||
"3": "삼",
|
||||
"4": "사",
|
||||
"5": "오",
|
||||
"6": "육",
|
||||
"7": "칠",
|
||||
"8": "팔",
|
||||
"9": "구",
|
||||
}
|
||||
new_text = "".join(
|
||||
number_to_kor[char] if char in number_to_kor.keys() else char
|
||||
for char in text
|
||||
)
|
||||
return new_text
|
||||
|
||||
@classmethod
|
||||
def _normalize_english_text(cls, text):
|
||||
upper_alphabet_to_kor = {
|
||||
"A": "에이",
|
||||
"B": "비",
|
||||
"C": "씨",
|
||||
"D": "디",
|
||||
"E": "이",
|
||||
"F": "에프",
|
||||
"G": "지",
|
||||
"H": "에이치",
|
||||
"I": "아이",
|
||||
"J": "제이",
|
||||
"K": "케이",
|
||||
"L": "엘",
|
||||
"M": "엠",
|
||||
"N": "엔",
|
||||
"O": "오",
|
||||
"P": "피",
|
||||
"Q": "큐",
|
||||
"R": "알",
|
||||
"S": "에스",
|
||||
"T": "티",
|
||||
"U": "유",
|
||||
"V": "브이",
|
||||
"W": "더블유",
|
||||
"X": "엑스",
|
||||
"Y": "와이",
|
||||
"Z": "지",
|
||||
}
|
||||
new_text = re.sub("[a-z]+", lambda x: str.upper(x.group()), text)
|
||||
new_text = "".join(
|
||||
upper_alphabet_to_kor[char]
|
||||
if char in upper_alphabet_to_kor.keys()
|
||||
else char
|
||||
for char in new_text
|
||||
)
|
||||
|
||||
return new_text
|
||||
|
||||
@classmethod
|
||||
def normalize_text(cls, text):
|
||||
# stage 0 : text strip
|
||||
text = text.strip()
|
||||
|
||||
# stage 1 : normalize numbers
|
||||
text = cls._normalize_numbers(text)
|
||||
|
||||
# stage 2 : normalize english text
|
||||
text = cls._normalize_english_text(text)
|
||||
return text
|
||||
Reference in New Issue
Block a user