Merge pull request #451 from zhuzizyf/bug-fix-split_paragraph

Bug fix for split_paragraph
This commit is contained in:
Xiang Lyu
2024-09-29 14:51:18 +08:00
committed by GitHub

View File

@@ -80,6 +80,13 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n=
pounc = ['.', '?', '!', ';', ':']
if comma_split:
pounc.extend(['', ','])
if text[-1] not in pounc:
if lang == "zh":
text += ""
else:
text += "."
st = 0
utts = []
for i, c in enumerate(text):
@@ -92,11 +99,7 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n=
st = i + 2
else:
st = i + 1
if len(utts) == 0:
if lang == "zh":
utts.append(text + '')
else:
utts.append(text + '.')
final_utts = []
cur_utt = ""
for utt in utts: