From 9c0aa1918bef41e4a094af16664ab522b2e6e617 Mon Sep 17 00:00:00 2001 From: zhuyunfeng <42790740+zhuzizyf@users.noreply.github.com> Date: Sun, 29 Sep 2024 14:19:31 +0800 Subject: [PATCH] Update frontend_utils.py "Fix a bug in `split_paragraph`: when synthesized text with multiple paragraphs ends without punctuation, its last sentence is lost." --- cosyvoice/utils/frontend_utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py index 7c6e19e..215cbe4 100644 --- a/cosyvoice/utils/frontend_utils.py +++ b/cosyvoice/utils/frontend_utils.py @@ -80,6 +80,13 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= pounc = ['.', '?', '!', ';', ':'] if comma_split: pounc.extend([',', ',']) + + if text[-1] not in pounc: + if lang == "zh": + text += "。" + else: + text += "." + st = 0 utts = [] for i, c in enumerate(text): @@ -92,11 +99,7 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= st = i + 2 else: st = i + 1 - if len(utts) == 0: - if lang == "zh": - utts.append(text + '。') - else: - utts.append(text + '.') + final_utts = [] cur_utt = "" for utt in utts: