From 9c0aa1918bef41e4a094af16664ab522b2e6e617 Mon Sep 17 00:00:00 2001 From: zhuyunfeng <42790740+zhuzizyf@users.noreply.github.com> Date: Sun, 29 Sep 2024 14:19:31 +0800 Subject: [PATCH 1/3] Update frontend_utils.py "Fix the bug in `split_paragraph` where, for synthesized text with multiple paragraphs, a last sentence that lacks terminal punctuation was dropped from the output." --- cosyvoice/utils/frontend_utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py index 7c6e19e..215cbe4 100644 --- a/cosyvoice/utils/frontend_utils.py +++ b/cosyvoice/utils/frontend_utils.py @@ -80,6 +80,13 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= pounc = ['.', '?', '!', ';', ':'] if comma_split: pounc.extend([',', ',']) + + if text[-1] not in pounc: + if lang == "zh": + text += "。" + else: + text += "." + st = 0 utts = [] for i, c in enumerate(text): @@ -92,11 +99,7 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= st = i + 2 else: st = i + 1 - if len(utts) == 0: - if lang == "zh": - utts.append(text + '。') - else: - utts.append(text + '.') + final_utts = [] cur_utt = "" for utt in utts: From 74a449ad1f2288595c0a37e2b345e6653002873f Mon Sep 17 00:00:00 2001 From: zhuyunfeng <42790740+zhuzizyf@users.noreply.github.com> Date: Sun, 29 Sep 2024 14:26:33 +0800 Subject: [PATCH 2/3] Update frontend_utils.py Fix typo --- cosyvoice/utils/frontend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py index 215cbe4..5514193 100644 --- a/cosyvoice/utils/frontend_utils.py +++ b/cosyvoice/utils/frontend_utils.py @@ -86,7 +86,7 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= text += "。" else: text += "." 
- + st = 0 utts = [] for i, c in enumerate(text): From 0b76dfa1eb8e4b7768180359ec7043024ba7cef4 Mon Sep 17 00:00:00 2001 From: zhuyunfeng <42790740+zhuzizyf@users.noreply.github.com> Date: Sun, 29 Sep 2024 14:41:33 +0800 Subject: [PATCH 3/3] Update frontend_utils.py Fix typo --- cosyvoice/utils/frontend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py index 5514193..ab01a1f 100644 --- a/cosyvoice/utils/frontend_utils.py +++ b/cosyvoice/utils/frontend_utils.py @@ -86,7 +86,7 @@ def split_paragraph(text: str, tokenize, lang="zh", token_max_n=80, token_min_n= text += "。" else: text += "." - + st = 0 utts = [] for i, c in enumerate(text):