From 84015697c2cf854668b6550f71a048512d308a03 Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 Date: Thu, 12 Dec 2024 16:49:39 +0800 Subject: [PATCH 1/6] fix(bug).when generating text that contains only punctuation marks or whitespace characters, the CPU usage reaches 100%, and the process crashes. --- cosyvoice/cli/model.py | 4 ++++ cosyvoice/utils/common.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py index de86a24..22affe4 100644 --- a/cosyvoice/cli/model.py +++ b/cosyvoice/cli/model.py @@ -19,6 +19,7 @@ from torch.nn import functional as F from contextlib import nullcontext import uuid from cosyvoice.utils.common import fade_in_out +from cosyvoice.utils.common import is_only_punctuation class CosyVoiceModel: @@ -145,6 +146,9 @@ class CosyVoiceModel: llm_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), prompt_speech_feat=torch.zeros(1, 0, 80), stream=False, speed=1.0, **kwargs): + if is_only_punctuation(text): + logging.info('only punctuation, skip synthesis:{}'.format(text)) + return {'tts_speech': torch.zeros(1, int(0.01 * 22050))} #返回10ms空白音频,保证了一致的上下游处理逻辑 # this_uuid is used to track variables related to this inference thread this_uuid = str(uuid.uuid1()) with self.lock: diff --git a/cosyvoice/utils/common.py b/cosyvoice/utils/common.py index 25bc835..f926424 100644 --- a/cosyvoice/utils/common.py +++ b/cosyvoice/utils/common.py @@ -20,6 +20,7 @@ from typing import List import numpy as np import torch +import regex IGNORE_ID = -1 @@ -153,3 +154,9 @@ def set_all_random_seed(seed): np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) + + +def is_only_punctuation(text): + # Regular expression: Match strings that consist only of punctuation marks or are empty. + punctuation_pattern = r'^[\p{P}\p{S}]*$' + return bool(regex.fullmatch(punctuation_pattern, text)) From f56c2583e86040e2758bbbd57d9ca5757dfa8cf6 Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 Date: Thu, 12 Dec 2024 16:53:30 +0800 Subject: [PATCH 2/6] fix(bug).when generating text that contains only punctuation marks or whitespace characters, the CPU usage reaches 100%, and the process crashes. --- cosyvoice/cli/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py index 22affe4..d878fc1 100644 --- a/cosyvoice/cli/model.py +++ b/cosyvoice/cli/model.py @@ -147,7 +147,6 @@ class CosyVoiceModel: flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), prompt_speech_feat=torch.zeros(1, 0, 80), stream=False, speed=1.0, **kwargs): if is_only_punctuation(text): - logging.info('only punctuation, skip synthesis:{}'.format(text)) return {'tts_speech': torch.zeros(1, int(0.01 * 22050))} #返回10ms空白音频,保证了一致的上下游处理逻辑 # this_uuid is used to track variables related to this inference thread this_uuid = str(uuid.uuid1()) From 014fed4405592b113cbe60104ad5543d6f821cf5 Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 Date: Thu, 12 Dec 2024 16:55:43 +0800 Subject: [PATCH 3/6] fix(bug).when generating text that contains only punctuation marks or whitespace characters, the CPU usage reaches 100%, and the process crashes. --- cosyvoice/cli/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py index d878fc1..850e5db 100644 --- a/cosyvoice/cli/model.py +++ b/cosyvoice/cli/model.py @@ -146,8 +146,10 @@ class CosyVoiceModel: llm_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), prompt_speech_feat=torch.zeros(1, 0, 80), stream=False, speed=1.0, **kwargs): + # When generating text that contains only punctuation marks or whitespace characters + # - Returning 10ms of silence ensures consistent processing logic. if is_only_punctuation(text): - return {'tts_speech': torch.zeros(1, int(0.01 * 22050))} #返回10ms空白音频,保证了一致的上下游处理逻辑 + return {'tts_speech': torch.zeros(1, int(0.01 * 22050))} # this_uuid is used to track variables related to this inference thread this_uuid = str(uuid.uuid1()) with self.lock: From 1d8d94de824f0d822a02a35df60f67c2969162df Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 <18649808198@163.com> Date: Mon, 16 Dec 2024 13:56:28 +0800 Subject: [PATCH 4/6] Update common.py --- cosyvoice/utils/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cosyvoice/utils/common.py b/cosyvoice/utils/common.py index 22b944d..43ced32 100644 --- a/cosyvoice/utils/common.py +++ b/cosyvoice/utils/common.py @@ -161,7 +161,6 @@ def is_only_punctuation(text): punctuation_pattern = r'^[\p{P}\p{S}]*$' return bool(regex.fullmatch(punctuation_pattern, text)) - def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: assert mask.dtype == torch.bool assert dtype in [torch.float32, torch.bfloat16, torch.float16] @@ -171,4 +170,3 @@ def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: # chunk_masks = (1.0 - chunk_masks) * torch.finfo(dtype).min mask = (1.0 - mask) * torch.finfo(dtype).min return mask - From bcc58cb4cb2e94afbb4d299d677b94784695adb6 Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 <18649808198@163.com> Date: Mon, 16 Dec 2024 13:57:38 +0800 Subject: [PATCH 5/6] Update common.py --- cosyvoice/utils/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cosyvoice/utils/common.py b/cosyvoice/utils/common.py index 43ced32..f162cbe 100644 --- a/cosyvoice/utils/common.py +++ b/cosyvoice/utils/common.py @@ -161,6 +161,7 @@ def is_only_punctuation(text): punctuation_pattern = r'^[\p{P}\p{S}]*$' return bool(regex.fullmatch(punctuation_pattern, text)) + def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: assert mask.dtype == torch.bool assert dtype in [torch.float32, torch.bfloat16, torch.float16] From b60c37b31ae0409de078d798c99fbfa5f187146d Mon Sep 17 00:00:00 2001 From: 0xCAFEBABE0 Date: Mon, 30 Dec 2024 10:48:43 +0800 Subject: [PATCH 6/6] fix(bug).when generating text that contains only punctuation marks or whitespace characters, the CPU usage reaches 100%, and the process crashes. --- cosyvoice/cli/frontend.py | 6 +++++- cosyvoice/cli/model.py | 5 ----- cosyvoice/utils/common.py | 7 ------- cosyvoice/utils/frontend_utils.py | 7 +++++++ 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cosyvoice/cli/frontend.py b/cosyvoice/cli/frontend.py index 9885a0f..31926f0 100644 --- a/cosyvoice/cli/frontend.py +++ b/cosyvoice/cli/frontend.py @@ -31,7 +31,7 @@ except ImportError: from tn.chinese.normalizer import Normalizer as ZhNormalizer from tn.english.normalizer import Normalizer as EnNormalizer use_ttsfrd = False -from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph +from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation class CosyVoiceFrontEnd: @@ -109,6 +109,10 @@ class CosyVoiceFrontEnd: def text_normalize(self, text, split=True): text = text.strip() + # When generating text that contains only punctuation marks or whitespace characters + # - Returning empty texts ensures consistent processing logic. + if is_only_punctuation(text): + return [] if contains_chinese(text): if self.use_ttsfrd: texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]] diff --git a/cosyvoice/cli/model.py b/cosyvoice/cli/model.py index 5314e8b..b9f555b 100644 --- a/cosyvoice/cli/model.py +++ b/cosyvoice/cli/model.py @@ -19,7 +19,6 @@ from torch.nn import functional as F from contextlib import nullcontext import uuid from cosyvoice.utils.common import fade_in_out -from cosyvoice.utils.common import is_only_punctuation class CosyVoiceModel: @@ -146,10 +145,6 @@ class CosyVoiceModel: llm_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), prompt_speech_feat=torch.zeros(1, 0, 80), stream=False, speed=1.0, **kwargs): - # When generating text that contains only punctuation marks or whitespace characters - # - Returning 10ms of silence ensures consistent processing logic. - if is_only_punctuation(text): - return {'tts_speech': torch.zeros(1, int(0.01 * 22050))} # this_uuid is used to track variables related to this inference thread this_uuid = str(uuid.uuid1()) with self.lock: diff --git a/cosyvoice/utils/common.py b/cosyvoice/utils/common.py index f162cbe..b356f0c 100644 --- a/cosyvoice/utils/common.py +++ b/cosyvoice/utils/common.py @@ -20,7 +20,6 @@ from typing import List import numpy as np import torch -import regex IGNORE_ID = -1 @@ -156,12 +155,6 @@ def set_all_random_seed(seed): torch.cuda.manual_seed_all(seed) -def is_only_punctuation(text): - # Regular expression: Match strings that consist only of punctuation marks or are empty. - punctuation_pattern = r'^[\p{P}\p{S}]*$' - return bool(regex.fullmatch(punctuation_pattern, text)) - - def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: assert mask.dtype == torch.bool assert dtype in [torch.float32, torch.bfloat16, torch.float16] diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py index ab01a1f..ea1c9fc 100644 --- a/cosyvoice/utils/frontend_utils.py +++ b/cosyvoice/utils/frontend_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import re +import regex chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+') @@ -127,3 +128,9 @@ def replace_blank(text: str): else: out_str.append(c) return "".join(out_str) + + +def is_only_punctuation(text): + # Regular expression: Match strings that consist only of punctuation marks or are empty. + punctuation_pattern = r'^[\p{P}\p{S}]*$' + return bool(regex.fullmatch(punctuation_pattern, text))