mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
Merge pull request #710 from 0xCAFEBABE0/bug_cpu_hang
fix(bug).when generating text that contains only punctuation marks or…
This commit is contained in:
@@ -31,7 +31,7 @@ except ImportError:
|
||||
from tn.chinese.normalizer import Normalizer as ZhNormalizer
|
||||
from tn.english.normalizer import Normalizer as EnNormalizer
|
||||
use_ttsfrd = False
|
||||
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph
|
||||
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation
|
||||
|
||||
|
||||
class CosyVoiceFrontEnd:
|
||||
@@ -111,6 +111,10 @@ class CosyVoiceFrontEnd:
|
||||
if text_frontend is False:
|
||||
return [text] if split is True else text
|
||||
text = text.strip()
|
||||
# When generating text that contains only punctuation marks or whitespace characters
|
||||
# - Returning empty texts ensures consistent processing logic.
|
||||
if is_only_punctuation(text):
|
||||
return []
|
||||
if contains_chinese(text):
|
||||
if self.use_ttsfrd:
|
||||
texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import re
|
||||
import regex
|
||||
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')
|
||||
|
||||
|
||||
@@ -127,3 +128,9 @@ def replace_blank(text: str):
|
||||
else:
|
||||
out_str.append(c)
|
||||
return "".join(out_str)
|
||||
|
||||
|
||||
def is_only_punctuation(text):
    """Return True when ``text`` holds nothing but punctuation, symbols or whitespace.

    Args:
        text: The string to inspect.

    Returns:
        bool: True if every character is Unicode punctuation (``\\p{P}``),
        a Unicode symbol (``\\p{S}``) or whitespace. The empty string also
        yields True. False as soon as any other character is present.
    """
    # ``fullmatch`` already anchors both ends, so explicit ^/$ are not needed.
    # ``\s`` is part of the class so that punctuation separated by inner
    # whitespace (e.g. ". .") is still recognised: a caller's strip() only
    # removes leading and trailing whitespace.
    punctuation_pattern = r'[\p{P}\p{S}\s]*'
    return bool(regex.fullmatch(punctuation_pattern, text))
|
||||
|
||||
Reference in New Issue
Block a user