fix(bug): when generating text that contains only punctuation marks or whitespace characters, CPU usage reaches 100% and the process crashes.

This commit is contained in:
0xCAFEBABE0
2024-12-30 10:48:43 +08:00
parent bcc58cb4cb
commit b60c37b31a
4 changed files with 12 additions and 13 deletions

View File

@@ -20,7 +20,6 @@ from typing import List
import numpy as np
import torch
import regex
IGNORE_ID = -1
@@ -156,12 +155,6 @@ def set_all_random_seed(seed):
torch.cuda.manual_seed_all(seed)
def is_only_punctuation(text):
    """Tell whether ``text`` is empty or consists solely of punctuation/symbols.

    Args:
        text: The string to inspect.

    Returns:
        True if every character of ``text`` is in a Unicode punctuation
        (``\\p{P}``) or symbol (``\\p{S}``) category, or if ``text`` is
        empty; False otherwise.
    """
    # The third-party ``regex`` module is needed here: the stdlib ``re``
    # module does not understand \p{...} Unicode property classes.
    # The character class lists only \p{P} and \p{S}, so any other
    # character (letters, digits, whitespace) makes the match fail.
    whole_match = regex.fullmatch(r'^[\p{P}\p{S}]*$', text)
    return whole_match is not None
def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
assert mask.dtype == torch.bool
assert dtype in [torch.float32, torch.bfloat16, torch.float16]

View File

@@ -13,6 +13,7 @@
# limitations under the License.
import re
import regex
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')
@@ -127,3 +128,9 @@ def replace_blank(text: str):
else:
out_str.append(c)
return "".join(out_str)
def is_only_punctuation(text):
    """Tell whether ``text`` is empty or consists solely of punctuation/symbols.

    Args:
        text: The string to inspect.

    Returns:
        True if every character of ``text`` is in a Unicode punctuation
        (``\\p{P}``) or symbol (``\\p{S}``) category, or if ``text`` is
        empty; False otherwise.
    """
    # The third-party ``regex`` module is needed here: the stdlib ``re``
    # module does not understand \p{...} Unicode property classes.
    # The character class lists only \p{P} and \p{S}, so any other
    # character (letters, digits, whitespace) makes the match fail.
    whole_match = regex.fullmatch(r'^[\p{P}\p{S}]*$', text)
    return whole_match is not None