fix(bug): when generating text that contains only punctuation marks or whitespace characters, CPU usage reaches 100% and the process crashes.

This commit is contained in:
0xCAFEBABE0
2024-12-12 16:49:39 +08:00
parent 07352a50b3
commit 84015697c2
2 changed files with 11 additions and 0 deletions

View File

@@ -20,6 +20,7 @@ from typing import List
import numpy as np
import torch
import regex
IGNORE_ID = -1
@@ -153,3 +154,9 @@ def set_all_random_seed(seed):
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def is_only_punctuation(text):
    """Return True when ``text`` holds nothing but punctuation, symbols or whitespace.

    A character counts as punctuation/symbol when its Unicode general
    category starts with ``P`` or ``S`` (the same classes as the regex
    properties ``\\p{P}`` and ``\\p{S}``). Whitespace characters are accepted
    as well, so blank input and punctuation separated by spaces are reported
    the same way as pure punctuation.

    Args:
        text: The string to inspect.

    Returns:
        bool: True if every character is punctuation, a symbol or whitespace.
        The empty string also yields True, since it contains no other
        character.
    """
    # Function-scope import keeps this helper self-contained; only the
    # standard library is needed for the category lookup.
    import unicodedata

    return all(
        ch.isspace() or unicodedata.category(ch)[0] in "PS"
        for ch in text
    )