update

2026-02-05 18:09:24 +08:00 · 2024-12-16 09:42:08 +08:00
parent 6b9cebea14
commit 6b5931dc70
2 changed files with 62 additions and 62 deletions
--- a/cosyvoice/cli/frontend.py
+++ b/cosyvoice/cli/frontend.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from functools import partial
+import json
 import onnxruntime
 import torch
 import numpy as np
@@ -66,9 +67,7 @@ class CosyVoiceFrontEnd:
            ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
            assert self.frd.initialize('{}/../../pretrained_models/CosyVoice-ttsfrd/resource'.format(ROOT_DIR)) is True, \
                'failed to initialize ttsfrd resource'
-            self.frd.set_lang_type('pinyin')
-            self.frd.enable_pinyin_mix(True)
-            self.frd.set_breakmodel_index(1)
+            self.frd.set_lang_type('pinyinvg')
        else:
            self.zh_tn_model = ZhNormalizer(remove_erhua=False, full_to_half=False)
            self.en_tn_model = EnNormalizer()
@@ -112,26 +111,28 @@ class CosyVoiceFrontEnd:
        text = text.strip()
        if contains_chinese(text):
            if self.use_ttsfrd:
-                text = self.frd.get_frd_extra_info(text, 'input')
+                texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
+                text = ''.join(texts)
            else:
                text = self.zh_tn_model.normalize(text)
-            text = text.replace("\n", "")
-            text = replace_blank(text)
-            text = replace_corner_mark(text)
-            text = text.replace(".", "。")
-            text = text.replace(" - ", "，")
-            text = remove_bracket(text)
-            text = re.sub(r'[，,、]+$', '。', text)
-            texts = list(split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "zh", token_max_n=80,
-                                         token_min_n=60, merge_len=20, comma_split=False))
+                text = text.replace("\n", "")
+                text = replace_blank(text)
+                text = replace_corner_mark(text)
+                text = text.replace(".", "。")
+                text = text.replace(" - ", "，")
+                text = remove_bracket(text)
+                text = re.sub(r'[，,、]+$', '。', text)
+                texts = list(split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "zh", token_max_n=80,
+                                            token_min_n=60, merge_len=20, comma_split=False))
        else:
            if self.use_ttsfrd:
-                text = self.frd.get_frd_extra_info(text, 'input')
+                texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
+                text = ''.join(texts)
            else:
                text = self.en_tn_model.normalize(text)
-            text = spell_out_number(text, self.inflect_parser)
-            texts = list(split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "en", token_max_n=80,
-                                         token_min_n=60, merge_len=20, comma_split=False))
+                text = spell_out_number(text, self.inflect_parser)
+                texts = list(split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "en", token_max_n=80,
+                                            token_min_n=60, merge_len=20, comma_split=False))
        if split is False:
            return text
        return texts