mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-05 01:49:25 +08:00
update readme
This commit is contained in:
@@ -85,7 +85,7 @@ class CosyVoice:
|
||||
start_time = time.time()
|
||||
|
||||
def inference_cross_lingual(self, tts_text, prompt_speech_16k, stream=False, speed=1.0):
|
||||
if self.frontend.instruct is True:
|
||||
if self.frontend.instruct is True and isinstance(self.model, CosyVoiceModel):
|
||||
raise ValueError('{} do not support cross_lingual inference'.format(self.model_dir))
|
||||
for i in tqdm(self.frontend.text_normalize(tts_text, split=True)):
|
||||
model_input = self.frontend.frontend_cross_lingual(i, prompt_speech_16k, self.sample_rate)
|
||||
|
||||
@@ -109,6 +109,10 @@ class CosyVoiceFrontEnd:
|
||||
|
||||
def text_normalize(self, text, split=True):
|
||||
text = text.strip()
|
||||
# NOTE(lyuxiang.lx) move this judgement into ttsfrd in the future
|
||||
for token in self.tokenizer.special_tokens['additional_special_tokens']:
|
||||
if token in text:
|
||||
return text if split is False else [text]
|
||||
if contains_chinese(text):
|
||||
if self.use_ttsfrd:
|
||||
texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
|
||||
|
||||
@@ -255,6 +255,7 @@ class QwenTokenizer():
|
||||
"[lipsmack]", "[mn]"
|
||||
]
|
||||
}
|
||||
self.special_tokens = special_tokens
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(token_path)
|
||||
self.tokenizer.add_special_tokens(special_tokens)
|
||||
self.skip_special_tokens = skip_special_tokens
|
||||
|
||||
Reference in New Issue
Block a user