mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
update
This commit is contained in:
@@ -240,6 +240,8 @@ def get_tokenizer(
|
||||
|
||||
class QwenTokenizer():
|
||||
def __init__(self, token_path, skip_special_tokens=True):
|
||||
super().__init__()
|
||||
# NOTE: non-chat model, all these special tokens keep randomly initialized.
|
||||
special_tokens = {
|
||||
'eos_token': '<|endoftext|>',
|
||||
'pad_token': '<|endoftext|>',
|
||||
@@ -248,6 +250,9 @@ class QwenTokenizer():
|
||||
'[breath]', '<strong>', '</strong>', '[noise]',
|
||||
'[laughter]', '[cough]', '[clucking]', '[accent]',
|
||||
'[quick_breath]',
|
||||
"<laughter>", "</laughter>",
|
||||
"[hissing]", "[sigh]", "[vocalized-noise]",
|
||||
"[lipsmack]", "[mn]"
|
||||
]
|
||||
}
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(token_path)
|
||||
|
||||
Reference in New Issue
Block a user