From bcda6d807c918753aa2be043c4c2c9251265f796 Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Thu, 5 Sep 2024 17:09:07 +0800 Subject: [PATCH] add prompt constraint --- cosyvoice/cli/frontend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cosyvoice/cli/frontend.py b/cosyvoice/cli/frontend.py index ec4f019..69557d8 100644 --- a/cosyvoice/cli/frontend.py +++ b/cosyvoice/cli/frontend.py @@ -78,6 +78,7 @@ class CosyVoiceFrontEnd: return text_token, text_token_len def _extract_speech_token(self, speech): + assert speech.shape[1] / 16000 <= 30, 'do not support extract speech token for audio longer than 30s' feat = whisper.log_mel_spectrogram(speech, n_mels=128) speech_token = self.speech_tokenizer_session.run(None, {self.speech_tokenizer_session.get_inputs()[0].name: