add vc code

This commit is contained in:
lyuxiang.lx
2024-09-26 10:49:22 +08:00
parent ed87445540
commit 49015f63e6
7 changed files with 43 additions and 216 deletions

View File

@@ -42,6 +42,7 @@ class CosyVoiceFrontEnd:
speech_tokenizer_model: str,
spk2info: str = '',
instruct: bool = False,
vc: bool = False,
allowed_special: str = 'all'):
self.tokenizer = get_tokenizer()
self.feat_extractor = feat_extractor
@@ -55,7 +56,10 @@ class CosyVoiceFrontEnd:
"CPUExecutionProvider"])
if os.path.exists(spk2info):
self.spk2info = torch.load(spk2info, map_location=self.device)
else:
self.spk2info = {}
self.instruct = instruct
self.vc = vc
self.allowed_special = allowed_special
self.inflect_parser = inflect.engine()
self.use_ttsfrd = use_ttsfrd
@@ -172,3 +176,15 @@ class CosyVoiceFrontEnd:
model_input['prompt_text'] = instruct_text_token
model_input['prompt_text_len'] = instruct_text_token_len
return model_input
def frontend_vc(self, source_speech_16k, prompt_speech_16k):
    """Assemble the model-input dict for the ``vc`` path from two waveforms.

    NOTE(review): "vc" presumably stands for voice conversion -- confirm
    against the caller.

    Args:
        source_speech_16k: Waveform handed to the speech tokenizer; its
            tokens are returned under ``source_speech_token``. The name
            suggests 16 kHz audio; the rate is not checked here.
        prompt_speech_16k: Waveform used three ways: tokenized as-is,
            resampled from 16000 to 22050 for feature extraction, and
            passed as-is to the speaker-embedding extractor.

    Returns:
        dict with the keys ``source_speech_token``,
        ``source_speech_token_len``, ``flow_prompt_speech_token``,
        ``flow_prompt_speech_token_len``, ``prompt_speech_feat``,
        ``prompt_speech_feat_len`` and ``flow_embedding``.
    """
    # Prompt tokens are taken from the prompt audio exactly as passed in.
    flow_prompt_token, flow_prompt_token_len = self._extract_speech_token(prompt_speech_16k)

    # The feature extractor is fed a 16000 -> 22050 resampled copy of the prompt.
    to_22050 = torchaudio.transforms.Resample(orig_freq=16000, new_freq=22050)
    prompt_feat, prompt_feat_len = self._extract_speech_feat(to_22050(prompt_speech_16k))

    # The speaker embedding uses the un-resampled prompt.
    flow_embedding = self._extract_spk_embedding(prompt_speech_16k)

    # The source utterance only contributes its speech tokens.
    src_token, src_token_len = self._extract_speech_token(source_speech_16k)

    return dict(
        source_speech_token=src_token,
        source_speech_token_len=src_token_len,
        flow_prompt_speech_token=flow_prompt_token,
        flow_prompt_speech_token_len=flow_prompt_token_len,
        prompt_speech_feat=prompt_feat,
        prompt_speech_feat_len=prompt_feat_len,
        flow_embedding=flow_embedding,
    )