Mirror of https://github.com/FunAudioLLM/CosyVoice.git (synced 2026-02-05 18:09:24 +08:00)
update
This commit is contained in:
@@ -11,7 +11,7 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import logging
|
||||
import os, logging
|
||||
import random
|
||||
from typing import Dict, Optional
|
||||
import torch
|
||||
@@ -19,7 +19,7 @@ import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
from omegaconf import DictConfig
|
||||
from cosyvoice.utils.mask import make_pad_mask
|
||||
from cosyvoice.utils.onnx import SpeechTokenExtractor
|
||||
from cosyvoice.utils.onnx import SpeechTokenExtractor, online_feature, onnx_path
|
||||
|
||||
|
||||
class MaskedDiffWithXvec(torch.nn.Module):
|
||||
@@ -180,14 +180,19 @@ class CausalMaskedDiffWithXvec(torch.nn.Module):
|
||||
self.only_mask_loss = only_mask_loss
|
||||
self.token_mel_ratio = token_mel_ratio
|
||||
self.pre_lookahead_len = pre_lookahead_len
|
||||
if online_feature is True:
|
||||
self.speech_token_extractor = SpeechTokenExtractor(model_path=os.path.join(onnx_path, 'speech_tokenizer_v2.batch.onnx'))
|
||||
|
||||
def forward(
|
||||
self,
|
||||
batch: dict,
|
||||
device: torch.device,
|
||||
) -> Dict[str, Optional[torch.Tensor]]:
|
||||
token = batch['speech_token'].to(device)
|
||||
token_len = batch['speech_token_len'].to(device)
|
||||
if 'speech_token' not in batch:
|
||||
token, token_len = self.speech_token_extractor.inference(batch['whisper_feat'], batch['whisper_feat_len'])
|
||||
else:
|
||||
token = batch['speech_token'].to(device)
|
||||
token_len = batch['speech_token_len'].to(device)
|
||||
feat = batch['speech_feat'].to(device)
|
||||
feat_len = batch['speech_feat_len'].to(device)
|
||||
embedding = batch['embedding'].to(device)
|
||||
@@ -309,6 +314,8 @@ class CausalMaskedDiffWithDiT(torch.nn.Module):
|
||||
self.decoder = decoder
|
||||
self.only_mask_loss = only_mask_loss
|
||||
self.token_mel_ratio = token_mel_ratio
|
||||
if online_feature is True:
|
||||
self.speech_token_extractor = SpeechTokenExtractor(model_path=os.path.join(onnx_path, 'speech_tokenizer_v3.batch.onnx'))
|
||||
|
||||
def forward(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user