From 29507bc77a54356a523fd6c207da13d53d50f7aa Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Wed, 16 Oct 2024 13:06:31 +0800 Subject: [PATCH] update lint --- .github/workflows/lint.yml | 2 +- cosyvoice/bin/average_model.py | 1 - cosyvoice/bin/train.py | 3 ++- cosyvoice/dataset/processor.py | 5 +++-- cosyvoice/hifigan/discriminator.py | 7 ++++--- cosyvoice/hifigan/hifigan.py | 7 +++++-- cosyvoice/utils/executor.py | 5 +++-- cosyvoice/utils/losses.py | 4 +++- 8 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fff7290..5dc2204 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -52,5 +52,5 @@ jobs: set -eux pip install flake8==3.8.2 flake8-bugbear flake8-comprehensions flake8-executable flake8-pyi==20.5.0 mccabe pycodestyle==2.6.0 pyflakes==2.2.0 flake8 --version - flake8 --max-line-length 150 --ignore B006,B008,B905,C408,E402,E741,W503,W504 --exclude ./third_party/,./runtime/python/grpc/cosyvoice_pb2*py + flake8 --max-line-length 150 --ignore B006,B008,B905,C408,E402,E731,E741,W503,W504 --exclude ./third_party/,./runtime/python/grpc/cosyvoice_pb2*py if [ $? != 0 ]; then exit 1; fi \ No newline at end of file diff --git a/cosyvoice/bin/average_model.py b/cosyvoice/bin/average_model.py index 3112f29..d095dcd 100644 --- a/cosyvoice/bin/average_model.py +++ b/cosyvoice/bin/average_model.py @@ -16,7 +16,6 @@ import os import argparse import glob -import sys import yaml import torch diff --git a/cosyvoice/bin/train.py b/cosyvoice/bin/train.py index 229d9ee..e2f60ea 100644 --- a/cosyvoice/bin/train.py +++ b/cosyvoice/bin/train.py @@ -138,7 +138,8 @@ def main(): dist.barrier() group_join = dist.new_group(backend="gloo", timeout=datetime.timedelta(seconds=args.timeout)) if gan is True: - executor.train_one_epoc_gan(model, optimizer, scheduler, optimizer_d, scheduler_d, train_data_loader, cv_data_loader, writer, info_dict, group_join) + executor.train_one_epoc_gan(model, optimizer, scheduler, optimizer_d, scheduler_d, train_data_loader, cv_data_loader, + writer, info_dict, group_join) else: executor.train_one_epoc(model, optimizer, scheduler, train_data_loader, cv_data_loader, writer, info_dict, group_join) dist.destroy_process_group(group_join) diff --git a/cosyvoice/dataset/processor.py b/cosyvoice/dataset/processor.py index d71ae89..35a1445 100644 --- a/cosyvoice/dataset/processor.py +++ b/cosyvoice/dataset/processor.py @@ -177,6 +177,7 @@ def compute_fbank(data, sample['speech_feat'] = mat yield sample + def compute_f0(data, pitch_extractor, mode='train'): """ Extract f0 @@ -404,8 +405,8 @@ def padding(data, use_spk_embedding, mode='train', gan=False): pitch_feat = [sample[i]['pitch_feat'] for i in order] pitch_feat_len = torch.tensor([i.size(0) for i in pitch_feat], dtype=torch.int32) pitch_feat = pad_sequence(pitch_feat, - batch_first=True, - padding_value=0) + batch_first=True, + padding_value=0) batch["pitch_feat"] = pitch_feat batch["pitch_feat_len"] = pitch_feat_len else: diff --git a/cosyvoice/hifigan/discriminator.py b/cosyvoice/hifigan/discriminator.py index d128652..6fc7845 100644 --- a/cosyvoice/hifigan/discriminator.py +++ b/cosyvoice/hifigan/discriminator.py @@ -1,4 +1,3 @@ -from typing import List import torch import torch.nn as nn from torch.nn.utils import weight_norm @@ -6,6 +5,7 @@ from typing import List, Optional, Tuple from einops import rearrange from torchaudio.transforms import Spectrogram + class MultipleDiscriminator(nn.Module): def __init__( self, mpd: nn.Module, mrd: nn.Module @@ -28,6 +28,7 @@ class MultipleDiscriminator(nn.Module): fmap_gs += this_fmap_gs return y_d_rs, y_d_gs, fmap_rs, fmap_gs + class MultiResolutionDiscriminator(nn.Module): def __init__( self, @@ -112,7 +113,7 @@ class DiscriminatorR(nn.Module): x = torch.view_as_real(x) x = rearrange(x, "b f t c -> b c t f") # Split into bands - x_bands = [x[..., b[0] : b[1]] for b in self.bands] + x_bands = [x[..., b[0]: b[1]] for b in self.bands] return x_bands def forward(self, x: torch.Tensor, cond_embedding_id: torch.Tensor = None): @@ -136,4 +137,4 @@ class DiscriminatorR(nn.Module): fmap.append(x) x += h - return x, fmap \ No newline at end of file + return x, fmap diff --git a/cosyvoice/hifigan/hifigan.py b/cosyvoice/hifigan/hifigan.py index ed18e8a..db6457e 100644 --- a/cosyvoice/hifigan/hifigan.py +++ b/cosyvoice/hifigan/hifigan.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from matcha.hifigan.models import feature_loss, generator_loss, discriminator_loss from cosyvoice.utils.losses import tpr_loss, mel_loss + class HiFiGan(nn.Module): def __init__(self, generator, discriminator, mel_spec_transform, multi_mel_spectral_recon_loss_weight=45, feat_match_loss_weight=2.0, @@ -44,7 +45,9 @@ class HiFiGan(nn.Module): else: loss_tpr = torch.zeros(1).to(device) loss_f0 = F.l1_loss(generated_f0, pitch_feat) - loss = loss_gen + self.feat_match_loss_weight * loss_fm + self.multi_mel_spectral_recon_loss_weight * loss_mel + self.tpr_loss_weight * loss_tpr + loss_f0 + loss = loss_gen + self.feat_match_loss_weight * loss_fm + \ + self.multi_mel_spectral_recon_loss_weight * loss_mel + \ + self.tpr_loss_weight * loss_tpr + loss_f0 return {'loss': loss, 'loss_gen': loss_gen, 'loss_fm': loss_fm, 'loss_mel': loss_mel, 'loss_tpr': loss_tpr, 'loss_f0': loss_f0} def forward_discriminator(self, batch, device): @@ -63,4 +66,4 @@ class HiFiGan(nn.Module): loss_tpr = torch.zeros(1).to(device) loss_f0 = F.l1_loss(generated_f0, pitch_feat) loss = loss_disc + self.tpr_loss_weight * loss_tpr + loss_f0 - return {'loss': loss, 'loss_disc': loss_disc, 'loss_tpr': loss_tpr, 'loss_f0': loss_f0} \ No newline at end of file + return {'loss': loss, 'loss_disc': loss_disc, 'loss_tpr': loss_tpr, 'loss_f0': loss_f0} diff --git a/cosyvoice/utils/executor.py b/cosyvoice/utils/executor.py index bb7289c..14550fa 100644 --- a/cosyvoice/utils/executor.py +++ b/cosyvoice/utils/executor.py @@ -25,7 +25,7 @@ from cosyvoice.utils.train_utils import update_parameter_and_lr, log_per_step, l class Executor: - def __init__(self, gan: bool=False): + def __init__(self, gan: bool = False): self.gan = gan self.step = 0 self.epoch = 0 @@ -81,7 +81,8 @@ class Executor: dist.barrier() self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True) - def train_one_epoc_gan(self, model, optimizer, scheduler, optimizer_d, scheduler_d, train_data_loader, cv_data_loader, writer, info_dict, group_join): + def train_one_epoc_gan(self, model, optimizer, scheduler, optimizer_d, scheduler_d, train_data_loader, cv_data_loader, + writer, info_dict, group_join): ''' Train one epoch ''' diff --git a/cosyvoice/utils/losses.py b/cosyvoice/utils/losses.py index 46d9883..78efd3b 100644 --- a/cosyvoice/utils/losses.py +++ b/cosyvoice/utils/losses.py @@ -1,6 +1,7 @@ import torch import torch.nn.functional as F + def tpr_loss(disc_real_outputs, disc_generated_outputs, tau): loss = 0 for dr, dg in zip(disc_real_outputs, disc_generated_outputs): @@ -9,10 +10,11 @@ def tpr_loss(disc_real_outputs, disc_generated_outputs, tau): loss += tau - F.relu(tau - L_rel) return loss + def mel_loss(real_speech, generated_speech, mel_transforms): loss = 0 for transform in mel_transforms: mel_r = transform(real_speech) mel_g = transform(generated_speech) loss += F.l1_loss(mel_g, mel_r) - return loss \ No newline at end of file + return loss