From 289ef5157814b567400e26ed43b4d708d06a5cf6 Mon Sep 17 00:00:00 2001 From: Shivam Mehta Date: Thu, 14 Nov 2024 06:55:51 +0100 Subject: [PATCH] Fixing the usage of denoiser_strength from the command line. --- matcha/VERSION | 2 +- matcha/cli.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/matcha/VERSION b/matcha/VERSION index 10507a3..4d81495 100644 --- a/matcha/VERSION +++ b/matcha/VERSION @@ -1 +1 @@ -0.0.7.1 +0.0.7.2 diff --git a/matcha/cli.py b/matcha/cli.py index 7daf130..aa15eb1 100644 --- a/matcha/cli.py +++ b/matcha/cli.py @@ -114,10 +114,10 @@ def load_matcha(model_name, checkpoint_path, device): return model -def to_waveform(mel, vocoder, denoiser=None): +def to_waveform(mel, vocoder, denoiser=None, denoiser_strength=0.00025): audio = vocoder(mel).clamp(-1, 1) if denoiser is not None: - audio = denoiser(audio.squeeze(), strength=0.00025).cpu().squeeze() + audio = denoiser(audio.squeeze(), strength=denoiser_strength).cpu().squeeze() return audio.cpu().squeeze() @@ -336,7 +336,7 @@ def batched_synthesis(args, device, model, vocoder, denoiser, texts, spk): length_scale=args.speaking_rate, ) - output["waveform"] = to_waveform(output["mel"], vocoder, denoiser) + output["waveform"] = to_waveform(output["mel"], vocoder, denoiser, args.denoiser_strength) t = (dt.datetime.now() - start_t).total_seconds() rtf_w = t * 22050 / (output["waveform"].shape[-1]) print(f"[🍵-Batch: {i}] Matcha-TTS RTF: {output['rtf']:.4f}") @@ -377,7 +377,7 @@ def unbatched_synthesis(args, device, model, vocoder, denoiser, texts, spk): spks=spk, length_scale=args.speaking_rate, ) - output["waveform"] = to_waveform(output["mel"], vocoder, denoiser) + output["waveform"] = to_waveform(output["mel"], vocoder, denoiser, args.denoiser_strength) # RTF with HiFiGAN t = (dt.datetime.now() - start_t).total_seconds() rtf_w = t * 22050 / (output["waveform"].shape[-1])