From 289ef5157814b567400e26ed43b4d708d06a5cf6 Mon Sep 17 00:00:00 2001
From: Shivam Mehta <shivam.mehta25@gmail.com>
Date: Thu, 14 Nov 2024 06:55:51 +0100
Subject: [PATCH] Fixing the usage of denoiser_strength from the command line.

---
 matcha/VERSION | 2 +-
 matcha/cli.py  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/matcha/VERSION b/matcha/VERSION
index 10507a3..4d81495 100644
--- a/matcha/VERSION
+++ b/matcha/VERSION
@@ -1 +1 @@
-0.0.7.1
+0.0.7.2
diff --git a/matcha/cli.py b/matcha/cli.py
index 7daf130..aa15eb1 100644
--- a/matcha/cli.py
+++ b/matcha/cli.py
@@ -114,10 +114,10 @@ def load_matcha(model_name, checkpoint_path, device):
     return model
 
 
-def to_waveform(mel, vocoder, denoiser=None):
+def to_waveform(mel, vocoder, denoiser=None, denoiser_strength=0.00025):
     audio = vocoder(mel).clamp(-1, 1)
     if denoiser is not None:
-        audio = denoiser(audio.squeeze(), strength=0.00025).cpu().squeeze()
+        audio = denoiser(audio.squeeze(), strength=denoiser_strength).cpu().squeeze()
 
     return audio.cpu().squeeze()
 
@@ -336,7 +336,7 @@ def batched_synthesis(args, device, model, vocoder, denoiser, texts, spk):
             length_scale=args.speaking_rate,
         )
 
-        output["waveform"] = to_waveform(output["mel"], vocoder, denoiser)
+        output["waveform"] = to_waveform(output["mel"], vocoder, denoiser, args.denoiser_strength)
         t = (dt.datetime.now() - start_t).total_seconds()
         rtf_w = t * 22050 / (output["waveform"].shape[-1])
         print(f"[🍵-Batch: {i}] Matcha-TTS RTF: {output['rtf']:.4f}")
@@ -377,7 +377,7 @@ def unbatched_synthesis(args, device, model, vocoder, denoiser, texts, spk):
             spks=spk,
             length_scale=args.speaking_rate,
         )
-        output["waveform"] = to_waveform(output["mel"], vocoder, denoiser)
+        output["waveform"] = to_waveform(output["mel"], vocoder, denoiser, args.denoiser_strength)
         # RTF with HiFiGAN
         t = (dt.datetime.now() - start_t).total_seconds()
         rtf_w = t * 22050 / (output["waveform"].shape[-1])