mirror of
https://github.com/shivammehta25/Matcha-TTS.git
synced 2026-02-04 09:49:21 +08:00
Adding possibility of getting durations out
This commit is contained in:
@@ -227,7 +227,7 @@ def cli():
|
||||
parser.add_argument(
|
||||
"--vocoder",
|
||||
type=str,
|
||||
default=None,
|
||||
default="hifigan_univ_v1",
|
||||
help="Vocoder to use (default: will use the one suggested with the pretrained model))",
|
||||
choices=VOCODER_URLS.keys(),
|
||||
)
|
||||
|
||||
@@ -109,7 +109,7 @@ class TextMelDataModule(LightningDataModule):
|
||||
"""Clean up after fit or test."""
|
||||
pass # pylint: disable=unnecessary-pass
|
||||
|
||||
def state_dict(self): # pylint: disable=no-self-use
|
||||
def state_dict(self):
|
||||
"""Extra things to save to checkpoint."""
|
||||
return {}
|
||||
|
||||
@@ -167,7 +167,7 @@ class TextMelDataset(torch.utils.data.Dataset):
|
||||
text = self.get_text(text, add_blank=self.add_blank)
|
||||
mel = self.get_mel(filepath)
|
||||
|
||||
return {"x": text, "y": mel, "spk": spk}
|
||||
return {"x": text, "y": mel, "spk": spk, "filepath": filepath}
|
||||
|
||||
def get_mel(self, filepath):
|
||||
audio, sr = ta.load(filepath)
|
||||
@@ -207,15 +207,16 @@ class TextMelBatchCollate:
|
||||
|
||||
def __call__(self, batch):
|
||||
B = len(batch)
|
||||
y_max_length = max([item["y"].shape[-1] for item in batch])
|
||||
y_max_length = max([item["y"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator
|
||||
y_max_length = fix_len_compatibility(y_max_length)
|
||||
x_max_length = max([item["x"].shape[-1] for item in batch])
|
||||
x_max_length = max([item["x"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator
|
||||
n_feats = batch[0]["y"].shape[-2]
|
||||
|
||||
y = torch.zeros((B, n_feats, y_max_length), dtype=torch.float32)
|
||||
x = torch.zeros((B, x_max_length), dtype=torch.long)
|
||||
y_lengths, x_lengths = [], []
|
||||
spks = []
|
||||
filepaths = []
|
||||
for i, item in enumerate(batch):
|
||||
y_, x_ = item["y"], item["x"]
|
||||
y_lengths.append(y_.shape[-1])
|
||||
@@ -223,9 +224,10 @@ class TextMelBatchCollate:
|
||||
y[i, :, : y_.shape[-1]] = y_
|
||||
x[i, : x_.shape[-1]] = x_
|
||||
spks.append(item["spk"])
|
||||
filepaths.append(item["filepath"])
|
||||
|
||||
y_lengths = torch.tensor(y_lengths, dtype=torch.long)
|
||||
x_lengths = torch.tensor(x_lengths, dtype=torch.long)
|
||||
spks = torch.tensor(spks, dtype=torch.long) if self.n_spks > 1 else None
|
||||
|
||||
return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks}
|
||||
return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks, "filepaths": filepaths}
|
||||
|
||||
@@ -58,7 +58,7 @@ class BaseLightningClass(LightningModule, ABC):
|
||||
y, y_lengths = batch["y"], batch["y_lengths"]
|
||||
spks = batch["spks"]
|
||||
|
||||
dur_loss, prior_loss, diff_loss = self(
|
||||
dur_loss, prior_loss, diff_loss, *_ = self(
|
||||
x=x,
|
||||
x_lengths=x_lengths,
|
||||
y=y,
|
||||
|
||||
@@ -4,7 +4,7 @@ import random
|
||||
|
||||
import torch
|
||||
|
||||
import matcha.utils.monotonic_align as monotonic_align
|
||||
import matcha.utils.monotonic_align as monotonic_align # pylint: disable=consider-using-from-import
|
||||
from matcha import utils
|
||||
from matcha.models.baselightningmodule import BaseLightningClass
|
||||
from matcha.models.components.duration_predictors import DP
|
||||
@@ -241,4 +241,4 @@ class MatchaTTS(BaseLightningClass): # 🍵
|
||||
else:
|
||||
prior_loss = 0
|
||||
|
||||
return dur_loss, prior_loss, diff_loss
|
||||
return dur_loss, prior_loss, diff_loss, attn
|
||||
|
||||
174
matcha/utils/get_durations_from_trained_model.py
Normal file
174
matcha/utils/get_durations_from_trained_model.py
Normal file
@@ -0,0 +1,174 @@
|
||||
r"""
|
||||
Computes the per-token durations of every utterance in a dataset from the alignments of a trained Matcha-TTS
checkpoint and saves them as one ``.npy`` file per audio file in the output folder.

The dataset parameters are read from the Hydra data config (a yaml file under configs/data).
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import lightning
|
||||
import numpy as np
|
||||
import rootutils
|
||||
import torch
|
||||
from hydra import compose, initialize
|
||||
from omegaconf import open_dict
|
||||
from torch import nn
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from matcha.cli import get_device
|
||||
from matcha.data.text_mel_datamodule import TextMelDataModule
|
||||
from matcha.models.matcha_tts import MatchaTTS
|
||||
from matcha.utils.logging_utils import pylogger
|
||||
|
||||
log = pylogger.get_pylogger(__name__)
|
||||
|
||||
|
||||
def save_durations_to_folder(attn: torch.Tensor, x_length: int, y_length: int, filepath: str, output_folder: Path):
    """Save the per-token durations of a single utterance as a ``.npy`` file.

    The durations are the sums of the alignment over its last axis, truncated to the
    first ``x_length`` entries. The output file takes the base name of ``filepath``
    with its extension replaced by ``.npy``.

    Args:
        attn (torch.Tensor): Alignment of one utterance, already on the CPU. Assumed to be
            laid out as (text tokens, mel frames), optionally with leading singleton
            dimensions -- TODO confirm against the model's forward pass.
        x_length (int): Number of valid (unpadded) text tokens; padded entries are dropped.
        y_length (int): Number of valid mel frames. Currently unused, kept so existing
            callers do not break.
        filepath (str): Path of the source audio file; only its base name is used.
        output_folder (Path): Existing folder the ``.npy`` file is written to.
    """
    # Reduce over the frame axis and flatten, rather than `squeeze().sum(1)`: a bare
    # squeeze() also removes a token or frame axis of size 1, after which sum(1) raises.
    durations = attn.sum(-1).reshape(-1)[:x_length].numpy()
    # with_suffix swaps only the real extension, whatever it is (.wav, .flac, ...).
    output = Path(output_folder) / Path(filepath).with_suffix(".npy").name
    np.save(output, durations)
|
||||
|
||||
|
||||
@torch.inference_mode()
def compute_durations(data_loader: torch.utils.data.DataLoader, model: nn.Module, device: torch.device, output_folder):
    """Generate durations from the model for each datapoint and save them in a folder.

    Every batch is run through the model, the alignment returned as the fourth output
    is moved to the CPU, and one file per utterance is written with
    ``save_durations_to_folder``.

    Args:
        data_loader (torch.utils.data.DataLoader): Dataloader yielding dict batches with the keys
            ``x``, ``x_lengths``, ``y``, ``y_lengths``, ``spks`` and ``filepaths``.
        model (nn.Module): MatchaTTS model; it is switched to evaluation mode by this function.
        device (torch.device): GPU or CPU the batches are moved to before the forward pass.
        output_folder: Folder the duration files are written to.
    """
    # Evaluation mode keeps train-only stochastic layers (e.g. dropout, if the model
    # has any) from randomising the extracted alignments.
    model.eval()

    for batch in tqdm(data_loader, leave=False):
        x_lengths, y_lengths = batch["x_lengths"], batch["y_lengths"]
        spks = batch["spks"]

        _, _, _, attn = model(
            x=batch["x"].to(device),
            x_lengths=x_lengths.to(device),
            y=batch["y"].to(device),
            y_lengths=y_lengths.to(device),
            spks=spks.to(device) if spks is not None else None,
        )
        attn = attn.cpu()

        # Convert the lengths to Python ints once per batch instead of calling .item()
        # on a device tensor for every utterance.
        per_utterance = zip(attn, x_lengths.tolist(), y_lengths.tolist(), batch["filepaths"])
        for utt_attn, x_length, y_length, filepath in per_utterance:
            save_durations_to_folder(utt_attn, x_length, y_length, filepath, output_folder)
|
||||
|
||||
|
||||
def main():
    """Command line entry point: extract durations for every split of a dataset.

    Loads the Hydra data config named by ``--input-config``, resolves its filelist
    paths against the project root, loads the MatchaTTS checkpoint given by
    ``--checkpoint_path`` and writes the durations of the training, validation and
    test sets (whichever exist) to the output folder.

    Exits with status 1 when the output folder already exists and ``--force`` was
    not given.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        "--input-config",
        type=str,
        default="vctk.yaml",
        help="The name of the yaml config file under configs/data",
    )

    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=32,
        help="Can have increased batch size for faster computation",
    )

    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        required=False,
        help="force overwrite the file",
    )
    # `--checkpoint_path` stays first so the destination remains `args.checkpoint_path`;
    # the hyphenated alias matches the spelling of the other options.
    parser.add_argument(
        "-c",
        "--checkpoint_path",
        "--checkpoint-path",
        type=str,
        required=True,
        help="Path to the checkpoint file to load the model from",
    )

    parser.add_argument(
        "-o",
        "--output-folder",
        type=str,
        default=None,
        help="Output folder to save the durations",
    )

    parser.add_argument(
        "--cpu", action="store_true", help="Use CPU for inference, not recommended (default: use GPU if available)"
    )

    args = parser.parse_args()

    with initialize(version_base="1.3", config_path="../../configs/data"):
        cfg = compose(config_name=args.input_config, return_hydra_config=True, overrides=[])

    root_path = rootutils.find_root(search_from=__file__, indicator=".project-root")

    with open_dict(cfg):
        # Drop the keys that are not passed on to the datamodule constructor below.
        del cfg["hydra"]
        del cfg["_target_"]
        cfg["seed"] = 1234
        cfg["batch_size"] = args.batch_size
        cfg["train_filelist_path"] = str(os.path.join(root_path, cfg["train_filelist_path"]))
        cfg["valid_filelist_path"] = str(os.path.join(root_path, cfg["valid_filelist_path"]))

    if args.output_folder is not None:
        output_folder = Path(args.output_folder)
    else:
        output_folder = Path("data") / "processed_data" / "durations" / cfg["name"]

    if output_folder.exists() and not args.force:
        print("Folder already exists. Use -f to force overwrite")
        sys.exit(1)

    output_folder.mkdir(parents=True, exist_ok=True)

    print(f"Preprocessing: {cfg['name']} from training filelist: {cfg['train_filelist_path']}")
    print("Loading model...")
    device = get_device(args)
    model = MatchaTTS.load_from_checkpoint(args.checkpoint_path, map_location=device)

    text_mel_datamodule = TextMelDataModule(**cfg)
    text_mel_datamodule.setup()

    # The three splits are handled identically; a split whose dataloader raises
    # MisconfigurationException is reported and skipped.
    splits = (
        ("training", text_mel_datamodule.train_dataloader),
        ("validation", text_mel_datamodule.val_dataloader),
        ("test", text_mel_datamodule.test_dataloader),
    )
    for split_name, make_dataloader in splits:
        try:
            print(f"Computing durations for {split_name} set if exists...")
            compute_durations(make_dataloader(), model, device, output_folder)
        except lightning.fabric.utilities.exceptions.MisconfigurationException:
            print(f"No {split_name} set found")

    print(f"[+] Done! Durations saved to: {output_folder}")
|
||||
|
||||
|
||||
# Run the duration extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
347
synthesis.ipynb
347
synthesis.ipynb
@@ -37,7 +37,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 7,
|
||||
"id": "8d5876c0-b47e-4c80-9e9c-62550f81b64e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -69,10 +69,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 8,
|
||||
"id": "b1a30306-588c-4f22-8d9b-e2676880b0e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The autoreload extension is already loaded. To reload it, use:\n",
|
||||
" %reload_ext autoreload\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
@@ -82,7 +91,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 9,
|
||||
"id": "a312856b-01a9-4d75-a4c8-4666dffa0692",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,16 +109,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 10,
|
||||
"id": "7640a4c1-44ce-447c-a8ff-45012fb7bddd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# MATCHA_CHECKPOINT = \"logs/train/hi-fi_en-US_female_piper_phonemizer/runs/2023-12-01_12-14-06/checkpoints/last.ckpt\"\n",
|
||||
"# MATCHA_CHECKPOINT = get_user_data_dir()/\"matcha_ljspeech.ckpt\n",
|
||||
"MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n",
|
||||
"HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_T2_v1\"\n",
|
||||
"OUTPUT_FOLDER = \"synth_output\""
|
||||
"# MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n",
|
||||
"# MATCHA_CHECKPOINT = \"logs/train/lj_stoc/runs/2024-01-12_12-03-19/checkpoints/checkpoint_epoch=3299.ckpt\"\n",
|
||||
"MATCHA_CHECKPOINT = \"logs/train/joe_stoc_dur/runs/2024-02-20_14-01-53/checkpoints/last.ckpt\"\n",
|
||||
"HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_univ_v1\"\n",
|
||||
"OUTPUT_FOLDER = \"cormac_fm_output2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -122,7 +131,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"id": "26a16230-04ba-4825-a844-2fb5ab945e24",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -156,7 +165,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 12,
|
||||
"id": "f6b68184-968d-4868-9029-f0c40e9e68af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -191,11 +200,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 13,
|
||||
"id": "880a1879-24fd-4757-849c-850339120796",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"@torch.inference_mode()\n",
|
||||
"def process_text(text: str):\n",
|
||||
" x = torch.tensor(intersperse(text_to_sequence(text, ['english_cleaners2']), 0),dtype=torch.long, device=device)[None]\n",
|
||||
@@ -236,6 +247,9 @@
|
||||
" folder.mkdir(exist_ok=True, parents=True)\n",
|
||||
" np.save(folder / f'{filename}', output['mel'].cpu().numpy())\n",
|
||||
" sf.write(folder / f'{filename}.wav', output['waveform'], 22050, 'PCM_24')\n",
|
||||
" if \"duration_json\" in output:\n",
|
||||
" with open(folder / f'{filename}.json', 'w', encoding='utf-8') as f:\n",
|
||||
" json.dump(output['duration_json'], f, indent=4, ensure_ascii=False)\n",
|
||||
"\n",
|
||||
"def plot_spectrogram(spectrogram):\n",
|
||||
" fig, ax = plt.subplots(figsize=(12, 3))\n",
|
||||
@@ -257,7 +271,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 14,
|
||||
"id": "2e0a9acd-0845-4192-ba09-b9683e28a3ac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -279,6 +293,186 @@
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f58e82fa-0ae0-43f2-a021-97efc6041793",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"texts = [\n",
|
||||
" \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
|
||||
" \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
|
||||
" \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
|
||||
" \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
|
||||
" \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
|
||||
" \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
|
||||
" \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
|
||||
" \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
|
||||
" \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
|
||||
" \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
|
||||
" \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
|
||||
" \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
|
||||
" \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
|
||||
" \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
|
||||
" \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
|
||||
" \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
|
||||
" \n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "38aff9af-420f-4ab8-a9b1-7d12fc5bcb57",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"texts = [\n",
|
||||
" \"So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!\",\n",
|
||||
" \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n",
|
||||
" \"Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!\",\n",
|
||||
" \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n",
|
||||
" \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n",
|
||||
" \"So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?\",\n",
|
||||
" \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n",
|
||||
" \"I spent quite a bit of time there, just admiring their playful antics and interactions with each other.\",\n",
|
||||
" \"But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!\",\n",
|
||||
" \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n",
|
||||
" \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n",
|
||||
" \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n",
|
||||
" \"So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.\",\n",
|
||||
" \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n",
|
||||
" \"I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.\",\n",
|
||||
" \"But soon enough, I felt re-energized and ready to, like, embark on the next leg of my zoo adventure ; there was still so much to explore!\",\n",
|
||||
" \"Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!\",\n",
|
||||
" \"I have to admit, I'm not exactly fond of snakes, but, I braved my fears and ventured in anyway ; it was quite the adrenaline rush.\",\n",
|
||||
" \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\",\n",
|
||||
" \"So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.\",\n",
|
||||
" \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n",
|
||||
" \"After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.\",\n",
|
||||
" \"I absolutely adore penguins ; they're just so adorable and, comical, especially when they waddle around on land ; it's simply delightful to watch.\",\n",
|
||||
" \"I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.\",\n",
|
||||
" \"But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!\",\n",
|
||||
" \"Oh, I stumbled upon the big cat area next ; now that, that was an exhilarating experience, let me tell you!\",\n",
|
||||
" \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n",
|
||||
" \"I kept a safe distance, of course, but, just being in their presence filled me with a sense of awe and wonder.\",\n",
|
||||
" \"But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.\",\n",
|
||||
" \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n",
|
||||
" \"So, I reluctantly made my way towards the exit, but not before promising myself that I'd, like, come back again soon ; there was still so much more to see and explore!\",\n",
|
||||
" \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "3d503650-3347-41ce-9d1b-177183a28db8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"texts = [\" I will have to, uh, make sure to charge my camera battery ; I don't want it dying on me while we're out.\",\n",
|
||||
" \" I heard there's a, a sort of farmers' market on the weekends ; so we might, uh, we might swing by that, uh, just to see, you know?\",\n",
|
||||
" \" I'm not really, uh, sure what to expect from this shopping center ; but I've heard good things from friends who've been there already.\",\n",
|
||||
" ' I will have to, uh, remind my sister to, uh, wear comfortable shoes ; last time she insisted on heels and regretted it halfway through the day.',\n",
|
||||
" \" I've been saving up for a, uh, new phone case, so I will, uh, definitely check out the, uh, tech accessory store.\",\n",
|
||||
" \" my family, we like to, uh, make a day of it ; you know, shopping together, eating together, it's, uh, it's nice.\",\n",
|
||||
" \"; , And I've got to, uh, look for some new headphones ; mine just broke and, uh, I can't go to the gym without them, you know?\",\n",
|
||||
" \"; , My dad, he wants to check out the, uh, the hardware store ; he's always looking for tools and, uh, stuff like that.\",\n",
|
||||
" 'But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.',\n",
|
||||
" 'So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.',\n",
|
||||
" 'But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!',\n",
|
||||
" \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n",
|
||||
" \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n",
|
||||
" \" I will probably, uh, get distracted by all the, uh, sales and promotions ; I'm a sucker for a good deal.\",\n",
|
||||
" \"; , Uhm, my cousin, the one who's, uh, into photography ; he's, uh, he's going to look for a new, uh, camera lens at the, the camera store.\",\n",
|
||||
" \"; And, uh, there's this, uh, arcade area that I'm kinda, uh, interested in ; I used to love playing those games when I was younger.\",\n",
|
||||
" \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n",
|
||||
" \"; , And, uh, my mom's been talking about this, uh, new recipe book she wants to get ; so she's on the lookout for it.\",\n",
|
||||
" \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n",
|
||||
" \"; I think my dad's, uh, really excited about the, uh, electronics section ; he's been talking about upgrading his gadgets for a while now.\",\n",
|
||||
" \" there's this, uh, chocolate shop that gives out free samples ; and, uh, you know, who can say no to, uh, free chocolate?\",\n",
|
||||
" \"; , Uhm, they've got an indoor, uh, mini-golf place ; which could be a, uh, fun way to take a break from all the shopping.\",\n",
|
||||
" '; , So the plan is to, uhm, meet up at, like, the central plaza ; after everyone is done with their, uh, their own shopping and stuff.',\n",
|
||||
" \"; , And, uh, my aunt, she's big on, uhm, organic stuff ; so she's excited about the, the natural skincare shop that's, uh, that's there.\",\n",
|
||||
" 'But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!',\n",
|
||||
" \" I will probably, uh, end up carrying most of the shopping bags ; it's just easier for me since I'm used to it.\",\n",
|
||||
" \"; , And, uh, my mom's been talking about this, uh, new fitness craze she wants to try ; so she's looking for workout gear.\",\n",
|
||||
" ' I will have to, uh, make sure to bring my reusable shopping bags ; you know, to reduce waste and all that.',\n",
|
||||
" \"; , Uhm, my family, we're not, you know, super outdoorsy ; but there's an outdoor goods store, so maybe we'll, uh, we'll take a look, just in case, you know?\",\n",
|
||||
" 'Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!',\n",
|
||||
" ' I will need to, uh, make a list of things I want to buy beforehand ; otherwise, I will end up forgetting.',\n",
|
||||
" \"; They have this, uh, loyalty program thing ; so I'm gonna sign up, you know, to get, uh, points or whatever, which is, uhm, pretty neat.\",\n",
|
||||
" \"; My dad, he's, uh, not that into shopping, right? ; But there's an electronics store, so he'll, he'll probably just, uh, hang out there while we, um, do our thing.\",\n",
|
||||
" \" I will probably, uh, take a lot of photos while we're there ; you know, just to capture the memories of our family outing.\",\n",
|
||||
" \"; So, we were thinking of going to, uh, that new shopping center, you know, the one that just opened up last month ; and I'm actually, I'm kind of excited about it.\",\n",
|
||||
" 'I spent quite a bit of time there, just admiring their playful antics and interactions with each other.',\n",
|
||||
" '; , And, uh, I might, uh, grab a snack from one of those, uh, food kiosks ; you know, just to keep my energy up while we shop.',\n",
|
||||
" \" I'm kind of excited to try the, the virtual reality experience ; I've never done that before, and, uh, it looks, uh, looks really cool.\",\n",
|
||||
" \"; My brother's, uh, he's looking forward to the, the gaming lounge ; they have, like, tournaments and stuff, which is, uhm, pretty cool, I think.\",\n",
|
||||
" \"; The kids, they have this, uhm, face painting thing ; so my niece, she's gonna love that, she's, uh, always asking for it.\",\n",
|
||||
" \" parking is, uh, it's free for the first two hours, which is, uh, nice ; but I'm sure we'll, we'll be there longer than that, so, uh, we'll see how it goes.\",\n",
|
||||
" \" I'm curious about the, the bookstore's cafe ; I've heard they make a, a mean latte, and, uh, you know, I love my coffee.\",\n",
|
||||
" 'I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.',\n",
|
||||
" \"; , I'm not much for clothes shopping, but, uh, I do need some new jeans ; so, uh, I will be on the lookout for a good, uh, denim place.\",\n",
|
||||
" \"; , And, uh, my sister's been talking about this, uh, new fashion trend she wants to try ; so she's looking for specific clothing items.\",\n",
|
||||
" \"; I think my brother's, uh, planning to splurge on some, uh, collector's items ; there's a store that sells rare memorabilia that he's into.\",\n",
|
||||
" 'I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.',\n",
|
||||
" \"; Well, I heard, uh, there's supposed to be some sort of event happening ; a, a raffle, or, uh, giveaway? I'm not totally sure but it, it sounds, uh, sounds fun.\",\n",
|
||||
" \"; , And, uh, my dad's been talking about this, uh, new barbecue grill he wants to get ; so I'm sure we'll end up at the home improvement store.\",\n",
|
||||
" \"; , And, uh, my mom mentioned something about a sale at, uh, her favorite clothing store ; so I bet we'll, we'll end up there for, like, I don't know, an hour, or something.\",\n",
|
||||
" \"; , Oh, and, uh, my sister's been talking about redecorating her room ; so she's looking for new bedding and decor.\",\n",
|
||||
" \" there's a, a pet store that, uh, lets you, you know, play with the puppies ; and, uh, my little brother, he's, he's definitely looking forward to that.\",\n",
|
||||
" \"; , Oh, and, uh, my sister's been talking about this, uh, new restaurant she wants to try ; so we might end up eating there.\",\n",
|
||||
" \"; , Oh, and, uh, my cousin, she's been talking about this, uh, new diet she's trying out ; so she's interested in checking out the health food store.\",\n",
|
||||
" \"; I'm hoping to, uh, find a nice gift for my mom's birthday ; there's a, uh, specialty shop that sells handmade items, so that might be perfect.\",\n",
|
||||
" \"; , And, uh, my little sister, she's all about, uh, the trendy fashion ; so we'll probably, you know, end up browsing those stores longer than expected.\",\n",
|
||||
" \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n",
|
||||
" \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n",
|
||||
" 'So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.',\n",
|
||||
" \"; , And, uh, the shopping center, it's got, like, this, uh, huge LED screen ; they show sports and, uh, movies on it, which is, uhm, pretty cool.\",\n",
|
||||
" \"; , And, uh, my dad's been talking about this, uh, new recipe he wants to try out for dinner ; so he's looking for ingredients.\",\n",
|
||||
" \"; , And, uh, my aunt's been raving about this, uh, new skincare brand ; so she's eager to see if the store carries it.\",\n",
|
||||
" \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n",
|
||||
" 'After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.',\n",
|
||||
" \"; , My grandma, she's always, uh, asking for new kitchen gadgets ; so we'll probably swing by the, uh, culinary store for her.\",\n",
|
||||
" \"; Uh, my sister, yeah, she's already, like, made a list of all the stores she wants to hit ; and she's, um, she's determined to find those, those boots she saw online, you know?\",\n",
|
||||
" \" I'm just hoping I can, you know, find some good deals ; maybe on, uh, clothes or, uhm, shoes? I, I could use some new sneakers, honestly.\",\n",
|
||||
" \"; , Uhm, I need to, uh, remember to, uh, pick up a gift for my friend's, uh, birthday ; so I will be, you know, on the lookout for something special.\",\n",
|
||||
" \" I'm kinda hoping we can find a good, uh, spot to sit and relax ; you know, when, uh, when our feet start hurting from all the walking around.\",\n",
|
||||
" '; I will have to, uh, remember to charge my phone before we go ; last time it died on me while we were shopping, which was, uh, inconvenient.',\n",
|
||||
" \"; , Oh, and, uh, I heard there's a, a rooftop garden with a nice view ; so we might take a break up there and enjoy the scenery.\",\n",
|
||||
" \"; , Oh, and, uh, my dad's been wanting to upgrade our, uh, home entertainment system ; so he's looking for a new TV.\",\n",
|
||||
" \"; And, uh, there's supposed to be this, this art exhibit ; in the, uhm, the central atrium, so that'll be, uh, that'll be something to see.\",\n",
|
||||
" \" I will probably, you know, look for some new books ; because they've got that, that big bookstore, and, uh, I could spend, like, hours in there, honestly.\",\n",
|
||||
" \"; , Oh, and, uh, my sister wants to try out this, uh, new makeup trend she saw online ; so she's looking for specific products.\",\n",
|
||||
" \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n",
|
||||
" \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n",
|
||||
" \"; , And, uh, I've heard there's a, a live music performance happening later in the evening ; so we might stick around for that if we're not too tired.\",\n",
|
||||
" \"; , Uh, last time we went shopping as a family, it was, uhm, quite the, the adventure ; so, uh, I'm expecting, you know, more of the same today.\",\n",
|
||||
" \" I will probably, uh, forget where we parked the car ; I'm terrible with remembering those things.\",\n",
|
||||
" \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n",
|
||||
" \" I'm thinking of, uh, treating myself to a, a massage at the, uh, spa center ; they have some pretty good deals going on right now.\",\n",
|
||||
" \"; , So, uh, yeah, after shopping we're, um, planning to grab dinner ; maybe try out that new, that new Italian place? I've heard, uhm, good things about it.\",\n",
|
||||
" 'So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!',\n",
|
||||
" 'So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?',\n",
|
||||
" \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n",
|
||||
" \" I will need to, uh, check the store directory when we get there ; I'm not familiar with the layout of the shopping center.\",\n",
|
||||
" \"; We're, uh, probably going to start with the, the food court because, uhm, my brother, he gets, uh, he gets real cranky if he doesn't eat ; and there's supposed to be a, a new sushi place.\",\n",
|
||||
" \"; My family, they tend to, uh, split up and, uh, meet back at a certain time ; so we're, uh, probably gonna do that, you know?\",\n",
|
||||
" \"; And, uh, there's a, a bookstore signing event ; some author my brother likes, so, uh, we'll probably, you know, check that out.\",\n",
|
||||
" \"; , Uhm, the weather's supposed to be good, so we'll, uh, probably take a break outside ; they've got benches and, uh, fountains and stuff.\",\n",
|
||||
" \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n",
|
||||
" \" I'm not sure if I mentioned this, but, uh, my aunt, she's got this, uh, thing about, uh, finding unique home decor pieces ; so we might, you know, spend a bit of time there.\",\n",
|
||||
" ' the plan is to, uh, get there early, because, you know, parking can be crazy, and, uh, my dad, he, uh, he hates looking for parking spots for too long.',\n",
|
||||
" \" I've been meaning to, uh, buy some new workout gear ; so I will definitely be browsing the, uh, sports apparel stores.\",\n",
|
||||
" \" I think my brother's, uh, interested in checking out the, uh, car accessories store ; he's been talking about upgrading his car stereo.\",\n",
|
||||
" 'Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!',\n",
|
||||
" \"; Oh, and, uh, my mom, she wants to check out the new, what's it, the home goods store ; because she's, uh, redecorating the living room, or something like that.\",\n",
|
||||
" \" my mom's been talking about this, this scarf she saw online ; and, uh, if we find the store, she's, uh, she's definitely buying it.\",\n",
|
||||
" \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9da9e2d-99b9-4c6f-8a08-c828e2cba121",
|
||||
@@ -289,7 +483,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 18,
|
||||
"id": "f0d216e5-4895-4da8-9d24-9e61021d2556",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -304,6 +498,54 @@
|
||||
"temperature = 0.667"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "443d515d-4a1d-4943-845e-9d60c9bb012a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from math import ceil\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def save_phoneme_durations(output):\n",
|
||||
" durations = output['attn'].squeeze().sum(1)\n",
|
||||
" phones = output['x_phones'][1::2]\n",
|
||||
" prev = durations[0]\n",
|
||||
" merged_durations = []\n",
|
||||
" # Convolve with stride 2\n",
|
||||
" for i in range(1, len(durations), 2):\n",
|
||||
" if i == len(durations) - 2:\n",
|
||||
" # if it is last take full value\n",
|
||||
" next_half = durations[i+1]\n",
|
||||
" else:\n",
|
||||
" next_half = ceil(durations[i+1] /2)\n",
|
||||
"\n",
|
||||
" curr = prev + durations[i] + next_half\n",
|
||||
" prev = durations[i+1] - next_half\n",
|
||||
" # print(durations[i-1:i+2], curr, next_half, prev)\n",
|
||||
" merged_durations.append(curr)\n",
|
||||
"\n",
|
||||
" assert len(phones) == len(merged_durations)\n",
|
||||
" assert len(merged_durations) == (len(durations) - 1) // 2\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" merged_durations = torch.cumsum(torch.tensor(merged_durations), 0, dtype=torch.long)\n",
|
||||
" start = torch.tensor(0)\n",
|
||||
" duration_json = []\n",
|
||||
" for i, duration in enumerate(merged_durations):\n",
|
||||
" duration_json.append({\n",
|
||||
" phones[i]: {\n",
|
||||
" \"starttime\": start.item(),\n",
|
||||
" \"endtime\": duration.item(),\n",
|
||||
" \"duration\": duration.item() - start.item()\n",
|
||||
" }\n",
|
||||
" })\n",
|
||||
" start = duration\n",
|
||||
" assert list(duration_json[-1].values())[0]['endtime'] == output['decoder_outputs'].shape[-1], f\"{list(duration_json[-1].values())[0]['endtime'], output['decoder_outputs'].shape[-1]}\"\n",
|
||||
" return duration_json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b93aac89-c7f8-4975-8510-4e763c9689f4",
|
||||
@@ -314,9 +556,82 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": null,
|
||||
"id": "5a227963-aa12-43b9-a706-1168b6fc0ba5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OUTPUT_FOLDER_TEMP = \"joe_fm_output_{}\"\n",
|
||||
"for name in range(6):\n",
|
||||
" OUTPUT_FOLDER = OUTPUT_FOLDER_TEMP.format(name)\n",
|
||||
" print(OUTPUT_FOLDER)\n",
|
||||
" outputs, rtfs = [], []\n",
|
||||
" rtfs_w = []\n",
|
||||
" for i, text in enumerate(tqdm(texts)):\n",
|
||||
" seed_everything(random.randint(1, 10000))\n",
|
||||
" output = synthesise(text) #, torch.tensor([15], device=device, dtype=torch.long).unsqueeze(0))\n",
|
||||
" output['waveform'] = to_waveform(output['mel'], vocoder)\n",
|
||||
" output['duration_json'] = save_phoneme_durations(output)\n",
|
||||
" # Compute Real Time Factor (RTF) with HiFi-GAN\n",
|
||||
" t = (dt.datetime.now() - output['start_t']).total_seconds()\n",
|
||||
" rtf_w = t * 22050 / (output['waveform'].shape[-1])\n",
|
||||
" \n",
|
||||
" ## Pretty print\n",
|
||||
" print(f\"{'*' * 53}\")\n",
|
||||
" print(f\"Input text - {i}\")\n",
|
||||
" print(f\"{'-' * 53}\")\n",
|
||||
" print(output['x_orig'])\n",
|
||||
" print(f\"{'*' * 53}\")\n",
|
||||
" print(f\"Phonetised text - {i}\")\n",
|
||||
" print(f\"{'-' * 53}\")\n",
|
||||
" print(output['x_phones'])\n",
|
||||
" print(f\"{'*' * 53}\")\n",
|
||||
" print(f\"RTF:\\t\\t{output['rtf']:.6f}\")\n",
|
||||
" print(f\"RTF Waveform:\\t{rtf_w:.6f}\")\n",
|
||||
" rtfs.append(output['rtf'])\n",
|
||||
" rtfs_w.append(rtf_w)\n",
|
||||
" \n",
|
||||
" ## Display the synthesised waveform\n",
|
||||
" # plot_spectrogram(output['mel'])\n",
|
||||
" # ipd.display(ipd.Audio(output['waveform'], rate=22050))\n",
|
||||
" \n",
|
||||
" ## Save the generated waveform\n",
|
||||
" save_to_folder(i, output, OUTPUT_FOLDER)\n",
|
||||
" \n",
|
||||
" print(f\"Number of ODE steps: {n_timesteps}\")\n",
|
||||
" print(f\"Mean RTF:\\t\\t\\t\\t{np.mean(rtfs):.6f} ± {np.std(rtfs):.6f}\")\n",
|
||||
" print(f\"Mean RTF Waveform (incl. vocoder):\\t{np.mean(rtfs_w):.6f} ± {np.std(rtfs_w):.6f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "107ff003-8a32-4a11-9294-31622b07fc3e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cd1f24e3-0fc8-487f-a478-46cda7ebaec9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d95afbb4-d0ae-4562-bbdf-e27fd99200cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "840cf38d-978b-4114-b5bb-8963a8a97517",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
|
||||
Reference in New Issue
Block a user