diff --git a/matcha/cli.py b/matcha/cli.py index 579d7d6..0b05583 100644 --- a/matcha/cli.py +++ b/matcha/cli.py @@ -227,7 +227,7 @@ def cli(): parser.add_argument( "--vocoder", type=str, - default=None, + default="hifigan_univ_v1", help="Vocoder to use (default: will use the one suggested with the pretrained model))", choices=VOCODER_URLS.keys(), ) diff --git a/matcha/data/text_mel_datamodule.py b/matcha/data/text_mel_datamodule.py index 704f936..3141293 100644 --- a/matcha/data/text_mel_datamodule.py +++ b/matcha/data/text_mel_datamodule.py @@ -109,7 +109,7 @@ class TextMelDataModule(LightningDataModule): """Clean up after fit or test.""" pass # pylint: disable=unnecessary-pass - def state_dict(self): # pylint: disable=no-self-use + def state_dict(self): """Extra things to save to checkpoint.""" return {} @@ -167,7 +167,7 @@ class TextMelDataset(torch.utils.data.Dataset): text = self.get_text(text, add_blank=self.add_blank) mel = self.get_mel(filepath) - return {"x": text, "y": mel, "spk": spk} + return {"x": text, "y": mel, "spk": spk, "filepath": filepath} def get_mel(self, filepath): audio, sr = ta.load(filepath) @@ -207,15 +207,16 @@ class TextMelBatchCollate: def __call__(self, batch): B = len(batch) - y_max_length = max([item["y"].shape[-1] for item in batch]) + y_max_length = max([item["y"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator y_max_length = fix_len_compatibility(y_max_length) - x_max_length = max([item["x"].shape[-1] for item in batch]) + x_max_length = max([item["x"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator n_feats = batch[0]["y"].shape[-2] y = torch.zeros((B, n_feats, y_max_length), dtype=torch.float32) x = torch.zeros((B, x_max_length), dtype=torch.long) y_lengths, x_lengths = [], [] spks = [] + filepaths = [] for i, item in enumerate(batch): y_, x_ = item["y"], item["x"] y_lengths.append(y_.shape[-1]) @@ -223,9 +224,10 @@ class TextMelBatchCollate: y[i, :, : y_.shape[-1]] = y_ x[i, : x_.shape[-1]] = x_ spks.append(item["spk"]) + filepaths.append(item["filepath"]) y_lengths = torch.tensor(y_lengths, dtype=torch.long) x_lengths = torch.tensor(x_lengths, dtype=torch.long) spks = torch.tensor(spks, dtype=torch.long) if self.n_spks > 1 else None - return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks} + return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks, "filepaths": filepaths} diff --git a/matcha/models/baselightningmodule.py b/matcha/models/baselightningmodule.py index 3724888..5fd09a4 100644 --- a/matcha/models/baselightningmodule.py +++ b/matcha/models/baselightningmodule.py @@ -58,7 +58,7 @@ class BaseLightningClass(LightningModule, ABC): y, y_lengths = batch["y"], batch["y_lengths"] spks = batch["spks"] - dur_loss, prior_loss, diff_loss = self( + dur_loss, prior_loss, diff_loss, *_ = self( x=x, x_lengths=x_lengths, y=y, diff --git a/matcha/models/matcha_tts.py b/matcha/models/matcha_tts.py index 8ff6388..ae951a2 100644 --- a/matcha/models/matcha_tts.py +++ b/matcha/models/matcha_tts.py @@ -4,7 +4,7 @@ import random import torch -import matcha.utils.monotonic_align as monotonic_align +import matcha.utils.monotonic_align as monotonic_align # pylint: disable=consider-using-from-import from matcha import utils from matcha.models.baselightningmodule import BaseLightningClass from matcha.models.components.duration_predictors import DP @@ -241,4 +241,4 @@ class MatchaTTS(BaseLightningClass): # 🍵 else: prior_loss = 0 - return dur_loss, prior_loss, diff_loss + return dur_loss, prior_loss, diff_loss, attn diff --git a/matcha/utils/get_durations_from_trained_model.py b/matcha/utils/get_durations_from_trained_model.py new file mode 100644 index 0000000..90ea1eb --- /dev/null +++ b/matcha/utils/get_durations_from_trained_model.py @@ -0,0 +1,174 @@ +r""" +The file creates a pickle file where the values needed for loading of dataset is stored and the model can load it +when needed. + +Parameters from hparam.py will be used +""" +import argparse +import os +import sys +from pathlib import Path + +import lightning +import numpy as np +import rootutils +import torch +from hydra import compose, initialize +from omegaconf import open_dict +from torch import nn +from tqdm.auto import tqdm + +from matcha.cli import get_device +from matcha.data.text_mel_datamodule import TextMelDataModule +from matcha.models.matcha_tts import MatchaTTS +from matcha.utils.logging_utils import pylogger + +log = pylogger.get_pylogger(__name__) + + +def save_durations_to_folder(attn: torch.Tensor, x_length: int, y_length: int, filepath: str, output_folder: Path): + durations = attn.squeeze().sum(1)[:x_length].numpy() + output = output_folder / Path(filepath).name.replace(".wav", ".npy") + np.save(output, durations) + + +@torch.inference_mode() +def compute_durations(data_loader: torch.utils.data.DataLoader, model: nn.Module, device: torch.device, output_folder): + """Generate durations from the model for each datapoint and save it in a folder + + Args: + data_loader (torch.utils.data.DataLoader): Dataloader + model (nn.Module): MatchaTTS model + device (torch.device): GPU or CPU + """ + + for batch in tqdm(data_loader, leave=False): + x, x_lengths = batch["x"], batch["x_lengths"] + y, y_lengths = batch["y"], batch["y_lengths"] + spks = batch["spks"] + x = x.to(device) + y = y.to(device) + x_lengths = x_lengths.to(device) + y_lengths = y_lengths.to(device) + spks = spks.to(device) if spks is not None else None + + _, _, _, attn = model( + x=x, + x_lengths=x_lengths, + y=y, + y_lengths=y_lengths, + spks=spks, + ) + attn = attn.cpu() + for i in range(attn.shape[0]): + save_durations_to_folder( + attn[i], x_lengths[i].item(), y_lengths[i].item(), batch["filepaths"][i], output_folder + ) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "-i", + "--input-config", + type=str, + default="vctk.yaml", + help="The name of the yaml config file under configs/data", + ) + + parser.add_argument( + "-b", + "--batch-size", + type=int, + default="32", + help="Can have increased batch size for faster computation", + ) + + parser.add_argument( + "-f", + "--force", + action="store_true", + default=False, + required=False, + help="force overwrite the file", + ) + parser.add_argument( + "-c", + "--checkpoint_path", + type=str, + required=True, + help="Path to the checkpoint file to load the model from", + ) + + parser.add_argument( + "-o", + "--output-folder", + type=str, + default=None, + help="Output folder to save the data statistics", + ) + + parser.add_argument( + "--cpu", action="store_true", help="Use CPU for inference, not recommended (default: use GPU if available)" + ) + + args = parser.parse_args() + + with initialize(version_base="1.3", config_path="../../configs/data"): + cfg = compose(config_name=args.input_config, return_hydra_config=True, overrides=[]) + + root_path = rootutils.find_root(search_from=__file__, indicator=".project-root") + + with open_dict(cfg): + del cfg["hydra"] + del cfg["_target_"] + cfg["seed"] = 1234 + cfg["batch_size"] = args.batch_size + cfg["train_filelist_path"] = str(os.path.join(root_path, cfg["train_filelist_path"])) + cfg["valid_filelist_path"] = str(os.path.join(root_path, cfg["valid_filelist_path"])) + + if args.output_folder is not None: + output_folder = Path(args.output_folder) + else: + output_folder = Path("data") / "processed_data" / "durations" / cfg["name"] + + if os.path.exists(output_folder) and not args.force: + print("Folder already exists. Use -f to force overwrite") + sys.exit(1) + + output_folder.mkdir(parents=True, exist_ok=True) + + print(f"Preprocessing: {cfg['name']} from training filelist: {cfg['train_filelist_path']}") + print("Loading model...") + device = get_device(args) + model = MatchaTTS.load_from_checkpoint(args.checkpoint_path, map_location=device) + + text_mel_datamodule = TextMelDataModule(**cfg) + text_mel_datamodule.setup() + try: + print("Computing stats for training set if exists...") + train_dataloader = text_mel_datamodule.train_dataloader() + compute_durations(train_dataloader, model, device, output_folder) + except lightning.fabric.utilities.exceptions.MisconfigurationException: + print("No training set found") + + try: + print("Computing stats for validation set if exists...") + val_dataloader = text_mel_datamodule.val_dataloader() + compute_durations(val_dataloader, model, device, output_folder) + except lightning.fabric.utilities.exceptions.MisconfigurationException: + print("No validation set found") + + try: + print("Computing stats for test set if exists...") + test_dataloader = text_mel_datamodule.test_dataloader() + compute_durations(test_dataloader, model, device, output_folder) + except lightning.fabric.utilities.exceptions.MisconfigurationException: + print("No test set found") + + print(f"[+] Done! Data statistics saved to: {output_folder}") + + +if __name__ == "__main__": + main() diff --git a/synthesis.ipynb b/synthesis.ipynb index c33c0e7..f2f6c31 100644 --- a/synthesis.ipynb +++ b/synthesis.ipynb @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "id": "8d5876c0-b47e-4c80-9e9c-62550f81b64e", "metadata": {}, "outputs": [], @@ -69,10 +69,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "b1a30306-588c-4f22-8d9b-e2676880b0e5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -82,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "id": "a312856b-01a9-4d75-a4c8-4666dffa0692", "metadata": {}, "outputs": [], @@ -100,16 +109,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "7640a4c1-44ce-447c-a8ff-45012fb7bddd", "metadata": {}, "outputs": [], "source": [ - "# MATCHA_CHECKPOINT = \"logs/train/hi-fi_en-US_female_piper_phonemizer/runs/2023-12-01_12-14-06/checkpoints/last.ckpt\"\n", - "# MATCHA_CHECKPOINT = get_user_data_dir()/\"matcha_ljspeech.ckpt\n", - "MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n", - "HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_T2_v1\"\n", - "OUTPUT_FOLDER = \"synth_output\"" + "# MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n", + "# MATCHA_CHECKPOINT = \"logs/train/lj_stoc/runs/2024-01-12_12-03-19/checkpoints/checkpoint_epoch=3299.ckpt\"\n", + "MATCHA_CHECKPOINT = \"logs/train/joe_stoc_dur/runs/2024-02-20_14-01-53/checkpoints/last.ckpt\"\n", + "HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_univ_v1\"\n", + "OUTPUT_FOLDER = \"cormac_fm_output2\"" ] }, { @@ -122,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "26a16230-04ba-4825-a844-2fb5ab945e24", "metadata": {}, "outputs": [ @@ -156,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "f6b68184-968d-4868-9029-f0c40e9e68af", "metadata": {}, "outputs": [ @@ -191,11 +200,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "880a1879-24fd-4757-849c-850339120796", "metadata": {}, "outputs": [], "source": [ + "import json\n", + "\n", "@torch.inference_mode()\n", "def process_text(text: str):\n", " x = torch.tensor(intersperse(text_to_sequence(text, ['english_cleaners2']), 0),dtype=torch.long, device=device)[None]\n", @@ -236,6 +247,9 @@ " folder.mkdir(exist_ok=True, parents=True)\n", " np.save(folder / f'{filename}', output['mel'].cpu().numpy())\n", " sf.write(folder / f'{filename}.wav', output['waveform'], 22050, 'PCM_24')\n", + " if \"duration_json\" in output:\n", + " with open(folder / f'{filename}.json', 'w', encoding='utf-8') as f:\n", + " json.dump(output['duration_json'], f, indent=4, ensure_ascii=False)\n", "\n", "def plot_spectrogram(spectrogram):\n", " fig, ax = plt.subplots(figsize=(12, 3))\n", @@ -257,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "id": "2e0a9acd-0845-4192-ba09-b9683e28a3ac", "metadata": {}, "outputs": [], @@ -279,6 +293,186 @@ "]" ] }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f58e82fa-0ae0-43f2-a021-97efc6041793", + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\n", + " \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n", + " \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n", + " \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n", + " \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n", + " \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n", + " \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n", + " \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n", + " \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n", + " \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n", + " \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n", + " \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n", + " \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n", + " \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n", + " \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n", + " \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n", + " \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n", + " \n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "38aff9af-420f-4ab8-a9b1-7d12fc5bcb57", + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\n", + " \"So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!\",\n", + " \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n", + " \"Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!\",\n", + " \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n", + " \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n", + " \"So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?\",\n", + " \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n", + " \"I spent quite a bit of time there, just admiring their playful antics and interactions with each other.\",\n", + " \"But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!\",\n", + " \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n", + " \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n", + " \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n", + " \"So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.\",\n", + " \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n", + " \"I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.\",\n", + " \"But soon enough, I felt re-energized and ready to, like, embark on the next leg of my zoo adventure ; there was still so much to explore!\",\n", + " \"Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!\",\n", + " \"I have to admit, I'm not exactly fond of snakes, but, I braved my fears and ventured in anyway ; it was quite the adrenaline rush.\",\n", + " \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\",\n", + " \"So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.\",\n", + " \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n", + " \"After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.\",\n", + " \"I absolutely adore penguins ; they're just so adorable and, comical, especially when they waddle around on land ; it's simply delightful to watch.\",\n", + " \"I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.\",\n", + " \"But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!\",\n", + " \"Oh, I stumbled upon the big cat area next ; now that, that was an exhilarating experience, let me tell you!\",\n", + " \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n", + " \"I kept a safe distance, of course, but, just being in their presence filled me with a sense of awe and wonder.\",\n", + " \"But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.\",\n", + " \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n", + " \"So, I reluctantly made my way towards the exit, but not before promising myself that I'd, like, come back again soon ; there was still so much more to see and explore!\",\n", + " \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3d503650-3347-41ce-9d1b-177183a28db8", + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\" I will have to, uh, make sure to charge my camera battery ; I don't want it dying on me while we're out.\",\n", + " \" I heard there's a, a sort of farmers' market on the weekends ; so we might, uh, we might swing by that, uh, just to see, you know?\",\n", + " \" I'm not really, uh, sure what to expect from this shopping center ; but I've heard good things from friends who've been there already.\",\n", + " ' I will have to, uh, remind my sister to, uh, wear comfortable shoes ; last time she insisted on heels and regretted it halfway through the day.',\n", + " \" I've been saving up for a, uh, new phone case, so I will, uh, definitely check out the, uh, tech accessory store.\",\n", + " \" my family, we like to, uh, make a day of it ; you know, shopping together, eating together, it's, uh, it's nice.\",\n", + " \"; , And I've got to, uh, look for some new headphones ; mine just broke and, uh, I can't go to the gym without them, you know?\",\n", + " \"; , My dad, he wants to check out the, uh, the hardware store ; he's always looking for tools and, uh, stuff like that.\",\n", + " 'But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.',\n", + " 'So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.',\n", + " 'But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!',\n", + " \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n", + " \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n", + " \" I will probably, uh, get distracted by all the, uh, sales and promotions ; I'm a sucker for a good deal.\",\n", + " \"; , Uhm, my cousin, the one who's, uh, into photography ; he's, uh, he's going to look for a new, uh, camera lens at the, the camera store.\",\n", + " \"; And, uh, there's this, uh, arcade area that I'm kinda, uh, interested in ; I used to love playing those games when I was younger.\",\n", + " \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n", + " \"; , And, uh, my mom's been talking about this, uh, new recipe book she wants to get ; so she's on the lookout for it.\",\n", + " \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n", + " \"; I think my dad's, uh, really excited about the, uh, electronics section ; he's been talking about upgrading his gadgets for a while now.\",\n", + " \" there's this, uh, chocolate shop that gives out free samples ; and, uh, you know, who can say no to, uh, free chocolate?\",\n", + " \"; , Uhm, they've got an indoor, uh, mini-golf place ; which could be a, uh, fun way to take a break from all the shopping.\",\n", + " '; , So the plan is to, uhm, meet up at, like, the central plaza ; after everyone is done with their, uh, their own shopping and stuff.',\n", + " \"; , And, uh, my aunt, she's big on, uhm, organic stuff ; so she's excited about the, the natural skincare shop that's, uh, that's there.\",\n", + " 'But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!',\n", + " \" I will probably, uh, end up carrying most of the shopping bags ; it's just easier for me since I'm used to it.\",\n", + " \"; , And, uh, my mom's been talking about this, uh, new fitness craze she wants to try ; so she's looking for workout gear.\",\n", + " ' I will have to, uh, make sure to bring my reusable shopping bags ; you know, to reduce waste and all that.',\n", + " \"; , Uhm, my family, we're not, you know, super outdoorsy ; but there's an outdoor goods store, so maybe we'll, uh, we'll take a look, just in case, you know?\",\n", + " 'Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!',\n", + " ' I will need to, uh, make a list of things I want to buy beforehand ; otherwise, I will end up forgetting.',\n", + " \"; They have this, uh, loyalty program thing ; so I'm gonna sign up, you know, to get, uh, points or whatever, which is, uhm, pretty neat.\",\n", + " \"; My dad, he's, uh, not that into shopping, right? ; But there's an electronics store, so he'll, he'll probably just, uh, hang out there while we, um, do our thing.\",\n", + " \" I will probably, uh, take a lot of photos while we're there ; you know, just to capture the memories of our family outing.\",\n", + " \"; So, we were thinking of going to, uh, that new shopping center, you know, the one that just opened up last month ; and I'm actually, I'm kind of excited about it.\",\n", + " 'I spent quite a bit of time there, just admiring their playful antics and interactions with each other.',\n", + " '; , And, uh, I might, uh, grab a snack from one of those, uh, food kiosks ; you know, just to keep my energy up while we shop.',\n", + " \" I'm kind of excited to try the, the virtual reality experience ; I've never done that before, and, uh, it looks, uh, looks really cool.\",\n", + " \"; My brother's, uh, he's looking forward to the, the gaming lounge ; they have, like, tournaments and stuff, which is, uhm, pretty cool, I think.\",\n", + " \"; The kids, they have this, uhm, face painting thing ; so my niece, she's gonna love that, she's, uh, always asking for it.\",\n", + " \" parking is, uh, it's free for the first two hours, which is, uh, nice ; but I'm sure we'll, we'll be there longer than that, so, uh, we'll see how it goes.\",\n", + " \" I'm curious about the, the bookstore's cafe ; I've heard they make a, a mean latte, and, uh, you know, I love my coffee.\",\n", + " 'I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.',\n", + " \"; , I'm not much for clothes shopping, but, uh, I do need some new jeans ; so, uh, I will be on the lookout for a good, uh, denim place.\",\n", + " \"; , And, uh, my sister's been talking about this, uh, new fashion trend she wants to try ; so she's looking for specific clothing items.\",\n", + " \"; I think my brother's, uh, planning to splurge on some, uh, collector's items ; there's a store that sells rare memorabilia that he's into.\",\n", + " 'I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.',\n", + " \"; Well, I heard, uh, there's supposed to be some sort of event happening ; a, a raffle, or, uh, giveaway? I'm not totally sure but it, it sounds, uh, sounds fun.\",\n", + " \"; , And, uh, my dad's been talking about this, uh, new barbecue grill he wants to get ; so I'm sure we'll end up at the home improvement store.\",\n", + " \"; , And, uh, my mom mentioned something about a sale at, uh, her favorite clothing store ; so I bet we'll, we'll end up there for, like, I don't know, an hour, or something.\",\n", + " \"; , Oh, and, uh, my sister's been talking about redecorating her room ; so she's looking for new bedding and decor.\",\n", + " \" there's a, a pet store that, uh, lets you, you know, play with the puppies ; and, uh, my little brother, he's, he's definitely looking forward to that.\",\n", + " \"; , Oh, and, uh, my sister's been talking about this, uh, new restaurant she wants to try ; so we might end up eating there.\",\n", + " \"; , Oh, and, uh, my cousin, she's been talking about this, uh, new diet she's trying out ; so she's interested in checking out the health food store.\",\n", + " \"; I'm hoping to, uh, find a nice gift for my mom's birthday ; there's a, uh, specialty shop that sells handmade items, so that might be perfect.\",\n", + " \"; , And, uh, my little sister, she's all about, uh, the trendy fashion ; so we'll probably, you know, end up browsing those stores longer than expected.\",\n", + " \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n", + " \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n", + " 'So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.',\n", + " \"; , And, uh, the shopping center, it's got, like, this, uh, huge LED screen ; they show sports and, uh, movies on it, which is, uhm, pretty cool.\",\n", + " \"; , And, uh, my dad's been talking about this, uh, new recipe he wants to try out for dinner ; so he's looking for ingredients.\",\n", + " \"; , And, uh, my aunt's been raving about this, uh, new skincare brand ; so she's eager to see if the store carries it.\",\n", + " \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n", + " 'After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.',\n", + " \"; , My grandma, she's always, uh, asking for new kitchen gadgets ; so we'll probably swing by the, uh, culinary store for her.\",\n", + " \"; Uh, my sister, yeah, she's already, like, made a list of all the stores she wants to hit ; and she's, um, she's determined to find those, those boots she saw online, you know?\",\n", + " \" I'm just hoping I can, you know, find some good deals ; maybe on, uh, clothes or, uhm, shoes? I, I could use some new sneakers, honestly.\",\n", + " \"; , Uhm, I need to, uh, remember to, uh, pick up a gift for my friend's, uh, birthday ; so I will be, you know, on the lookout for something special.\",\n", + " \" I'm kinda hoping we can find a good, uh, spot to sit and relax ; you know, when, uh, when our feet start hurting from all the walking around.\",\n", + " '; I will have to, uh, remember to charge my phone before we go ; last time it died on me while we were shopping, which was, uh, inconvenient.',\n", + " \"; , Oh, and, uh, I heard there's a, a rooftop garden with a nice view ; so we might take a break up there and enjoy the scenery.\",\n", + " \"; , Oh, and, uh, my dad's been wanting to upgrade our, uh, home entertainment system ; so he's looking for a new TV.\",\n", + " \"; And, uh, there's supposed to be this, this art exhibit ; in the, uhm, the central atrium, so that'll be, uh, that'll be something to see.\",\n", + " \" I will probably, you know, look for some new books ; because they've got that, that big bookstore, and, uh, I could spend, like, hours in there, honestly.\",\n", + " \"; , Oh, and, uh, my sister wants to try out this, uh, new makeup trend she saw online ; so she's looking for specific products.\",\n", + " \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n", + " \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n", + " \"; , And, uh, I've heard there's a, a live music performance happening later in the evening ; so we might stick around for that if we're not too tired.\",\n", + " \"; , Uh, last time we went shopping as a family, it was, uhm, quite the, the adventure ; so, uh, I'm expecting, you know, more of the same today.\",\n", + " \" I will probably, uh, forget where we parked the car ; I'm terrible with remembering those things.\",\n", + " \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n", + " \" I'm thinking of, uh, treating myself to a, a massage at the, uh, spa center ; they have some pretty good deals going on right now.\",\n", + " \"; , So, uh, yeah, after shopping we're, um, planning to grab dinner ; maybe try out that new, that new Italian place? I've heard, uhm, good things about it.\",\n", + " 'So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!',\n", + " 'So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?',\n", + " \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n", + " \" I will need to, uh, check the store directory when we get there ; I'm not familiar with the layout of the shopping center.\",\n", + " \"; We're, uh, probably going to start with the, the food court because, uhm, my brother, he gets, uh, he gets real cranky if he doesn't eat ; and there's supposed to be a, a new sushi place.\",\n", + " \"; My family, they tend to, uh, split up and, uh, meet back at a certain time ; so we're, uh, probably gonna do that, you know?\",\n", + " \"; And, uh, there's a, a bookstore signing event ; some author my brother likes, so, uh, we'll probably, you know, check that out.\",\n", + " \"; , Uhm, the weather's supposed to be good, so we'll, uh, probably take a break outside ; they've got benches and, uh, fountains and stuff.\",\n", + " \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n", + " \" I'm not sure if I mentioned this, but, uh, my aunt, she's got this, uh, thing about, uh, finding unique home decor pieces ; so we might, you know, spend a bit of time there.\",\n", + " ' the plan is to, uh, get there early, because, you know, parking can be crazy, and, uh, my dad, he, uh, he hates looking for parking spots for too long.',\n", + " \" I've been meaning to, uh, buy some new workout gear ; so I will definitely be browsing the, uh, sports apparel stores.\",\n", + " \" I think my brother's, uh, interested in checking out the, uh, car accessories store ; he's been talking about upgrading his car stereo.\",\n", + " 'Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!',\n", + " \"; Oh, and, uh, my mom, she wants to check out the new, what's it, the home goods store ; because she's, uh, redecorating the living room, or something like that.\",\n", + " \" my mom's been talking about this, this scarf she saw online ; and, uh, if we find the store, she's, uh, she's definitely buying it.\",\n", + " \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\"]" + ] + }, { "cell_type": "markdown", "id": "a9da9e2d-99b9-4c6f-8a08-c828e2cba121", @@ -289,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 18, "id": "f0d216e5-4895-4da8-9d24-9e61021d2556", "metadata": {}, "outputs": [], @@ -304,6 +498,54 @@ "temperature = 0.667" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "443d515d-4a1d-4943-845e-9d60c9bb012a", + "metadata": {}, + "outputs": [], + "source": [ + "from math import ceil\n", + "\n", + "\n", + "def save_phoneme_durations(output):\n", + " durations = output['attn'].squeeze().sum(1)\n", + " phones = output['x_phones'][1::2]\n", + " prev = durations[0]\n", + " merged_durations = []\n", + " # Convolve with stride 2\n", + " for i in range(1, len(durations), 2):\n", + " if i == len(durations) - 2:\n", + " # if it is last take full value\n", + " next_half = durations[i+1]\n", + " else:\n", + " next_half = ceil(durations[i+1] /2)\n", + "\n", + " curr = prev + durations[i] + next_half\n", + " prev = durations[i+1] - next_half\n", + " # print(durations[i-1:i+2], curr, next_half, prev)\n", + " merged_durations.append(curr)\n", + "\n", + " assert len(phones) == len(merged_durations)\n", + " assert len(merged_durations) == (len(durations) - 1) // 2\n", + "\n", + "\n", + " merged_durations = torch.cumsum(torch.tensor(merged_durations), 0, dtype=torch.long)\n", + " start = torch.tensor(0)\n", + " duration_json = []\n", + " for i, duration in enumerate(merged_durations):\n", + " duration_json.append({\n", + " phones[i]: {\n", + " \"starttime\": start.item(),\n", + " \"endtime\": duration.item(),\n", + " \"duration\": duration.item() - start.item()\n", + " }\n", + " })\n", + " start = duration\n", + " assert list(duration_json[-1].values())[0]['endtime'] == output['decoder_outputs'].shape[-1], f\"{list(duration_json[-1].values())[0]['endtime'], output['decoder_outputs'].shape[-1]}\"\n", + " return duration_json" + ] + }, { "cell_type": "markdown", "id": "b93aac89-c7f8-4975-8510-4e763c9689f4", @@ -314,9 +556,82 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "5a227963-aa12-43b9-a706-1168b6fc0ba5", "metadata": {}, + "outputs": [], + "source": [ + "OUTPUT_FOLDER_TEMP = \"joe_fm_output_{}\"\n", + "for name in range(6):\n", + " OUTPUT_FOLDER = OUTPUT_FOLDER_TEMP.format(name)\n", + " print(OUTPUT_FOLDER)\n", + " outputs, rtfs = [], []\n", + " rtfs_w = []\n", + " for i, text in enumerate(tqdm(texts)):\n", + " seed_everything(random.randint(1, 10000))\n", + " output = synthesise(text) #, torch.tensor([15], device=device, dtype=torch.long).unsqueeze(0))\n", + " output['waveform'] = to_waveform(output['mel'], vocoder)\n", + " output['duration_json'] = save_phoneme_durations(output)\n", + " # Compute Real Time Factor (RTF) with HiFi-GAN\n", + " t = (dt.datetime.now() - output['start_t']).total_seconds()\n", + " rtf_w = t * 22050 / (output['waveform'].shape[-1])\n", + " \n", + " ## Pretty print\n", + " print(f\"{'*' * 53}\")\n", + " print(f\"Input text - {i}\")\n", + " print(f\"{'-' * 53}\")\n", + " print(output['x_orig'])\n", + " print(f\"{'*' * 53}\")\n", + " print(f\"Phonetised text - {i}\")\n", + " print(f\"{'-' * 53}\")\n", + " print(output['x_phones'])\n", + " print(f\"{'*' * 53}\")\n", + " print(f\"RTF:\\t\\t{output['rtf']:.6f}\")\n", + " print(f\"RTF Waveform:\\t{rtf_w:.6f}\")\n", + " rtfs.append(output['rtf'])\n", + " rtfs_w.append(rtf_w)\n", + " \n", + " ## Display the synthesised waveform\n", + " # plot_spectrogram(output['mel'])\n", + " # ipd.display(ipd.Audio(output['waveform'], rate=22050))\n", + " \n", + " ## Save the generated waveform\n", + " save_to_folder(i, output, OUTPUT_FOLDER)\n", + " \n", + " print(f\"Number of ODE steps: {n_timesteps}\")\n", + " print(f\"Mean RTF:\\t\\t\\t\\t{np.mean(rtfs):.6f} ± {np.std(rtfs):.6f}\")\n", + " print(f\"Mean RTF Waveform (incl. vocoder):\\t{np.mean(rtfs_w):.6f} ± {np.std(rtfs_w):.6f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "107ff003-8a32-4a11-9294-31622b07fc3e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd1f24e3-0fc8-487f-a478-46cda7ebaec9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d95afbb4-d0ae-4562-bbdf-e27fd99200cd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "840cf38d-978b-4114-b5bb-8963a8a97517", + "metadata": {}, "outputs": [ { "data": {