Adding possibility of getting durations out

This commit is contained in:
Shivam Mehta
2024-02-24 15:10:19 +00:00
parent def0855608
commit 8e87111a98
6 changed files with 516 additions and 25 deletions

View File

@@ -227,7 +227,7 @@ def cli():
parser.add_argument(
"--vocoder",
type=str,
default=None,
default="hifigan_univ_v1",
help="Vocoder to use (default: will use the one suggested with the pretrained model))",
choices=VOCODER_URLS.keys(),
)

View File

@@ -109,7 +109,7 @@ class TextMelDataModule(LightningDataModule):
"""Clean up after fit or test."""
pass # pylint: disable=unnecessary-pass
def state_dict(self): # pylint: disable=no-self-use
def state_dict(self):
"""Extra things to save to checkpoint."""
return {}
@@ -167,7 +167,7 @@ class TextMelDataset(torch.utils.data.Dataset):
text = self.get_text(text, add_blank=self.add_blank)
mel = self.get_mel(filepath)
return {"x": text, "y": mel, "spk": spk}
return {"x": text, "y": mel, "spk": spk, "filepath": filepath}
def get_mel(self, filepath):
audio, sr = ta.load(filepath)
@@ -207,15 +207,16 @@ class TextMelBatchCollate:
def __call__(self, batch):
B = len(batch)
y_max_length = max([item["y"].shape[-1] for item in batch])
y_max_length = max([item["y"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator
y_max_length = fix_len_compatibility(y_max_length)
x_max_length = max([item["x"].shape[-1] for item in batch])
x_max_length = max([item["x"].shape[-1] for item in batch]) # pylint: disable=consider-using-generator
n_feats = batch[0]["y"].shape[-2]
y = torch.zeros((B, n_feats, y_max_length), dtype=torch.float32)
x = torch.zeros((B, x_max_length), dtype=torch.long)
y_lengths, x_lengths = [], []
spks = []
filepaths = []
for i, item in enumerate(batch):
y_, x_ = item["y"], item["x"]
y_lengths.append(y_.shape[-1])
@@ -223,9 +224,10 @@ class TextMelBatchCollate:
y[i, :, : y_.shape[-1]] = y_
x[i, : x_.shape[-1]] = x_
spks.append(item["spk"])
filepaths.append(item["filepath"])
y_lengths = torch.tensor(y_lengths, dtype=torch.long)
x_lengths = torch.tensor(x_lengths, dtype=torch.long)
spks = torch.tensor(spks, dtype=torch.long) if self.n_spks > 1 else None
return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks}
return {"x": x, "x_lengths": x_lengths, "y": y, "y_lengths": y_lengths, "spks": spks, "filepaths": filepaths}

View File

@@ -58,7 +58,7 @@ class BaseLightningClass(LightningModule, ABC):
y, y_lengths = batch["y"], batch["y_lengths"]
spks = batch["spks"]
dur_loss, prior_loss, diff_loss = self(
dur_loss, prior_loss, diff_loss, *_ = self(
x=x,
x_lengths=x_lengths,
y=y,

View File

@@ -4,7 +4,7 @@ import random
import torch
import matcha.utils.monotonic_align as monotonic_align
import matcha.utils.monotonic_align as monotonic_align # pylint: disable=consider-using-from-import
from matcha import utils
from matcha.models.baselightningmodule import BaseLightningClass
from matcha.models.components.duration_predictors import DP
@@ -241,4 +241,4 @@ class MatchaTTS(BaseLightningClass): # 🍵
else:
prior_loss = 0
return dur_loss, prior_loss, diff_loss
return dur_loss, prior_loss, diff_loss, attn

View File

@@ -0,0 +1,174 @@
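r"""
Extract per-token durations from a trained Matcha-TTS checkpoint.

For every utterance yielded by the train/validation/test dataloaders, the alignment
returned by the model is summed over the mel-frame axis and saved as a ``.npy`` file
(named after the audio file) in the output folder.
Dataset parameters are read from a yaml config file under configs/data.
"""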
import argparse
import os
import sys
from pathlib import Path
import lightning
import numpy as np
import rootutils
import torch
from hydra import compose, initialize
from omegaconf import open_dict
from torch import nn
from tqdm.auto import tqdm
from matcha.cli import get_device
from matcha.data.text_mel_datamodule import TextMelDataModule
from matcha.models.matcha_tts import MatchaTTS
from matcha.utils.logging_utils import pylogger
log = pylogger.get_pylogger(__name__)
def save_durations_to_folder(attn: torch.Tensor, x_length: int, y_length: int, filepath: str, output_folder: Path):
    """Save the per-token durations of one utterance as ``<audio file stem>.npy``.

    Args:
        attn (torch.Tensor): Alignment of a single utterance. The last axis is summed to
            obtain one duration per text token (presumably shaped (text_len, mel_len),
            optionally with leading singleton dims -- TODO confirm against the model).
        x_length (int): Number of valid (unpadded) text tokens; durations past it are dropped.
        y_length (int): Number of valid mel frames. Unused; kept for interface compatibility.
        filepath (str): Path of the source audio file; only its file name is used.
        output_folder (Path): Existing folder in which the ``.npy`` file is written.
    """
    # Sum over the last (frame) axis and flatten, rather than `squeeze().sum(1)`:
    # `squeeze()` would collapse a one-token alignment to 1-D and make `sum(1)` fail.
    durations = attn.detach().cpu().sum(-1).reshape(-1)[:x_length].numpy()
    # `with_suffix` swaps only the real extension; a plain `.replace(".wav", ".npy")` would
    # also rewrite ".wav" inside the stem and leave non-wav extensions in the output name.
    output = Path(output_folder) / Path(filepath).with_suffix(".npy").name
    np.save(output, durations)
@torch.inference_mode()
def compute_durations(data_loader: torch.utils.data.DataLoader, model: nn.Module, device: torch.device, output_folder):
    """Run the model over every batch and write one durations file per utterance.

    Args:
        data_loader (torch.utils.data.DataLoader): Yields batch dicts with the keys
            ``x``, ``x_lengths``, ``y``, ``y_lengths``, ``spks`` and ``filepaths``.
        model (nn.Module): MatchaTTS model; the fourth value it returns is used as the alignment.
        device (torch.device): Device the batch tensors are moved to before the forward pass.
        output_folder: Folder forwarded to ``save_durations_to_folder`` for every utterance.
    """
    for batch in tqdm(data_loader, leave=False):
        speakers = batch["spks"]
        # Move all model inputs to the target device; speaker ids may be absent (None).
        inputs = {
            "x": batch["x"].to(device),
            "x_lengths": batch["x_lengths"].to(device),
            "y": batch["y"].to(device),
            "y_lengths": batch["y_lengths"].to(device),
            "spks": None if speakers is None else speakers.to(device),
        }

        # Only the alignment is needed; the three leading return values are discarded.
        _, _, _, alignment = model(**inputs)
        alignment = alignment.cpu()

        text_lengths = inputs["x_lengths"]
        mel_lengths = inputs["y_lengths"]
        audio_paths = batch["filepaths"]
        for idx, utterance_alignment in enumerate(alignment):
            save_durations_to_folder(
                utterance_alignment,
                text_lengths[idx].item(),
                mel_lengths[idx].item(),
                audio_paths[idx],
                output_folder,
            )
def main():
    """Command-line entry point: extract durations for every available dataset split.

    Parses the CLI arguments, composes the data config from ``configs/data``, loads the
    MatchaTTS checkpoint and runs ``compute_durations`` on the train, validation and test
    dataloaders, skipping any split whose dataloader is not configured.

    Exits with status 1 when the output folder already exists and ``--force`` is not given.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        "--input-config",
        type=str,
        default="vctk.yaml",
        help="The name of the yaml config file under configs/data",
    )

    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=32,  # was the string "32"; use an int so the default matches `type`
        help="Can have increased batch size for faster computation",
    )

    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        required=False,
        help="force overwrite the file",
    )
    parser.add_argument(
        "-c",
        "--checkpoint_path",
        type=str,
        required=True,
        help="Path to the checkpoint file to load the model from",
    )

    parser.add_argument(
        "-o",
        "--output-folder",
        type=str,
        default=None,
        help="Output folder to save the durations (default: data/processed_data/durations/<dataset name>)",
    )

    parser.add_argument(
        "--cpu", action="store_true", help="Use CPU for inference, not recommended (default: use GPU if available)"
    )

    args = parser.parse_args()

    with initialize(version_base="1.3", config_path="../../configs/data"):
        cfg = compose(config_name=args.input_config, return_hydra_config=True, overrides=[])

    root_path = rootutils.find_root(search_from=__file__, indicator=".project-root")

    with open_dict(cfg):
        # Remove the entries that are not passed to TextMelDataModule below.
        del cfg["hydra"]
        del cfg["_target_"]
        cfg["seed"] = 1234
        cfg["batch_size"] = args.batch_size
        # Filelist paths in the config are joined onto the project root.
        cfg["train_filelist_path"] = str(os.path.join(root_path, cfg["train_filelist_path"]))
        cfg["valid_filelist_path"] = str(os.path.join(root_path, cfg["valid_filelist_path"]))

    if args.output_folder is not None:
        output_folder = Path(args.output_folder)
    else:
        output_folder = Path("data") / "processed_data" / "durations" / cfg["name"]

    if output_folder.exists() and not args.force:
        print("Folder already exists. Use -f to force overwrite")
        sys.exit(1)

    output_folder.mkdir(parents=True, exist_ok=True)

    print(f"Preprocessing: {cfg['name']} from training filelist: {cfg['train_filelist_path']}")
    print("Loading model...")
    device = get_device(args)
    model = MatchaTTS.load_from_checkpoint(args.checkpoint_path, map_location=device)
    # Put the model in evaluation mode so train-only layers (e.g. dropout, if the model
    # has any) do not perturb the extracted alignments.
    model.eval()

    text_mel_datamodule = TextMelDataModule(**cfg)
    text_mel_datamodule.setup()

    splits = (
        ("training", text_mel_datamodule.train_dataloader),
        ("validation", text_mel_datamodule.val_dataloader),
        ("test", text_mel_datamodule.test_dataloader),
    )
    for split_name, get_dataloader in splits:
        print(f"Computing durations for {split_name} set if exists...")
        # Only the dataloader lookup is guarded, so an error raised while computing
        # durations is not misreported as a missing split.
        try:
            dataloader = get_dataloader()
        except lightning.fabric.utilities.exceptions.MisconfigurationException:
            print(f"No {split_name} set found")
            continue
        compute_durations(dataloader, model, device, output_folder)

    print(f"[+] Done! Durations saved to: {output_folder}")
if __name__ == "__main__":
main()

View File

@@ -37,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 7,
"id": "8d5876c0-b47e-4c80-9e9c-62550f81b64e",
"metadata": {},
"outputs": [],
@@ -69,10 +69,19 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"id": "b1a30306-588c-4f22-8d9b-e2676880b0e5",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
@@ -82,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"id": "a312856b-01a9-4d75-a4c8-4666dffa0692",
"metadata": {},
"outputs": [],
@@ -100,16 +109,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 10,
"id": "7640a4c1-44ce-447c-a8ff-45012fb7bddd",
"metadata": {},
"outputs": [],
"source": [
"# MATCHA_CHECKPOINT = \"logs/train/hi-fi_en-US_female_piper_phonemizer/runs/2023-12-01_12-14-06/checkpoints/last.ckpt\"\n",
"# MATCHA_CHECKPOINT = get_user_data_dir()/\"matcha_ljspeech.ckpt\n",
"MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n",
"HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_T2_v1\"\n",
"OUTPUT_FOLDER = \"synth_output\""
"# MATCHA_CHECKPOINT = \"logs/train/tsg2_stoc_dur/runs/2024-01-05_12-35-07/checkpoints/last.ckpt\"\n",
"# MATCHA_CHECKPOINT = \"logs/train/lj_stoc/runs/2024-01-12_12-03-19/checkpoints/checkpoint_epoch=3299.ckpt\"\n",
"MATCHA_CHECKPOINT = \"logs/train/joe_stoc_dur/runs/2024-02-20_14-01-53/checkpoints/last.ckpt\"\n",
"HIFIGAN_CHECKPOINT = get_user_data_dir() / \"hifigan_univ_v1\"\n",
"OUTPUT_FOLDER = \"cormac_fm_output2\""
]
},
{
@@ -122,7 +131,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 11,
"id": "26a16230-04ba-4825-a844-2fb5ab945e24",
"metadata": {},
"outputs": [
@@ -156,7 +165,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 12,
"id": "f6b68184-968d-4868-9029-f0c40e9e68af",
"metadata": {},
"outputs": [
@@ -191,11 +200,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 13,
"id": "880a1879-24fd-4757-849c-850339120796",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"@torch.inference_mode()\n",
"def process_text(text: str):\n",
" x = torch.tensor(intersperse(text_to_sequence(text, ['english_cleaners2']), 0),dtype=torch.long, device=device)[None]\n",
@@ -236,6 +247,9 @@
" folder.mkdir(exist_ok=True, parents=True)\n",
" np.save(folder / f'{filename}', output['mel'].cpu().numpy())\n",
" sf.write(folder / f'{filename}.wav', output['waveform'], 22050, 'PCM_24')\n",
" if \"duration_json\" in output:\n",
" with open(folder / f'{filename}.json', 'w', encoding='utf-8') as f:\n",
" json.dump(output['duration_json'], f, indent=4, ensure_ascii=False)\n",
"\n",
"def plot_spectrogram(spectrogram):\n",
" fig, ax = plt.subplots(figsize=(12, 3))\n",
@@ -257,7 +271,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 14,
"id": "2e0a9acd-0845-4192-ba09-b9683e28a3ac",
"metadata": {},
"outputs": [],
@@ -279,6 +293,186 @@
"]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f58e82fa-0ae0-43f2-a021-97efc6041793",
"metadata": {},
"outputs": [],
"source": [
"texts = [\n",
" \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
" \"If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
" \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
" \"; If, you know, it works as well as we would expect, it would change our, like, understanding of the world.\",\n",
" \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
" \"We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
" \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
" \"; We've, uh, been working on this project where, you know, I think is interesting, just, you know, really cool.\",\n",
" \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
" \"I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
" \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
" \"; I heard about this thing called, uhm, big wave surfing, where these guys tackle waves of over 20 metres, just insane.\",\n",
" \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
" \"I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
" \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
" \"; I need to go to this, uhm, mandatory work seminar that I really don't want to attend.\",\n",
" \n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "38aff9af-420f-4ab8-a9b1-7d12fc5bcb57",
"metadata": {},
"outputs": [],
"source": [
"texts = [\n",
" \"So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!\",\n",
" \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n",
" \"Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!\",\n",
" \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n",
" \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n",
" \"So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?\",\n",
" \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n",
" \"I spent quite a bit of time there, just admiring their playful antics and interactions with each other.\",\n",
" \"But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!\",\n",
" \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n",
" \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n",
" \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n",
" \"So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.\",\n",
" \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n",
" \"I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.\",\n",
" \"But soon enough, I felt re-energized and ready to, like, embark on the next leg of my zoo adventure ; there was still so much to explore!\",\n",
" \"Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!\",\n",
" \"I have to admit, I'm not exactly fond of snakes, but, I braved my fears and ventured in anyway ; it was quite the adrenaline rush.\",\n",
" \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\",\n",
" \"So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.\",\n",
" \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n",
" \"After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.\",\n",
" \"I absolutely adore penguins ; they're just so adorable and, comical, especially when they waddle around on land ; it's simply delightful to watch.\",\n",
" \"I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.\",\n",
" \"But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!\",\n",
" \"Oh, I stumbled upon the big cat area next ; now that, that was an exhilarating experience, let me tell you!\",\n",
" \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n",
" \"I kept a safe distance, of course, but, just being in their presence filled me with a sense of awe and wonder.\",\n",
" \"But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.\",\n",
" \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n",
" \"So, I reluctantly made my way towards the exit, but not before promising myself that I'd, like, come back again soon ; there was still so much more to see and explore!\",\n",
" \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3d503650-3347-41ce-9d1b-177183a28db8",
"metadata": {},
"outputs": [],
"source": [
"texts = [\" I will have to, uh, make sure to charge my camera battery ; I don't want it dying on me while we're out.\",\n",
" \" I heard there's a, a sort of farmers' market on the weekends ; so we might, uh, we might swing by that, uh, just to see, you know?\",\n",
" \" I'm not really, uh, sure what to expect from this shopping center ; but I've heard good things from friends who've been there already.\",\n",
" ' I will have to, uh, remind my sister to, uh, wear comfortable shoes ; last time she insisted on heels and regretted it halfway through the day.',\n",
" \" I've been saving up for a, uh, new phone case, so I will, uh, definitely check out the, uh, tech accessory store.\",\n",
" \" my family, we like to, uh, make a day of it ; you know, shopping together, eating together, it's, uh, it's nice.\",\n",
" \"; , And I've got to, uh, look for some new headphones ; mine just broke and, uh, I can't go to the gym without them, you know?\",\n",
" \"; , My dad, he wants to check out the, uh, the hardware store ; he's always looking for tools and, uh, stuff like that.\",\n",
" 'But, as the day started to draw to a close, I realized that my time at the zoo was coming to an end ; it was bittersweet, to say the least.',\n",
" 'So, after spending some time with the giraffes, I decided to take a little break and, like, grab a snack.',\n",
" 'But eventually, I tore myself away and, like, continued on with my zoo adventure ; there were still so many more exhibits to explore!',\n",
" \"Oh, I hadn't realized how quickly the hours had flown by ; I guess I was just having too much fun!\",\n",
" \"Ah, I'm a bit of a sucker for zoo food ; it may not be the healthiest, but, it's definitely delicious ; I just couldn't resist.\",\n",
" \" I will probably, uh, get distracted by all the, uh, sales and promotions ; I'm a sucker for a good deal.\",\n",
" \"; , Uhm, my cousin, the one who's, uh, into photography ; he's, uh, he's going to look for a new, uh, camera lens at the, the camera store.\",\n",
" \"; And, uh, there's this, uh, arcade area that I'm kinda, uh, interested in ; I used to love playing those games when I was younger.\",\n",
" \"You know, I'm not usually a fan of large crowds, but somehow, I managed to enjoy myself ; it was quite the adventure.\",\n",
" \"; , And, uh, my mom's been talking about this, uh, new recipe book she wants to get ; so she's on the lookout for it.\",\n",
" \"Ah, I'm proud of myself for confronting my fears head-on, but, you know, I don't think I'll be rushing back to the reptile house anytime soon ; once was probably enough for me.\",\n",
" \"; I think my dad's, uh, really excited about the, uh, electronics section ; he's been talking about upgrading his gadgets for a while now.\",\n",
" \" there's this, uh, chocolate shop that gives out free samples ; and, uh, you know, who can say no to, uh, free chocolate?\",\n",
" \"; , Uhm, they've got an indoor, uh, mini-golf place ; which could be a, uh, fun way to take a break from all the shopping.\",\n",
" '; , So the plan is to, uhm, meet up at, like, the central plaza ; after everyone is done with their, uh, their own shopping and stuff.',\n",
" \"; , And, uh, my aunt, she's big on, uhm, organic stuff ; so she's excited about the, the natural skincare shop that's, uh, that's there.\",\n",
" 'But eventually, I decided to tear myself away and, like, explore the rest of the zoo ; there was just so much more to see!',\n",
" \" I will probably, uh, end up carrying most of the shopping bags ; it's just easier for me since I'm used to it.\",\n",
" \"; , And, uh, my mom's been talking about this, uh, new fitness craze she wants to try ; so she's looking for workout gear.\",\n",
" ' I will have to, uh, make sure to bring my reusable shopping bags ; you know, to reduce waste and all that.',\n",
" \"; , Uhm, my family, we're not, you know, super outdoorsy ; but there's an outdoor goods store, so maybe we'll, uh, we'll take a look, just in case, you know?\",\n",
" 'Oh, I saw so many amazing animals ; lions, tigers, elephants, you name it ; it was like a wildlife paradise!',\n",
" ' I will need to, uh, make a list of things I want to buy beforehand ; otherwise, I will end up forgetting.',\n",
" \"; They have this, uh, loyalty program thing ; so I'm gonna sign up, you know, to get, uh, points or whatever, which is, uhm, pretty neat.\",\n",
" \"; My dad, he's, uh, not that into shopping, right? ; But there's an electronics store, so he'll, he'll probably just, uh, hang out there while we, um, do our thing.\",\n",
" \" I will probably, uh, take a lot of photos while we're there ; you know, just to capture the memories of our family outing.\",\n",
" \"; So, we were thinking of going to, uh, that new shopping center, you know, the one that just opened up last month ; and I'm actually, I'm kind of excited about it.\",\n",
" 'I spent quite a bit of time there, just admiring their playful antics and interactions with each other.',\n",
" '; , And, uh, I might, uh, grab a snack from one of those, uh, food kiosks ; you know, just to keep my energy up while we shop.',\n",
" \" I'm kind of excited to try the, the virtual reality experience ; I've never done that before, and, uh, it looks, uh, looks really cool.\",\n",
" \"; My brother's, uh, he's looking forward to the, the gaming lounge ; they have, like, tournaments and stuff, which is, uhm, pretty cool, I think.\",\n",
" \"; The kids, they have this, uhm, face painting thing ; so my niece, she's gonna love that, she's, uh, always asking for it.\",\n",
" \" parking is, uh, it's free for the first two hours, which is, uh, nice ; but I'm sure we'll, we'll be there longer than that, so, uh, we'll see how it goes.\",\n",
" \" I'm curious about the, the bookstore's cafe ; I've heard they make a, a mean latte, and, uh, you know, I love my coffee.\",\n",
" 'I spent a good amount of time there, just enjoying their playful antics and quirky personalities ; they never fail to bring a smile to my face.',\n",
" \"; , I'm not much for clothes shopping, but, uh, I do need some new jeans ; so, uh, I will be on the lookout for a good, uh, denim place.\",\n",
" \"; , And, uh, my sister's been talking about this, uh, new fashion trend she wants to try ; so she's looking for specific clothing items.\",\n",
" \"; I think my brother's, uh, planning to splurge on some, uh, collector's items ; there's a store that sells rare memorabilia that he's into.\",\n",
" 'I indulged in some popcorn and, a soda, and just, you know, sat down for a while, soaking in the vibrant atmosphere around me.',\n",
" \"; Well, I heard, uh, there's supposed to be some sort of event happening ; a, a raffle, or, uh, giveaway? I'm not totally sure but it, it sounds, uh, sounds fun.\",\n",
" \"; , And, uh, my dad's been talking about this, uh, new barbecue grill he wants to get ; so I'm sure we'll end up at the home improvement store.\",\n",
" \"; , And, uh, my mom mentioned something about a sale at, uh, her favorite clothing store ; so I bet we'll, we'll end up there for, like, I don't know, an hour, or something.\",\n",
" \"; , Oh, and, uh, my sister's been talking about redecorating her room ; so she's looking for new bedding and decor.\",\n",
" \" there's a, a pet store that, uh, lets you, you know, play with the puppies ; and, uh, my little brother, he's, he's definitely looking forward to that.\",\n",
" \"; , Oh, and, uh, my sister's been talking about this, uh, new restaurant she wants to try ; so we might end up eating there.\",\n",
" \"; , Oh, and, uh, my cousin, she's been talking about this, uh, new diet she's trying out ; so she's interested in checking out the health food store.\",\n",
" \"; I'm hoping to, uh, find a nice gift for my mom's birthday ; there's a, uh, specialty shop that sells handmade items, so that might be perfect.\",\n",
" \"; , And, uh, my little sister, she's all about, uh, the trendy fashion ; so we'll probably, you know, end up browsing those stores longer than expected.\",\n",
" \"I felt a profound sense of gratitude for the incredible experience I'd had at the zoo ; it was truly unforgettable, and I couldn't wait to, like, do it all over again!\",\n",
" \"Oh, I stumbled upon the giraffe enclosure next ; those majestic creatures are truly a sight to behold, don't you think?\",\n",
" 'So, I cautiously made my way through the exhibit, trying not to, like, jump at every sudden movement or slithering motion ; it was quite the challenge.',\n",
" \"; , And, uh, the shopping center, it's got, like, this, uh, huge LED screen ; they show sports and, uh, movies on it, which is, uhm, pretty cool.\",\n",
" \"; , And, uh, my dad's been talking about this, uh, new recipe he wants to try out for dinner ; so he's looking for ingredients.\",\n",
" \"; , And, uh, my aunt's been raving about this, uh, new skincare brand ; so she's eager to see if the store carries it.\",\n",
" \"I took a ton of pictures of them, but, none of them really did justice to their beauty ; they're just so graceful and magnificent.\",\n",
" 'After that rather nerve-wracking experience, I felt the need for something a little more uplifting, so I, like, headed straight for the penguin exhibit.',\n",
" \"; , My grandma, she's always, uh, asking for new kitchen gadgets ; so we'll probably swing by the, uh, culinary store for her.\",\n",
" \"; Uh, my sister, yeah, she's already, like, made a list of all the stores she wants to hit ; and she's, um, she's determined to find those, those boots she saw online, you know?\",\n",
" \" I'm just hoping I can, you know, find some good deals ; maybe on, uh, clothes or, uhm, shoes? I, I could use some new sneakers, honestly.\",\n",
" \"; , Uhm, I need to, uh, remember to, uh, pick up a gift for my friend's, uh, birthday ; so I will be, you know, on the lookout for something special.\",\n",
" \" I'm kinda hoping we can find a good, uh, spot to sit and relax ; you know, when, uh, when our feet start hurting from all the walking around.\",\n",
" '; I will have to, uh, remember to charge my phone before we go ; last time it died on me while we were shopping, which was, uh, inconvenient.',\n",
" \"; , Oh, and, uh, I heard there's a, a rooftop garden with a nice view ; so we might take a break up there and enjoy the scenery.\",\n",
" \"; , Oh, and, uh, my dad's been wanting to upgrade our, uh, home entertainment system ; so he's looking for a new TV.\",\n",
" \"; And, uh, there's supposed to be this, this art exhibit ; in the, uhm, the central atrium, so that'll be, uh, that'll be something to see.\",\n",
" \" I will probably, you know, look for some new books ; because they've got that, that big bookstore, and, uh, I could spend, like, hours in there, honestly.\",\n",
" \"; , Oh, and, uh, my sister wants to try out this, uh, new makeup trend she saw online ; so she's looking for specific products.\",\n",
" \"I have to admit, I was both excited and a little nervous to see the lions and tigers up close ; they're such majestic and powerful creatures.\",\n",
" \"Ah, I'm absolutely fascinated by monkeys ; they're so lively and entertaining to watch ; I could spend hours just observing them.\",\n",
" \"; , And, uh, I've heard there's a, a live music performance happening later in the evening ; so we might stick around for that if we're not too tired.\",\n",
" \"; , Uh, last time we went shopping as a family, it was, uhm, quite the, the adventure ; so, uh, I'm expecting, you know, more of the same today.\",\n",
" \" I will probably, uh, forget where we parked the car ; I'm terrible with remembering those things.\",\n",
" \"At first, I thought it might be crowded, but, fortunately, it wasn't too bad ; I guess I got lucky.\",\n",
" \" I'm thinking of, uh, treating myself to a, a massage at the, uh, spa center ; they have some pretty good deals going on right now.\",\n",
" \"; , So, uh, yeah, after shopping we're, um, planning to grab dinner ; maybe try out that new, that new Italian place? I've heard, uhm, good things about it.\",\n",
" 'So, yesterday I went to the zoo, and, you know, it was such an incredible experience ; I mean, like, absolutely breathtaking!',\n",
" 'So, when I got there, the first thing I did was head straight to the monkey exhibit ; those little guys are just so fascinating, you know?',\n",
" \"You know, I'm not exactly the best photographer, but I tried my best to capture their elegance and grandeur.\",\n",
" \" I will need to, uh, check the store directory when we get there ; I'm not familiar with the layout of the shopping center.\",\n",
" \"; We're, uh, probably going to start with the, the food court because, uhm, my brother, he gets, uh, he gets real cranky if he doesn't eat ; and there's supposed to be a, a new sushi place.\",\n",
" \"; My family, they tend to, uh, split up and, uh, meet back at a certain time ; so we're, uh, probably gonna do that, you know?\",\n",
" \"; And, uh, there's a, a bookstore signing event ; some author my brother likes, so, uh, we'll probably, you know, check that out.\",\n",
" \"; , Uhm, the weather's supposed to be good, so we'll, uh, probably take a break outside ; they've got benches and, uh, fountains and stuff.\",\n",
" \"I wasn't quite sure what to expect, but, you know, it turned out to be really, really fun ; like, surprisingly enjoyable.\",\n",
" \" I'm not sure if I mentioned this, but, uh, my aunt, she's got this, uh, thing about, uh, finding unique home decor pieces ; so we might, you know, spend a bit of time there.\",\n",
" ' the plan is to, uh, get there early, because, you know, parking can be crazy, and, uh, my dad, he, uh, he hates looking for parking spots for too long.',\n",
" \" I've been meaning to, uh, buy some new workout gear ; so I will definitely be browsing the, uh, sports apparel stores.\",\n",
" \" I think my brother's, uh, interested in checking out the, uh, car accessories store ; he's been talking about upgrading his car stereo.\",\n",
" 'Oh, I stumbled upon the reptile house next ; now that, that was an experience, let me tell you!',\n",
" \"; Oh, and, uh, my mom, she wants to check out the new, what's it, the home goods store ; because she's, uh, redecorating the living room, or something like that.\",\n",
" \" my mom's been talking about this, this scarf she saw online ; and, uh, if we find the store, she's, uh, she's definitely buying it.\",\n",
" \"You know, I'm not entirely sure why I did that to myself, but, it definitely made for an interesting experience, to say the least.\"]"
]
},
{
"cell_type": "markdown",
"id": "a9da9e2d-99b9-4c6f-8a08-c828e2cba121",
@@ -289,7 +483,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 18,
"id": "f0d216e5-4895-4da8-9d24-9e61021d2556",
"metadata": {},
"outputs": [],
@@ -304,6 +498,54 @@
"temperature = 0.667"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "443d515d-4a1d-4943-845e-9d60c9bb012a",
"metadata": {},
"outputs": [],
"source": [
"from math import ceil\n",
"\n",
"\n",
"def save_phoneme_durations(output):\n",
" durations = output['attn'].squeeze().sum(1)\n",
" phones = output['x_phones'][1::2]\n",
" prev = durations[0]\n",
" merged_durations = []\n",
    "    # Visit every second entry (the phones, at odd indices) and fold in its share of the neighbouring entries' durations\n",
" for i in range(1, len(durations), 2):\n",
" if i == len(durations) - 2:\n",
    "            # Last phone: take the full duration of the final entry instead of half\n",
" next_half = durations[i+1]\n",
" else:\n",
" next_half = ceil(durations[i+1] /2)\n",
"\n",
" curr = prev + durations[i] + next_half\n",
" prev = durations[i+1] - next_half\n",
" # print(durations[i-1:i+2], curr, next_half, prev)\n",
" merged_durations.append(curr)\n",
"\n",
" assert len(phones) == len(merged_durations)\n",
" assert len(merged_durations) == (len(durations) - 1) // 2\n",
"\n",
"\n",
" merged_durations = torch.cumsum(torch.tensor(merged_durations), 0, dtype=torch.long)\n",
" start = torch.tensor(0)\n",
" duration_json = []\n",
" for i, duration in enumerate(merged_durations):\n",
" duration_json.append({\n",
" phones[i]: {\n",
" \"starttime\": start.item(),\n",
" \"endtime\": duration.item(),\n",
" \"duration\": duration.item() - start.item()\n",
" }\n",
" })\n",
" start = duration\n",
" assert list(duration_json[-1].values())[0]['endtime'] == output['decoder_outputs'].shape[-1], f\"{list(duration_json[-1].values())[0]['endtime'], output['decoder_outputs'].shape[-1]}\"\n",
" return duration_json"
]
},
{
"cell_type": "markdown",
"id": "b93aac89-c7f8-4975-8510-4e763c9689f4",
@@ -314,9 +556,82 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"id": "5a227963-aa12-43b9-a706-1168b6fc0ba5",
"metadata": {},
"outputs": [],
"source": [
"OUTPUT_FOLDER_TEMP = \"joe_fm_output_{}\"\n",
"for name in range(6):\n",
" OUTPUT_FOLDER = OUTPUT_FOLDER_TEMP.format(name)\n",
" print(OUTPUT_FOLDER)\n",
" outputs, rtfs = [], []\n",
" rtfs_w = []\n",
" for i, text in enumerate(tqdm(texts)):\n",
" seed_everything(random.randint(1, 10000))\n",
" output = synthesise(text) #, torch.tensor([15], device=device, dtype=torch.long).unsqueeze(0))\n",
" output['waveform'] = to_waveform(output['mel'], vocoder)\n",
" output['duration_json'] = save_phoneme_durations(output)\n",
" # Compute Real Time Factor (RTF) with HiFi-GAN\n",
" t = (dt.datetime.now() - output['start_t']).total_seconds()\n",
" rtf_w = t * 22050 / (output['waveform'].shape[-1])\n",
" \n",
" ## Pretty print\n",
" print(f\"{'*' * 53}\")\n",
" print(f\"Input text - {i}\")\n",
" print(f\"{'-' * 53}\")\n",
" print(output['x_orig'])\n",
" print(f\"{'*' * 53}\")\n",
" print(f\"Phonetised text - {i}\")\n",
" print(f\"{'-' * 53}\")\n",
" print(output['x_phones'])\n",
" print(f\"{'*' * 53}\")\n",
" print(f\"RTF:\\t\\t{output['rtf']:.6f}\")\n",
" print(f\"RTF Waveform:\\t{rtf_w:.6f}\")\n",
" rtfs.append(output['rtf'])\n",
" rtfs_w.append(rtf_w)\n",
" \n",
" ## Display the synthesised waveform\n",
" # plot_spectrogram(output['mel'])\n",
" # ipd.display(ipd.Audio(output['waveform'], rate=22050))\n",
" \n",
" ## Save the generated waveform\n",
" save_to_folder(i, output, OUTPUT_FOLDER)\n",
" \n",
" print(f\"Number of ODE steps: {n_timesteps}\")\n",
" print(f\"Mean RTF:\\t\\t\\t\\t{np.mean(rtfs):.6f} ± {np.std(rtfs):.6f}\")\n",
" print(f\"Mean RTF Waveform (incl. vocoder):\\t{np.mean(rtfs_w):.6f} ± {np.std(rtfs_w):.6f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "107ff003-8a32-4a11-9294-31622b07fc3e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd1f24e3-0fc8-487f-a478-46cda7ebaec9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d95afbb4-d0ae-4562-bbdf-e27fd99200cd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 32,
"id": "840cf38d-978b-4114-b5bb-8963a8a97517",
"metadata": {},
"outputs": [
{
"data": {