Better default speaking rate: length_scale 0.95 for single-speaker and 0.85 for multi-speaker (was 1.0)

This commit is contained in:
Shivam Mehta
2023-09-20 15:23:46 +00:00
parent db95158043
commit 281a098337

View File

@@ -74,12 +74,21 @@ def load_model_ui(model_type, textbox):
spk_slider = gr.update(visible=False, value=-1) spk_slider = gr.update(visible=False, value=-1)
single_speaker_examples = gr.update(visible=True) single_speaker_examples = gr.update(visible=True)
multi_speaker_examples = gr.update(visible=False) multi_speaker_examples = gr.update(visible=False)
length_scale = gr.update(value=0.95)
else: else:
spk_slider = gr.update(visible=True, value=0) spk_slider = gr.update(visible=True, value=0)
single_speaker_examples = gr.update(visible=False) single_speaker_examples = gr.update(visible=False)
multi_speaker_examples = gr.update(visible=True) multi_speaker_examples = gr.update(visible=True)
length_scale = gr.update(value=0.85)
return textbox, gr.update(interactive=True), spk_slider, single_speaker_examples, multi_speaker_examples return (
textbox,
gr.update(interactive=True),
spk_slider,
single_speaker_examples,
multi_speaker_examples,
length_scale,
)
@torch.inference_mode() @torch.inference_mode()
@@ -220,43 +229,43 @@ def main():
"We propose Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses conditional flow matching (similar to rectified flows) to speed up O D E-based speech synthesis.", "We propose Matcha-TTS, a new approach to non-autoregressive neural TTS, that uses conditional flow matching (similar to rectified flows) to speed up O D E-based speech synthesis.",
50, 50,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.", "The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.",
2, 2,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.", "The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.",
4, 4,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.", "The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.",
10, 10,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.", "The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.",
50, 50,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The narrative of these events is based largely on the recollections of the participants.", "The narrative of these events is based largely on the recollections of the participants.",
10, 10,
0.677, 0.677,
1.0, 0.95,
], ],
[ [
"The jury did not believe him, and the verdict was for the defendants.", "The jury did not believe him, and the verdict was for the defendants.",
10, 10,
0.677, 0.677,
1.0, 0.95,
], ],
], ],
fn=ljspeech_example_cacher, fn=ljspeech_example_cacher,
@@ -272,35 +281,35 @@ def main():
"Hello everyone! I am speaker 0 and I am here to tell you that Matcha-TTS is amazing!", "Hello everyone! I am speaker 0 and I am here to tell you that Matcha-TTS is amazing!",
10, 10,
0.677, 0.677,
1.0, 0.85,
0, 0,
], ],
[ [
"Hello everyone! I am speaker 16 and I am here to tell you that Matcha-TTS is amazing!", "Hello everyone! I am speaker 16 and I am here to tell you that Matcha-TTS is amazing!",
10, 10,
0.677, 0.677,
1.0, 0.85,
16, 16,
], ],
[ [
"Hello everyone! I am speaker 44 and I am here to tell you that Matcha-TTS is amazing!", "Hello everyone! I am speaker 44 and I am here to tell you that Matcha-TTS is amazing!",
50, 50,
0.677, 0.677,
1.0, 0.85,
44, 44,
], ],
[ [
"Hello everyone! I am speaker 45 and I am here to tell you that Matcha-TTS is amazing!", "Hello everyone! I am speaker 45 and I am here to tell you that Matcha-TTS is amazing!",
50, 50,
0.677, 0.677,
1.0, 0.85,
45, 45,
], ],
[ [
"Hello everyone! I am speaker 58 and I am here to tell you that Matcha-TTS is amazing!", "Hello everyone! I am speaker 58 and I am here to tell you that Matcha-TTS is amazing!",
4, 4,
0.677, 0.677,
1.0, 0.85,
58, 58,
], ],
], ],
@@ -314,7 +323,7 @@ def main():
model_type.change(lambda x: gr.update(interactive=False), inputs=[synth_btn], outputs=[synth_btn]).then( model_type.change(lambda x: gr.update(interactive=False), inputs=[synth_btn], outputs=[synth_btn]).then(
load_model_ui, load_model_ui,
inputs=[model_type, text], inputs=[model_type, text],
outputs=[text, synth_btn, spk_slider, example_row_lj_speech, example_row_multispeaker], outputs=[text, synth_btn, spk_slider, example_row_lj_speech, example_row_multispeaker, length_scale],
) )
synth_btn.click( synth_btn.click(