diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 858018e..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 Shivam Mehta - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..12335a4 --- /dev/null +++ b/README.md @@ -0,0 +1,312 @@ +# Matcha-TTS: A fast TTS architecture with conditional flow matching + + + + + + + + + + + + + + + + + + +##### [Shivam Mehta][shivam_profile], [Ruibo Tu][ruibo_profile], [Jonas Beskow][jonas_profile], [Éva Székely][eva_profile], and [Gustav Eje Henter][gustav_profile] + +We propose Matcha-TTS, a new approach to non-autoregressive neural TTS that uses conditional flow matching to speed up ODE-based speech synthesis. Our method: + +- Is probabilistic +- Has a compact memory footprint +- Sounds highly natural +- Is very fast to synthesise from + +Please check out the audio examples below and read our arXiv preprint for more details. 
+Code and pre-trained models will be made available shortly after the ICASSP deadline. + +[shivam_profile]: https://www.kth.se/profile/smehta +[ruibo_profile]: https://www.kth.se/profile/ruibo +[jonas_profile]: https://www.kth.se/profile/beskow +[eva_profile]: https://www.kth.se/profile/szekely +[gustav_profile]: https://people.kth.se/~ghe/ +[this_page]: https://shivammehta25.github.io/Diff-TTSG/ + + + + + +## Architecture + +Architecture of Matcha-TTS + + + +## Stimuli from the evaluation test + +Currently loaded => MAT-10 : Sentence 1 + +
+

+ It had established periodic regular review of the status of four hundred individuals; +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ArchitectureConditionSentence 1Sentence 2Sentence 3Sentence 4Sentence 5Sentence 6
VocodedVOC
Matcha-TTSMAT-10
MAT-4
MAT-2
Grad-TTSGRAD-10
GRAD-4
Grad-TTS+CFMGCFM-4
FastSpeechFS2
VITSVITS
+ + diff --git a/_config.yaml b/_config.yaml new file mode 100644 index 0000000..7adfc4f --- /dev/null +++ b/_config.yaml @@ -0,0 +1,4 @@ +title: Matcha-TTS +theme: jekyll-theme-dinky +description: A fast TTS architecture with conditional flow matching +show_downloads: False diff --git a/favicon.ico b/favicon.ico new file mode 100644 index 0000000..f6b79b5 Binary files /dev/null and b/favicon.ico differ diff --git a/images/architecture.png b/images/architecture.png new file mode 100644 index 0000000..21ba93e Binary files /dev/null and b/images/architecture.png differ diff --git a/images/play_button.png b/images/play_button.png new file mode 100644 index 0000000..5ede930 Binary files /dev/null and b/images/play_button.png differ diff --git a/stimuli/sample_from_test/FS2_1.wav b/stimuli/sample_from_test/FS2_1.wav new file mode 100644 index 0000000..1734455 Binary files /dev/null and b/stimuli/sample_from_test/FS2_1.wav differ diff --git a/stimuli/sample_from_test/FS2_2.wav b/stimuli/sample_from_test/FS2_2.wav new file mode 100644 index 0000000..34f4906 Binary files /dev/null and b/stimuli/sample_from_test/FS2_2.wav differ diff --git a/stimuli/sample_from_test/FS2_3.wav b/stimuli/sample_from_test/FS2_3.wav new file mode 100644 index 0000000..41d17bf Binary files /dev/null and b/stimuli/sample_from_test/FS2_3.wav differ diff --git a/stimuli/sample_from_test/FS2_4.wav b/stimuli/sample_from_test/FS2_4.wav new file mode 100644 index 0000000..24b0bbb Binary files /dev/null and b/stimuli/sample_from_test/FS2_4.wav differ diff --git a/stimuli/sample_from_test/FS2_5.wav b/stimuli/sample_from_test/FS2_5.wav new file mode 100644 index 0000000..947c940 Binary files /dev/null and b/stimuli/sample_from_test/FS2_5.wav differ diff --git a/stimuli/sample_from_test/FS2_6.wav b/stimuli/sample_from_test/FS2_6.wav new file mode 100644 index 0000000..841fa9d Binary files /dev/null and b/stimuli/sample_from_test/FS2_6.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_1.wav 
b/stimuli/sample_from_test/GCFM-4_1.wav new file mode 100644 index 0000000..760b1b3 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_1.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_2.wav b/stimuli/sample_from_test/GCFM-4_2.wav new file mode 100644 index 0000000..9ccbba5 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_2.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_3.wav b/stimuli/sample_from_test/GCFM-4_3.wav new file mode 100644 index 0000000..ac3e5f0 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_3.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_4.wav b/stimuli/sample_from_test/GCFM-4_4.wav new file mode 100644 index 0000000..bdcc208 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_4.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_5.wav b/stimuli/sample_from_test/GCFM-4_5.wav new file mode 100644 index 0000000..17fdf85 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_5.wav differ diff --git a/stimuli/sample_from_test/GCFM-4_6.wav b/stimuli/sample_from_test/GCFM-4_6.wav new file mode 100644 index 0000000..d530170 Binary files /dev/null and b/stimuli/sample_from_test/GCFM-4_6.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_1.wav b/stimuli/sample_from_test/GRAD-10_1.wav new file mode 100644 index 0000000..d85a1e6 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-10_1.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_2.wav b/stimuli/sample_from_test/GRAD-10_2.wav new file mode 100644 index 0000000..49d5069 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-10_2.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_3.wav b/stimuli/sample_from_test/GRAD-10_3.wav new file mode 100644 index 0000000..3fa5f49 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-10_3.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_4.wav b/stimuli/sample_from_test/GRAD-10_4.wav new file mode 100644 index 0000000..d5372f4 Binary files /dev/null 
and b/stimuli/sample_from_test/GRAD-10_4.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_5.wav b/stimuli/sample_from_test/GRAD-10_5.wav new file mode 100644 index 0000000..ce3b744 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-10_5.wav differ diff --git a/stimuli/sample_from_test/GRAD-10_6.wav b/stimuli/sample_from_test/GRAD-10_6.wav new file mode 100644 index 0000000..1328840 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-10_6.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_1.wav b/stimuli/sample_from_test/GRAD-4_1.wav new file mode 100644 index 0000000..00061b2 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_1.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_2.wav b/stimuli/sample_from_test/GRAD-4_2.wav new file mode 100644 index 0000000..b0e5282 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_2.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_3.wav b/stimuli/sample_from_test/GRAD-4_3.wav new file mode 100644 index 0000000..1ed861c Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_3.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_4.wav b/stimuli/sample_from_test/GRAD-4_4.wav new file mode 100644 index 0000000..74cda1c Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_4.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_5.wav b/stimuli/sample_from_test/GRAD-4_5.wav new file mode 100644 index 0000000..00b3513 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_5.wav differ diff --git a/stimuli/sample_from_test/GRAD-4_6.wav b/stimuli/sample_from_test/GRAD-4_6.wav new file mode 100644 index 0000000..c05ef30 Binary files /dev/null and b/stimuli/sample_from_test/GRAD-4_6.wav differ diff --git a/stimuli/sample_from_test/MAT-10_1.wav b/stimuli/sample_from_test/MAT-10_1.wav new file mode 100644 index 0000000..0def2a5 Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_1.wav differ diff --git a/stimuli/sample_from_test/MAT-10_2.wav 
b/stimuli/sample_from_test/MAT-10_2.wav new file mode 100644 index 0000000..48c0e7c Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_2.wav differ diff --git a/stimuli/sample_from_test/MAT-10_3.wav b/stimuli/sample_from_test/MAT-10_3.wav new file mode 100644 index 0000000..4f4167f Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_3.wav differ diff --git a/stimuli/sample_from_test/MAT-10_4.wav b/stimuli/sample_from_test/MAT-10_4.wav new file mode 100644 index 0000000..3bea975 Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_4.wav differ diff --git a/stimuli/sample_from_test/MAT-10_5.wav b/stimuli/sample_from_test/MAT-10_5.wav new file mode 100644 index 0000000..6fdd71f Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_5.wav differ diff --git a/stimuli/sample_from_test/MAT-10_6.wav b/stimuli/sample_from_test/MAT-10_6.wav new file mode 100644 index 0000000..4e01d24 Binary files /dev/null and b/stimuli/sample_from_test/MAT-10_6.wav differ diff --git a/stimuli/sample_from_test/MAT-2_1.wav b/stimuli/sample_from_test/MAT-2_1.wav new file mode 100644 index 0000000..bb08513 Binary files /dev/null and b/stimuli/sample_from_test/MAT-2_1.wav differ diff --git a/stimuli/sample_from_test/MAT-2_2.wav b/stimuli/sample_from_test/MAT-2_2.wav new file mode 100644 index 0000000..4705325 Binary files /dev/null and b/stimuli/sample_from_test/MAT-2_2.wav differ diff --git a/stimuli/sample_from_test/MAT-2_3.wav b/stimuli/sample_from_test/MAT-2_3.wav new file mode 100644 index 0000000..5fee48d Binary files /dev/null and b/stimuli/sample_from_test/MAT-2_3.wav differ diff --git a/stimuli/sample_from_test/MAT-2_4.wav b/stimuli/sample_from_test/MAT-2_4.wav new file mode 100644 index 0000000..102c77c Binary files /dev/null and b/stimuli/sample_from_test/MAT-2_4.wav differ diff --git a/stimuli/sample_from_test/MAT-2_5.wav b/stimuli/sample_from_test/MAT-2_5.wav new file mode 100644 index 0000000..464f274 Binary files /dev/null and 
b/stimuli/sample_from_test/MAT-2_5.wav differ diff --git a/stimuli/sample_from_test/MAT-2_6.wav b/stimuli/sample_from_test/MAT-2_6.wav new file mode 100644 index 0000000..56dbe95 Binary files /dev/null and b/stimuli/sample_from_test/MAT-2_6.wav differ diff --git a/stimuli/sample_from_test/MAT-4_1.wav b/stimuli/sample_from_test/MAT-4_1.wav new file mode 100644 index 0000000..bbc2d33 Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_1.wav differ diff --git a/stimuli/sample_from_test/MAT-4_2.wav b/stimuli/sample_from_test/MAT-4_2.wav new file mode 100644 index 0000000..6155332 Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_2.wav differ diff --git a/stimuli/sample_from_test/MAT-4_3.wav b/stimuli/sample_from_test/MAT-4_3.wav new file mode 100644 index 0000000..5952715 Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_3.wav differ diff --git a/stimuli/sample_from_test/MAT-4_4.wav b/stimuli/sample_from_test/MAT-4_4.wav new file mode 100644 index 0000000..1cbfa4f Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_4.wav differ diff --git a/stimuli/sample_from_test/MAT-4_5.wav b/stimuli/sample_from_test/MAT-4_5.wav new file mode 100644 index 0000000..afc2ed7 Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_5.wav differ diff --git a/stimuli/sample_from_test/MAT-4_6.wav b/stimuli/sample_from_test/MAT-4_6.wav new file mode 100644 index 0000000..6bbad54 Binary files /dev/null and b/stimuli/sample_from_test/MAT-4_6.wav differ diff --git a/stimuli/sample_from_test/VITS_1.wav b/stimuli/sample_from_test/VITS_1.wav new file mode 100644 index 0000000..462343a Binary files /dev/null and b/stimuli/sample_from_test/VITS_1.wav differ diff --git a/stimuli/sample_from_test/VITS_2.wav b/stimuli/sample_from_test/VITS_2.wav new file mode 100644 index 0000000..c722b2f Binary files /dev/null and b/stimuli/sample_from_test/VITS_2.wav differ diff --git a/stimuli/sample_from_test/VITS_3.wav b/stimuli/sample_from_test/VITS_3.wav new file mode 
100644 index 0000000..c1211ec Binary files /dev/null and b/stimuli/sample_from_test/VITS_3.wav differ diff --git a/stimuli/sample_from_test/VITS_4.wav b/stimuli/sample_from_test/VITS_4.wav new file mode 100644 index 0000000..1dca652 Binary files /dev/null and b/stimuli/sample_from_test/VITS_4.wav differ diff --git a/stimuli/sample_from_test/VITS_5.wav b/stimuli/sample_from_test/VITS_5.wav new file mode 100644 index 0000000..20f5fd3 Binary files /dev/null and b/stimuli/sample_from_test/VITS_5.wav differ diff --git a/stimuli/sample_from_test/VITS_6.wav b/stimuli/sample_from_test/VITS_6.wav new file mode 100644 index 0000000..0bf6a99 Binary files /dev/null and b/stimuli/sample_from_test/VITS_6.wav differ diff --git a/stimuli/sample_from_test/VOC_1.wav b/stimuli/sample_from_test/VOC_1.wav new file mode 100644 index 0000000..44930e8 Binary files /dev/null and b/stimuli/sample_from_test/VOC_1.wav differ diff --git a/stimuli/sample_from_test/VOC_2.wav b/stimuli/sample_from_test/VOC_2.wav new file mode 100644 index 0000000..bbd9f4f Binary files /dev/null and b/stimuli/sample_from_test/VOC_2.wav differ diff --git a/stimuli/sample_from_test/VOC_3.wav b/stimuli/sample_from_test/VOC_3.wav new file mode 100644 index 0000000..7a7462f Binary files /dev/null and b/stimuli/sample_from_test/VOC_3.wav differ diff --git a/stimuli/sample_from_test/VOC_4.wav b/stimuli/sample_from_test/VOC_4.wav new file mode 100644 index 0000000..17dcb6a Binary files /dev/null and b/stimuli/sample_from_test/VOC_4.wav differ diff --git a/stimuli/sample_from_test/VOC_5.wav b/stimuli/sample_from_test/VOC_5.wav new file mode 100644 index 0000000..68c6406 Binary files /dev/null and b/stimuli/sample_from_test/VOC_5.wav differ diff --git a/stimuli/sample_from_test/VOC_6.wav b/stimuli/sample_from_test/VOC_6.wav new file mode 100644 index 0000000..5b89eb9 Binary files /dev/null and b/stimuli/sample_from_test/VOC_6.wav differ