Files
gradio-webrtc/text_to_speech_gallery/index.html
2025-06-17 12:04:47 +00:00

1108 lines
34 KiB
HTML

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="canonical" href="https://fastrtc.org/text_to_speech_gallery/">
<link rel="prev" href="../advanced-configuration/">
<link rel="next" href="../speech_to_text_gallery/">
<link rel="icon" href="../fastrtc_logo.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.14">
<title>Text-to-Speech Gallery - FastRTC</title>
<link rel="stylesheet" href="../assets/stylesheets/main.342714a4.min.css">
<link rel="stylesheet" href="../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../stylesheets/extra.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
<!-- Theme storage helpers. __md_scope is the URL of ".." resolved against the current location. __md_hash folds a string's character codes into an integer (h * 31 + code, written as (h << 5) - h). __md_get and __md_set read and write JSON values in localStorage (or a storage object passed in) under the key "scope pathname + '.' + key"; __md_set silently ignores write errors. -->
<script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="fastrtc-dark" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#how-to-add-your-own-text-to-speech-model" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href=".." title="FastRTC"
class="md-header__button md-logo" aria-label="FastRTC" data-md-component="logo">
<img src="../fastrtc_logo.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
FastRTC
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Text-to-Speech Gallery
</span>
</div>
</div>
</div>
<div style="display: flex; align-items: center; margin-right: 1rem;">
<a href="https://hf.co/fastrtc" target="_blank" rel="noopener noreferrer">
<img src="/hf-logo.svg" alt="Hugging Face"
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/hf-logo.svg';"
style="height: 24px; margin-right: 10px;">
</a>
<a href="https://gradio.app" target="_blank" rel="noopener noreferrer">
<img src="/gradio-logo.svg" alt="Gradio"
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/gradio-logo.svg';"
style="height: 24px; margin-right: 10px;">
</a>
<a href="https://discord.gg/TSWU7HyaYu" target="_blank" rel="noopener noreferrer">
<img src="/Discord-Symbol-White.svg" alt="Discord" style="height: 16px; margin-right: 10px;">
</a>
</div>
<div class="md-header__source">
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
fastrtc
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href=".." title="FastRTC" class="md-nav__button md-logo" aria-label="FastRTC" data-md-component="logo">
<img src="../fastrtc_logo.png" alt="logo">
</a>
FastRTC
</label>
<div class="md-nav__source">
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
fastrtc
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href=".." class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
User Guide
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
User Guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../userguide/streams/" class="md-nav__link">
<span class="md-ellipsis">
Core Concepts
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../userguide/audio/" class="md-nav__link">
<span class="md-ellipsis">
Audio Streaming
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../userguide/video/" class="md-nav__link">
<span class="md-ellipsis">
Video Streaming
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../userguide/audio-video/" class="md-nav__link">
<span class="md-ellipsis">
Audio-Video Streaming
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../userguide/gradio/" class="md-nav__link">
<span class="md-ellipsis">
Gradio
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../userguide/api/" class="md-nav__link">
<span class="md-ellipsis">
API
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../cookbook/" class="md-nav__link">
<span class="md-ellipsis">
Cookbook
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../deployment/" class="md-nav__link">
<span class="md-ellipsis">
Deployment
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../advanced-configuration/" class="md-nav__link">
<span class="md-ellipsis">
Advanced Configuration
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
Plugin Ecosystem
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
Plugin Ecosystem
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
Text-to-Speech Gallery
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Text-to-Speech Gallery
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#how-to-add-your-own-text-to-speech-model" class="md-nav__link">
<span class="md-ellipsis">
How to add your own Text-to-Speech model
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../speech_to_text_gallery/" class="md-nav__link">
<span class="md-ellipsis">
Speech-to-Text Gallery
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../turn_taking_gallery/" class="md-nav__link">
<span class="md-ellipsis">
Turn-taking Gallery
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../utils/" class="md-nav__link">
<span class="md-ellipsis">
Utils
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../faq/" class="md-nav__link">
<span class="md-ellipsis">
Frequently Asked Questions
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
<label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
<span class="md-ellipsis">
API Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_9">
<span class="md-nav__icon md-icon"></span>
API Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../reference/stream/" class="md-nav__link">
<span class="md-ellipsis">
Stream
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../reference/reply_on_pause/" class="md-nav__link">
<span class="md-ellipsis">
Pause Detection Handlers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../reference/stream_handlers/" class="md-nav__link">
<span class="md-ellipsis">
Stream Handlers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../reference/utils/" class="md-nav__link">
<span class="md-ellipsis">
Utils
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../reference/credentials/" class="md-nav__link">
<span class="md-ellipsis">
TURN Credentials
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#how-to-add-your-own-text-to-speech-model" class="md-nav__link">
<span class="md-ellipsis">
How to add your own Text-to-Speech model
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1>Text-to-Speech Gallery</h1>
<style>
/* Tag filter buttons are dimmed to half opacity by default. */
.tag-button {
cursor: pointer;
opacity: 0.5;
transition: opacity 0.2s ease;
}
/* The tag label takes its colour from the --supernova custom property,
   which is not defined in this block (presumably in the site stylesheet). */
.tag-button > code {
color: var(--supernova);
}
/* A button carrying the "active" class is shown at full opacity. */
.tag-button.active {
opacity: 1;
}
</style>
<p>A collection of Text-to-Speech models ready to use with FastRTC. Click on the tags below to find the TTS model you're looking for!</p>
<div class="admonition tip">
<p class="admonition-title">Note</p>
<p>The model you want to use does not have to be in the gallery. This is just a collection of models with a common interface that are easy to "plug and play" into your FastRTC app. But you can use any model you want without having to do any special setup. Simply use it from your stream handler!</p>
</div>
<div class="tag-buttons">
<button class="tag-button" data-tag="cpu" type="button"><code>cpu</code></button>
</div>
<script>
/**
 * Show or hide gallery cards according to the currently active tag buttons.
 *
 * Collects the `data-tag` value of every `.tag-button.active` element and
 * compares it with the comma-separated `data-tags` list on each card's title
 * paragraph. A card is shown when no tag is selected, or when it carries at
 * least one of the selected tags (OR semantics). Visibility is applied to the
 * paragraph's parent element through its inline `display` style.
 */
function filterCards() {
  const selectedTags = Array.from(
    document.querySelectorAll('.tag-button.active'),
    button => button.getAttribute('data-tag')
  );
  const cards = document.querySelectorAll('.grid.cards > ul > li > p[data-tags]');
  cards.forEach(card => {
    // Trim every entry and drop empties so lists written as "gpu, pytorch"
    // or with a trailing comma still match the button tags exactly.
    const cardTags = card.getAttribute('data-tags')
      .split(',')
      .map(tag => tag.trim())
      .filter(tag => tag !== '');
    const shouldShow = selectedTags.length === 0 || selectedTags.some(tag => cardTags.includes(tag));
    card.parentElement.style.display = shouldShow ? 'block' : 'none';
  });
}
// Turn every tag button into a toggle: a click flips its "active" class and
// re-applies the card filter.
document.querySelectorAll('.tag-button').forEach(button => {
  // The "active" class only changes opacity, so mirror the state in
  // aria-pressed to make the toggle perceivable to assistive technology.
  button.setAttribute('aria-pressed', String(button.classList.contains('active')));
  button.addEventListener('click', () => {
    // classList.toggle returns true when the class is present afterwards.
    const isActive = button.classList.toggle('active');
    button.setAttribute('aria-pressed', String(isActive));
    filterCards();
  });
});
</script>
<div class="grid cards">
<ul>
<li>
<p data-tags="cpu"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> Orpheus.cpp</p>
<hr />
<p>Description:
A llama.cpp port of <a href="https://github.com/canopyai/Orpheus-TTS/tree/main">Orpheus</a> for fast lifelike speech synthesis on CPU!</p>
<p>Install Instructions</p>
<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">orpheus</span><span class="o">-</span><span class="n">cpp</span>
</span></code></pre></div>
<p><video src="https://github.com/user-attachments/assets/54dfffc9-1981-4d12-b4d1-eb68ab27e5ad" controls style="text-align: center"></video></p>
<p><a href="https://github.com/freddyaboulton/orpheus-cpp"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
</li>
<li>
<p data-tags="pytorch"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> <strong>Your TTS Model</strong></p>
<hr />
<p>Description</p>
<p>Install Instructions</p>
<p>Usage</p>
<p><a href="Your demo here"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13.22 19.03a.75.75 0 0 1 0-1.06L18.19 13H3.75a.75.75 0 0 1 0-1.5h14.44l-4.97-4.97a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215l6.25 6.25a.75.75 0 0 1 0 1.06l-6.25 6.25a.75.75 0 0 1-1.06 0"/></svg></span> Demo</a></p>
<p><a href="Code here"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
</li>
</ul>
</div>
<h2 id="how-to-add-your-own-text-to-speech-model">How to add your own Text-to-Speech model</h2>
<ol>
<li>
<p>Your model can be implemented in <strong>any</strong> framework you want but it must implement the <code>TTSModel</code> protocol.</p>
<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="k">class</span><span class="w"> </span><span class="nc">TTSModel</span><span class="p">(</span><span class="n">Protocol</span><span class="p">):</span>
</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a> <span class="k">def</span><span class="w"> </span><span class="nf">tts</span><span class="p">(</span>
</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="n">TTSOptions</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">int16</span><span class="p">]]:</span> <span class="o">...</span>
</span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a>
</span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a> <span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">stream_tts</span><span class="p">(</span>
</span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="n">TTSOptions</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-1-8"><a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">AsyncGenerator</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">int16</span><span class="p">]],</span> <span class="kc">None</span><span class="p">]:</span> <span class="o">...</span>
</span><span id="__span-1-9"><a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a>
</span><span id="__span-1-10"><a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a> <span class="k">def</span><span class="w"> </span><span class="nf">stream_tts_sync</span><span class="p">(</span>
</span><span id="__span-1-11"><a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="n">TTSOptions</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-1-12"><a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Generator</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">int16</span><span class="p">]],</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">]:</span> <span class="o">...</span>
</span></code></pre></div>
<ul>
<li>
<p>The <code>tts</code> methods should take in a string of the text to be spoken and an optional <code>TTSOptions</code>.</p>
</li>
<li>
<p>The <code>audio</code> tuple should be of the form <code>(sample_rate, audio_array)</code> where <code>sample_rate</code> is the sample rate of the audio array and <code>audio_array</code> is a numpy array of the audio data. It can be of type <code>np.int16</code> or <code>np.float32</code>.</p>
</li>
</ul>
</li>
<li>
<p>Once you have your model implemented, you can use it in your handler!</p>
<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">AdditionalOutputs</span><span class="p">,</span> <span class="n">ReplyOnPause</span><span class="p">,</span> <span class="n">get_stt_model</span>
</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">your_model</span><span class="w"> </span><span class="kn">import</span> <span class="n">YourModel</span>
</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a>
</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="n">model</span> <span class="o">=</span> <span class="n">YourModel</span><span class="p">()</span> <span class="c1"># implement the TTSModel protocol</span>
</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="n">options</span> <span class="o">=</span> <span class="n">YourTTSOptions</span><span class="p">()</span> <span class="c1"># implement the TTSOptions protocol</span>
</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="n">stt_model</span> <span class="o">=</span> <span class="n">get_stt_model</span><span class="p">()</span>
</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a>
</span><span id="__span-2-8"><a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a><span class="k">def</span><span class="w"> </span><span class="nf">echo</span><span class="p">(</span><span class="n">audio</span><span class="p">):</span>
</span><span id="__span-2-9"><a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a> <span class="n">text</span> <span class="o">=</span> <span class="n">stt_model</span><span class="o">.</span><span class="n">stt</span><span class="p">(</span><span class="n">audio</span><span class="p">)</span>
</span><span id="__span-2-10"><a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a> <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">stream_tts_sync</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">options</span><span class="p">):</span>
</span><span id="__span-2-11"><a id="__codelineno-2-11" name="__codelineno-2-11" href="#__codelineno-2-11"></a> <span class="k">yield</span> <span class="n">chunk</span>
</span><span id="__span-2-12"><a id="__codelineno-2-12" name="__codelineno-2-12" href="#__codelineno-2-12"></a>
</span><span id="__span-2-13"><a id="__codelineno-2-13" name="__codelineno-2-13" href="#__codelineno-2-13"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span><span class="n">ReplyOnPause</span><span class="p">(</span><span class="n">echo</span><span class="p">),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;send-receive&quot;</span><span class="p">,</span> <span class="n">modality</span><span class="o">=</span><span class="s2">&quot;audio&quot;</span><span class="p">,</span>
</span><span id="__span-2-14"><a id="__codelineno-2-14" name="__codelineno-2-14" href="#__codelineno-2-14"></a> <span class="n">additional_outputs</span><span class="o">=</span><span class="p">[</span><span class="n">gr</span><span class="o">.</span><span class="n">Textbox</span><span class="p">(</span><span class="n">label</span><span class="o">=</span><span class="s2">&quot;Transcription&quot;</span><span class="p">)],</span>
</span><span id="__span-2-15"><a id="__codelineno-2-15" name="__codelineno-2-15" href="#__codelineno-2-15"></a> <span class="n">additional_outputs_handler</span><span class="o">=</span><span class="k">lambda</span> <span class="n">old</span><span class="p">,</span><span class="n">new</span><span class="p">:</span><span class="n">old</span> <span class="o">+</span> <span class="n">new</span><span class="p">)</span>
</span><span id="__span-2-16"><a id="__codelineno-2-16" name="__codelineno-2-16" href="#__codelineno-2-16"></a><span class="n">stream</span><span class="o">.</span><span class="n">ui</span><span class="o">.</span><span class="n">launch</span><span class="p">()</span>
</span></code></pre></div>
</li>
<li>
<p>Open a <a href="https://github.com/gradio-app/fastrtc/edit/main/docs/text_to_speech_gallery.md">PR</a> to add your model to the gallery! Ideally your model package should be pip installable so others can try it out easily.</p>
</li>
</ol>
</article>
</div>
<!-- Looks up the element whose id equals the URL fragment. If it exists and has a name, its checked state is set to whether that name starts with "__tabbed_" (presumably to select the content tab a deep link points at; confirm against the theme). -->
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "..", "features": ["content.code.copy", "content.code.annotate", "navigation.indexes"], "search": "../assets/javascripts/workers/search.d50fe291.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../assets/javascripts/bundle.13a4f30d.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
</body>
</html>