mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 01:49:23 +08:00
1714 lines
93 KiB
HTML
1714 lines
93 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
<link rel="canonical" href="https://fastrtc.org/userguide/audio/">
|
|
|
|
|
|
<link rel="prev" href="../streams/">
|
|
|
|
|
|
<link rel="next" href="../video/">
|
|
|
|
|
|
<link rel="icon" href="../../fastrtc_logo.png">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.14">
|
|
|
|
|
|
|
|
<title>Audio Streaming - FastRTC</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.342714a4.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../stylesheets/extra.css">
|
|
|
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
|
|
|
|
<!--
  Theme bootstrap helpers defined as globals by the inline script below:
    __md_scope : URL object for "../.." resolved against the current location.
    __md_hash  : folds the characters of a string into an integer using (h << 5) - h + charCode.
    __md_get   : reads the item stored under scope.pathname + "." + key from the given storage
                 (localStorage unless another is passed) and returns it parsed as JSON.
    __md_set   : writes a value as JSON under the same key scheme; any exception thrown by
                 setItem is caught and ignored.
-->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="fastrtc-dark" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#reply-on-pause" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../.." title="FastRTC"
|
|
class="md-header__button md-logo" aria-label="FastRTC" data-md-component="logo">
|
|
|
|
<img src="../../fastrtc_logo.png" alt="logo">
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
FastRTC
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Audio Streaming
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!--
  External project links shown in the header. Each link contains only an image,
  so the image's alt text is what gives the link its accessible name.
  The first two logos fall back to a remotely hosted copy if the local file
  fails to load; the handler clears itself first so a failing fallback cannot loop.
-->
<div style="display: flex; align-items: center; margin-right: 1rem;">
  <a href="https://hf.co/fastrtc" target="_blank" rel="noopener noreferrer">
    <img src="/hf-logo.svg"
         onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/hf-logo.svg';"
         style="height: 24px; margin-right: 10px;"
         alt="Hugging Face">
  </a>
  <a href="https://gradio.app" target="_blank" rel="noopener noreferrer">
    <img src="/gradio-logo.svg"
         onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/gradio-logo.svg';"
         style="height: 24px; margin-right: 10px;"
         alt="Gradio">
  </a>
  <a href="https://discord.gg/TSWU7HyaYu" target="_blank" rel="noopener noreferrer">
    <img src="/Discord-Symbol-White.svg" style="height: 16px; margin-right: 10px;" alt="Discord">
  </a>
</div>
|
|
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
fastrtc
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../.." title="FastRTC" class="md-nav__button md-logo" aria-label="FastRTC" data-md-component="logo">
|
|
|
|
<img src="../../fastrtc_logo.png" alt="logo">
|
|
|
|
</a>
|
|
FastRTC
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
fastrtc
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Home
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
User Guide
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
User Guide
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../streams/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Core Concepts
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Audio Streaming
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Audio Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#reply-on-pause" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Reply On Pause
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Reply On Pause">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#interruptions" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Interruptions
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#startup-function" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Startup Function
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#reply-on-stopwords" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Reply On Stopwords
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#stream-handler" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Stream Handler
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#async-stream-handlers" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Async Stream Handlers
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#text-to-speech" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Text To Speech
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#speech-to-text" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Speech To Text
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#requesting-inputs" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Requesting Inputs
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#considerations-for-telephone-use" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Considerations for Telephone Use
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Considerations for Telephone Use">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#replyonpause-and-telephone-use" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
ReplyOnPause and telephone use
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#telephone-integration" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Telephone Integration
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Telephone Integration">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#setup-process" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Setup Process
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#configuring-twilio" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Configuring Twilio
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#code-example" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Code Example
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#outbound-calls-with-twilio" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Outbound calls with Twilio
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../video/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Video Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../audio-video/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Audio-Video Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../gradio/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Gradio
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../api/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
API
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../cookbook/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Cookbook
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../deployment/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Deployment
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../advanced-configuration/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Advanced Configuration
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Plugin Ecosystem
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_6">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Plugin Ecosystem
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../text_to_speech_gallery/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Text-to-Speech Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../speech_to_text_gallery/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Speech-to-Text Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../turn_taking_gallery/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Turn-taking Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../utils/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Utils
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../faq/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Frequently Asked Questions
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
API Reference
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_9">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
API Reference
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/stream/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Stream
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/reply_on_pause/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Pause Detection Handlers
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/stream_handlers/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Stream Handlers
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/utils/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Utils
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/credentials/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
TURN Credentials
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#reply-on-pause" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Reply On Pause
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Reply On Pause">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#interruptions" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Interruptions
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#startup-function" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Startup Function
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#reply-on-stopwords" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Reply On Stopwords
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#stream-handler" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Stream Handler
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#async-stream-handlers" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Async Stream Handlers
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#text-to-speech" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Text To Speech
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#speech-to-text" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Speech To Text
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#requesting-inputs" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Requesting Inputs
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#considerations-for-telephone-use" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Considerations for Telephone Use
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Considerations for Telephone Use">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#replyonpause-and-telephone-use" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
ReplyOnPause and telephone use
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#telephone-integration" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Telephone Integration
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Telephone Integration">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#setup-process" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Setup Process
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#configuring-twilio" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Configuring Twilio
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#code-example" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Code Example
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#outbound-calls-with-twilio" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Outbound calls with Twilio
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<h1>Audio Streaming</h1>
|
|
|
|
<h2 id="reply-on-pause">Reply On Pause</h2>
|
|
<p>Typically, you want to run a Python function whenever a user has stopped speaking. This can be done by wrapping a Python generator with the <code>ReplyOnPause</code> class and passing it to the <code>handler</code> argument of the <code>Stream</code> object. The <code>ReplyOnPause</code> class will handle the voice detection and turn-taking logic automatically!</p>
|
|
<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="__tabbed_1_1" name="__tabbed_1" type="radio" /><input id="__tabbed_1_2" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="__tabbed_1_1">Code</label><label for="__tabbed_1_2">Notes</label></div>
|
|
<div class="tabbed-content">
|
|
<div class="tabbed-block">
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">ReplyOnPause</span><span class="p">,</span> <span class="n">Stream</span>
|
|
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
|
|
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="k">def</span><span class="w"> </span><span class="nf">response</span><span class="p">(</span><span class="n">audio</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]):</span> <span class="c1"># (1)</span>
|
|
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">sample_rate</span><span class="p">,</span> <span class="n">audio_array</span> <span class="o">=</span> <span class="n">audio</span>
|
|
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="c1"># Generate response</span>
|
|
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="k">for</span> <span class="n">audio_chunk</span> <span class="ow">in</span> <span class="n">generate_response</span><span class="p">(</span><span class="n">sample_rate</span><span class="p">,</span> <span class="n">audio_array</span><span class="p">):</span>
|
|
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="k">yield</span> <span class="p">(</span><span class="n">sample_rate</span><span class="p">,</span> <span class="n">audio_chunk</span><span class="p">)</span> <span class="c1"># (2)</span>
|
|
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a>
|
|
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a> <span class="n">handler</span><span class="o">=</span><span class="n">ReplyOnPause</span><span class="p">(</span><span class="n">response</span><span class="p">),</span>
|
|
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span>
|
|
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span>
|
|
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<ol>
|
|
<li>
|
|
<p>The Python generator will receive the <strong>entire</strong> audio up until the user stopped speaking. It will be a tuple of the form (sampling_rate, numpy array of audio). The array will have a shape of (1, num_samples). You can also pass in additional input components.</p>
|
|
</li>
|
|
<li>
|
|
<p>The generator must yield audio chunks as a tuple of (sampling_rate, numpy audio array). Each numpy audio array must have a shape of (1, num_samples).</p>
|
|
</li>
|
|
</ol>
|
|
</div>
|
|
<div class="tabbed-block">
|
|
<ol>
|
|
<li>
|
|
<p>The Python generator will receive the <strong>entire</strong> audio up until the user stopped speaking. It will be a tuple of the form (sampling_rate, numpy array of audio). The array will have a shape of (1, num_samples). You can also pass in additional input components.</p>
|
|
</li>
|
|
<li>
|
|
<p>The generator must yield audio chunks as a tuple of (sampling_rate, numpy audio array). Each numpy audio array must have a shape of (1, num_samples).</p>
|
|
</li>
|
|
</ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Asynchronous</p>
|
|
<p>You can also use an async generator with <code>ReplyOnPause</code>.</p>
|
|
</div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Parameters</p>
|
|
<p>You can customize the voice detection parameters by passing in <code>algo_options</code> and <code>model_options</code> to the <code>ReplyOnPause</code> class.</p>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">AlgoOptions</span><span class="p">,</span> <span class="n">SileroVadOptions</span>
|
|
</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>
|
|
</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a> <span class="n">handler</span><span class="o">=</span><span class="n">ReplyOnPause</span><span class="p">(</span>
|
|
</span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a> <span class="n">response</span><span class="p">,</span>
|
|
</span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a> <span class="n">algo_options</span><span class="o">=</span><span class="n">AlgoOptions</span><span class="p">(</span>
|
|
</span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a> <span class="n">audio_chunk_duration</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span>
|
|
</span><span id="__span-1-8"><a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a> <span class="n">started_talking_threshold</span><span class="o">=</span><span class="mf">0.2</span><span class="p">,</span>
|
|
</span><span id="__span-1-9"><a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a> <span class="n">speech_threshold</span><span class="o">=</span><span class="mf">0.1</span>
|
|
</span><span id="__span-1-10"><a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a> <span class="p">),</span>
|
|
</span><span id="__span-1-11"><a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a> <span class="n">model_options</span><span class="o">=</span><span class="n">SileroVadOptions</span><span class="p">(</span>
|
|
</span><span id="__span-1-12"><a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a> <span class="n">threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
|
|
</span><span id="__span-1-13"><a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a> <span class="n">min_speech_duration_ms</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span>
|
|
</span><span id="__span-1-14"><a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a> <span class="n">min_silence_duration_ms</span><span class="o">=</span><span class="mi">100</span>
|
|
</span><span id="__span-1-15"><a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a> <span class="p">)</span>
|
|
</span><span id="__span-1-16"><a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a> <span class="p">)</span>
|
|
</span><span id="__span-1-17"><a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</div>
|
|
<h3 id="interruptions">Interruptions</h3>
|
|
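<p>By default, the <code>ReplyOnPause</code> handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the <code>can_interrupt</code> parameter to <code>False</code>. The example below passes <code>can_interrupt=True</code> explicitly, which keeps interruptions enabled; change it to <code>False</code> to disable them.</p>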
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">ReplyOnPause</span>
|
|
</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a>
|
|
</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a> <span class="n">handler</span><span class="o">=</span><span class="n">ReplyOnPause</span><span class="p">(</span>
|
|
</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a> <span class="n">response</span><span class="p">,</span>
|
|
</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a> <span class="n">can_interrupt</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a> <span class="p">)</span>
|
|
</span><span id="__span-2-8"><a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<video width=98% src="https://github.com/user-attachments/assets/dba68dd7-7444-439b-b948-59171067e850" controls style="text-align: center"></video>
|
|
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Muting Response Audio</p>
|
|
<p>You can directly talk over the output audio and the interruption will still work. However, in these cases, the audio transcription may be incorrect. To prevent this, it's best practice to mute the output audio before talking over it.</p>
|
|
</div>
|
|
<h3 id="startup-function">Startup Function</h3>
|
|
<p>You can pass in a <code>startup_fn</code> to the <code>ReplyOnPause</code> class. This function will be called when the connection is first established. It is helpful for generating initial responses.</p>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_tts_model</span><span class="p">,</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">ReplyOnPause</span>
|
|
</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a>
|
|
</span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="n">tts_client</span> <span class="o">=</span> <span class="n">get_tts_model</span><span class="p">()</span>
|
|
</span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a>
|
|
</span><span id="__span-3-5"><a id="__codelineno-3-5" name="__codelineno-3-5" href="#__codelineno-3-5"></a>
|
|
</span><span id="__span-3-6"><a id="__codelineno-3-6" name="__codelineno-3-6" href="#__codelineno-3-6"></a><span class="k">def</span><span class="w"> </span><span class="nf">echo</span><span class="p">(</span><span class="n">audio</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]):</span>
|
|
</span><span id="__span-3-7"><a id="__codelineno-3-7" name="__codelineno-3-7" href="#__codelineno-3-7"></a> <span class="c1"># Implement any iterator that yields audio</span>
|
|
</span><span id="__span-3-8"><a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a> <span class="c1"># See "LLM Voice Chat" for a more complete example</span>
|
|
</span><span id="__span-3-9"><a id="__codelineno-3-9" name="__codelineno-3-9" href="#__codelineno-3-9"></a> <span class="k">yield</span> <span class="n">audio</span>
|
|
</span><span id="__span-3-10"><a id="__codelineno-3-10" name="__codelineno-3-10" href="#__codelineno-3-10"></a>
|
|
</span><span id="__span-3-11"><a id="__codelineno-3-11" name="__codelineno-3-11" href="#__codelineno-3-11"></a>
|
|
</span><span id="__span-3-12"><a id="__codelineno-3-12" name="__codelineno-3-12" href="#__codelineno-3-12"></a><span class="k">def</span><span class="w"> </span><span class="nf">startup</span><span class="p">():</span>
|
|
</span><span id="__span-3-13"><a id="__codelineno-3-13" name="__codelineno-3-13" href="#__codelineno-3-13"></a> <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">tts_client</span><span class="o">.</span><span class="n">stream_tts_sync</span><span class="p">(</span><span class="s2">"Welcome to the echo audio demo!"</span><span class="p">):</span>
|
|
</span><span id="__span-3-14"><a id="__codelineno-3-14" name="__codelineno-3-14" href="#__codelineno-3-14"></a> <span class="k">yield</span> <span class="n">chunk</span>
|
|
</span><span id="__span-3-15"><a id="__codelineno-3-15" name="__codelineno-3-15" href="#__codelineno-3-15"></a>
|
|
</span><span id="__span-3-16"><a id="__codelineno-3-16" name="__codelineno-3-16" href="#__codelineno-3-16"></a>
|
|
</span><span id="__span-3-17"><a id="__codelineno-3-17" name="__codelineno-3-17" href="#__codelineno-3-17"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-3-18"><a id="__codelineno-3-18" name="__codelineno-3-18" href="#__codelineno-3-18"></a> <span class="n">handler</span><span class="o">=</span><span class="n">ReplyOnPause</span><span class="p">(</span><span class="n">echo</span><span class="p">,</span> <span class="n">startup_fn</span><span class="o">=</span><span class="n">startup</span><span class="p">),</span>
|
|
</span><span id="__span-3-19"><a id="__codelineno-3-19" name="__codelineno-3-19" href="#__codelineno-3-19"></a> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span>
|
|
</span><span id="__span-3-20"><a id="__codelineno-3-20" name="__codelineno-3-20" href="#__codelineno-3-20"></a> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span><span class="p">,</span>
|
|
</span><span id="__span-3-21"><a id="__codelineno-3-21" name="__codelineno-3-21" href="#__codelineno-3-21"></a> <span class="n">ui_args</span><span class="o">=</span><span class="p">{</span><span class="s2">"title"</span><span class="p">:</span> <span class="s2">"Echo Audio"</span><span class="p">},</span>
|
|
</span><span id="__span-3-22"><a id="__codelineno-3-22" name="__codelineno-3-22" href="#__codelineno-3-22"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<video width=98% src="https://github.com/user-attachments/assets/c6b1cb51-5790-4522-80c3-e24e58ef9f11" controls style="text-align: center"></video>
|
|
|
|
<h2 id="reply-on-stopwords">Reply On Stopwords</h2>
|
|
<p>You can configure your AI model to run whenever a set of "stop words" is detected, like "Hey Siri" or "computer", with the <code>ReplyOnStopWords</code> class.</p>
|
|
<p>The API is similar to <code>ReplyOnPause</code> with the addition of a <code>stop_words</code> parameter.</p>
|
|
<div class="tabbed-set tabbed-alternate" data-tabs="2:2"><input checked="checked" id="__tabbed_2_1" name="__tabbed_2" type="radio" /><input id="__tabbed_2_2" name="__tabbed_2" type="radio" /><div class="tabbed-labels"><label for="__tabbed_2_1">Code</label><label for="__tabbed_2_2">Notes</label></div>
|
|
<div class="tabbed-content">
|
|
<div class="tabbed-block">
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">ReplyOnStopWords</span>
|
|
</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a>
|
|
</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="k">def</span><span class="w"> </span><span class="nf">response</span><span class="p">(</span><span class="n">audio</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]):</span>
|
|
</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="w"> </span><span class="sd">"""This function must yield audio frames"""</span>
|
|
</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a> <span class="o">...</span>
|
|
</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a> <span class="k">for</span> <span class="n">numpy_array</span> <span class="ow">in</span> <span class="n">generated_audio</span><span class="p">:</span>
|
|
</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a> <span class="k">yield</span> <span class="p">(</span><span class="n">sampling_rate</span><span class="p">,</span> <span class="n">numpy_array</span><span class="p">,</span> <span class="s2">"mono"</span><span class="p">)</span>
|
|
</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a>
|
|
</span><span id="__span-4-9"><a id="__codelineno-4-9" name="__codelineno-4-9" href="#__codelineno-4-9"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-4-10"><a id="__codelineno-4-10" name="__codelineno-4-10" href="#__codelineno-4-10"></a> <span class="n">handler</span><span class="o">=</span><span class="n">ReplyOnStopWords</span><span class="p">(</span><span class="n">response</span><span class="p">,</span>
|
|
</span><span id="__span-4-11"><a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a> <span class="n">input_sample_rate</span><span class="o">=</span><span class="mi">16000</span><span class="p">,</span>
|
|
</span><span id="__span-4-12"><a id="__codelineno-4-12" name="__codelineno-4-12" href="#__codelineno-4-12"></a> <span class="n">stop_words</span><span class="o">=</span><span class="p">[</span><span class="s2">"computer"</span><span class="p">]),</span> <span class="c1"># (1)</span>
|
|
</span><span id="__span-4-13"><a id="__codelineno-4-13" name="__codelineno-4-13" href="#__codelineno-4-13"></a> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span>
|
|
</span><span id="__span-4-14"><a id="__codelineno-4-14" name="__codelineno-4-14" href="#__codelineno-4-14"></a> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span>
|
|
</span><span id="__span-4-15"><a id="__codelineno-4-15" name="__codelineno-4-15" href="#__codelineno-4-15"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<ol>
|
|
<li>The <code>stop_words</code> can be single words or pairs of words. Be sure to include common misspellings of your word for more robust detection, e.g. "llama", "lamma". In my experience, it's best to use two very distinct words like "ok computer" or "hello iris". </li>
|
|
</ol>
|
|
</div>
|
|
<div class="tabbed-block">
|
|
<ol>
|
|
<li>The <code>stop_words</code> can be single words or pairs of words. Be sure to include common misspellings of your word for more robust detection, e.g. "llama", "lamma". In my experience, it's best to use two very distinct words like "ok computer" or "hello iris". </li>
|
|
</ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Extra Dependencies</p>
|
|
<p>The <code>ReplyOnStopWords</code> class requires the <code>stopword</code> extra. Run <code>pip install fastrtc[stopword]</code> to install it.</p>
|
|
</div>
|
|
<div class="admonition warning">
|
|
<p class="admonition-title">English Only</p>
|
|
<p>The <code>ReplyOnStopWords</code> class is currently only supported for English.</p>
|
|
</div>
|
|
<h2 id="stream-handler">Stream Handler</h2>
|
|
<p><code>ReplyOnPause</code> and <code>ReplyOnStopWords</code> are implementations of a <code>StreamHandler</code>. The <code>StreamHandler</code> is a low-level abstraction that gives you arbitrary control over how the input audio stream and output audio stream are created. The following example echoes back the user audio.</p>
|
|
<div class="tabbed-set tabbed-alternate" data-tabs="3:2"><input checked="checked" id="__tabbed_3_1" name="__tabbed_3" type="radio" /><input id="__tabbed_3_2" name="__tabbed_3" type="radio" /><div class="tabbed-labels"><label for="__tabbed_3_1">Code</label><label for="__tabbed_3_2">Notes</label></div>
|
|
<div class="tabbed-content">
|
|
<div class="tabbed-block">
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">gradio</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">gr</span>
|
|
</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">StreamHandler</span>
|
|
</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="kn">from</span><span class="w"> </span><span class="nn">queue</span><span class="w"> </span><span class="kn">import</span> <span class="n">Queue</span>
|
|
</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a>
|
|
</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="k">class</span><span class="w"> </span><span class="nc">EchoHandler</span><span class="p">(</span><span class="n">StreamHandler</span><span class="p">):</span>
|
|
</span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
</span><span id="__span-5-7"><a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
</span><span id="__span-5-8"><a id="__codelineno-5-8" name="__codelineno-5-8" href="#__codelineno-5-8"></a> <span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="n">Queue</span><span class="p">()</span>
|
|
</span><span id="__span-5-9"><a id="__codelineno-5-9" name="__codelineno-5-9" href="#__codelineno-5-9"></a>
|
|
</span><span id="__span-5-10"><a id="__codelineno-5-10" name="__codelineno-5-10" href="#__codelineno-5-10"></a> <span class="k">def</span><span class="w"> </span><span class="nf">receive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">frame</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># (1)</span>
|
|
</span><span id="__span-5-11"><a id="__codelineno-5-11" name="__codelineno-5-11" href="#__codelineno-5-11"></a> <span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">frame</span><span class="p">)</span>
|
|
</span><span id="__span-5-12"><a id="__codelineno-5-12" name="__codelineno-5-12" href="#__codelineno-5-12"></a>
|
|
</span><span id="__span-5-13"><a id="__codelineno-5-13" name="__codelineno-5-13" href="#__codelineno-5-13"></a> <span class="k">def</span><span class="w"> </span><span class="nf">emit</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># (2)</span>
|
|
</span><span id="__span-5-14"><a id="__codelineno-5-14" name="__codelineno-5-14" href="#__codelineno-5-14"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
|
|
</span><span id="__span-5-15"><a id="__codelineno-5-15" name="__codelineno-5-15" href="#__codelineno-5-15"></a>
|
|
</span><span id="__span-5-16"><a id="__codelineno-5-16" name="__codelineno-5-16" href="#__codelineno-5-16"></a> <span class="k">def</span><span class="w"> </span><span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">StreamHandler</span><span class="p">:</span>
|
|
</span><span id="__span-5-17"><a id="__codelineno-5-17" name="__codelineno-5-17" href="#__codelineno-5-17"></a> <span class="k">return</span> <span class="n">EchoHandler</span><span class="p">()</span>
|
|
</span><span id="__span-5-18"><a id="__codelineno-5-18" name="__codelineno-5-18" href="#__codelineno-5-18"></a>
|
|
</span><span id="__span-5-19"><a id="__codelineno-5-19" name="__codelineno-5-19" href="#__codelineno-5-19"></a> <span class="k">def</span><span class="w"> </span><span class="nf">shutdown</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># (3)</span>
|
|
</span><span id="__span-5-20"><a id="__codelineno-5-20" name="__codelineno-5-20" href="#__codelineno-5-20"></a> <span class="k">pass</span>
|
|
</span><span id="__span-5-21"><a id="__codelineno-5-21" name="__codelineno-5-21" href="#__codelineno-5-21"></a>
|
|
</span><span id="__span-5-22"><a id="__codelineno-5-22" name="__codelineno-5-22" href="#__codelineno-5-22"></a> <span class="k">def</span><span class="w"> </span><span class="nf">start_up</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># (4)</span>
|
|
</span><span id="__span-5-23"><a id="__codelineno-5-23" name="__codelineno-5-23" href="#__codelineno-5-23"></a> <span class="k">pass</span>
|
|
</span><span id="__span-5-24"><a id="__codelineno-5-24" name="__codelineno-5-24" href="#__codelineno-5-24"></a>
|
|
</span><span id="__span-5-25"><a id="__codelineno-5-25" name="__codelineno-5-25" href="#__codelineno-5-25"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span>
|
|
</span><span id="__span-5-26"><a id="__codelineno-5-26" name="__codelineno-5-26" href="#__codelineno-5-26"></a> <span class="n">handler</span><span class="o">=</span><span class="n">EchoHandler</span><span class="p">(),</span>
|
|
</span><span id="__span-5-27"><a id="__codelineno-5-27" name="__codelineno-5-27" href="#__codelineno-5-27"></a> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span>
|
|
</span><span id="__span-5-28"><a id="__codelineno-5-28" name="__codelineno-5-28" href="#__codelineno-5-28"></a> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span>
|
|
</span><span id="__span-5-29"><a id="__codelineno-5-29" name="__codelineno-5-29" href="#__codelineno-5-29"></a><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<ol>
|
|
<li>The <code>StreamHandler</code> class implements three methods: <code>receive</code>, <code>emit</code> and <code>copy</code>. The <code>receive</code> method is called when a new frame is received from the client, and the <code>emit</code> method returns the next frame to send to the client. The <code>copy</code> method is called at the beginning of the stream to ensure each user has a unique stream handler.</li>
|
|
<li>The <code>emit</code> method SHOULD NOT block. If a frame is not ready to be sent, the method should return <code>None</code>. If you need to wait for a frame, use <a href="../../utils#wait_for_item"><code>wait_for_item</code></a> from the <code>utils</code> module.</li>
|
|
<li>The <code>shutdown</code> method is called when the stream is closed. It should be used to clean up any resources.</li>
|
|
<li>The <code>start_up</code> method is called when the stream is first created. It should be used to initialize any resources. See <a href="https://huggingface.co/spaces/fastrtc/talk-to-openai-gradio">Talk To OpenAI</a> or <a href="https://huggingface.co/spaces/fastrtc/talk-to-gemini-gradio">Talk To Gemini</a> for an example of a <code>StreamHandler</code> that uses the <code>start_up</code> method to connect to an API. </li>
|
|
</ol>
|
|
</div>
|
|
<div class="tabbed-block">
|
|
<ol>
|
|
<li>The <code>StreamHandler</code> class implements three methods: <code>receive</code>, <code>emit</code> and <code>copy</code>. The <code>receive</code> method is called when a new frame is received from the client, and the <code>emit</code> method returns the next frame to send to the client. The <code>copy</code> method is called at the beginning of the stream to ensure each user has a unique stream handler.</li>
|
|
<li>The <code>emit</code> method SHOULD NOT block. If a frame is not ready to be sent, the method should return <code>None</code>. If you need to wait for a frame, use <a href="../../utils#wait_for_item"><code>wait_for_item</code></a> from the <code>utils</code> module.</li>
|
|
<li>The <code>shutdown</code> method is called when the stream is closed. It should be used to clean up any resources.</li>
|
|
<li>The <code>start_up</code> method is called when the stream is first created. It should be used to initialize any resources. See <a href="https://huggingface.co/spaces/fastrtc/talk-to-openai-gradio">Talk To OpenAI</a> or <a href="https://huggingface.co/spaces/fastrtc/talk-to-gemini-gradio">Talk To Gemini</a> for an example of a <code>StreamHandler</code> that uses the <code>start_up</code> method to connect to an API.</li>
|
|
</ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Tip</p>
|
|
<p>See the <a href="https://huggingface.co/spaces/fastrtc/talk-to-gemini-gradio">Talk To Gemini</a> demo for a complete example of a more complex stream handler.</p>
|
|
</div>
|
|
<div class="admonition warning">
|
|
<p class="admonition-title">Warning</p>
|
|
<p>The <code>emit</code> method should not block. If you need to wait for a frame, use <a href="../../utils#wait_for_item"><code>wait_for_item</code></a> from the <code>utils</code> module.</p>
|
|
</div>
|
|
<h2 id="async-stream-handlers">Async Stream Handlers</h2>
|
|
<p>It is also possible to create asynchronous stream handlers. This is very convenient for accessing async APIs from major LLM developers, like Google and OpenAI. The main difference is that <code>receive</code>, <code>emit</code>, and <code>start_up</code> are now defined with <code>async def</code>.</p>
|
|
<p>Here is a simple example of using <code>AsyncStreamHandler</code>:</p>
|
|
<div class="tabbed-set tabbed-alternate" data-tabs="4:1"><input checked="checked" id="__tabbed_4_1" name="__tabbed_4" type="radio" /><div class="tabbed-labels"><label for="__tabbed_4_1">Code</label></div>
|
|
<div class="tabbed-content">
|
|
<div class="tabbed-block">
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">AsyncStreamHandler</span><span class="p">,</span> <span class="n">wait_for_item</span><span class="p">,</span> <span class="n">Stream</span>
|
|
</span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="kn">import</span><span class="w"> </span><span class="nn">asyncio</span>
|
|
</span><span id="__span-6-3"><a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
|
|
</span><span id="__span-6-4"><a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a>
|
|
</span><span id="__span-6-5"><a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="k">class</span><span class="w"> </span><span class="nc">AsyncEchoHandler</span><span class="p">(</span><span class="n">AsyncStreamHandler</span><span class="p">):</span>
|
|
</span><span id="__span-6-6"><a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="w"> </span><span class="sd">"""Simple Async Echo Handler"""</span>
|
|
</span><span id="__span-6-7"><a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a>
|
|
</span><span id="__span-6-8"><a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
</span><span id="__span-6-9"><a id="__codelineno-6-9" name="__codelineno-6-9" href="#__codelineno-6-9"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">input_sample_rate</span><span class="o">=</span><span class="mi">24000</span><span class="p">)</span>
|
|
</span><span id="__span-6-10"><a id="__codelineno-6-10" name="__codelineno-6-10" href="#__codelineno-6-10"></a> <span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">Queue</span><span class="p">()</span>
|
|
</span><span id="__span-6-11"><a id="__codelineno-6-11" name="__codelineno-6-11" href="#__codelineno-6-11"></a>
|
|
</span><span id="__span-6-12"><a id="__codelineno-6-12" name="__codelineno-6-12" href="#__codelineno-6-12"></a> <span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">receive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">frame</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
</span><span id="__span-6-13"><a id="__codelineno-6-13" name="__codelineno-6-13" href="#__codelineno-6-13"></a> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">frame</span><span class="p">)</span>
|
|
</span><span id="__span-6-14"><a id="__codelineno-6-14" name="__codelineno-6-14" href="#__codelineno-6-14"></a>
|
|
</span><span id="__span-6-15"><a id="__codelineno-6-15" name="__codelineno-6-15" href="#__codelineno-6-15"></a> <span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">emit</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
</span><span id="__span-6-16"><a id="__codelineno-6-16" name="__codelineno-6-16" href="#__codelineno-6-16"></a> <span class="k">return</span> <span class="k">await</span> <span class="n">wait_for_item</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="p">)</span>
|
|
</span><span id="__span-6-17"><a id="__codelineno-6-17" name="__codelineno-6-17" href="#__codelineno-6-17"></a>
|
|
</span><span id="__span-6-18"><a id="__codelineno-6-18" name="__codelineno-6-18" href="#__codelineno-6-18"></a> <span class="k">def</span><span class="w"> </span><span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
</span><span id="__span-6-19"><a id="__codelineno-6-19" name="__codelineno-6-19" href="#__codelineno-6-19"></a> <span class="k">return</span> <span class="n">AsyncEchoHandler</span><span class="p">()</span>
|
|
</span><span id="__span-6-20"><a id="__codelineno-6-20" name="__codelineno-6-20" href="#__codelineno-6-20"></a>
|
|
</span><span id="__span-6-21"><a id="__codelineno-6-21" name="__codelineno-6-21" href="#__codelineno-6-21"></a> <span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">shutdown</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
</span><span id="__span-6-22"><a id="__codelineno-6-22" name="__codelineno-6-22" href="#__codelineno-6-22"></a> <span class="k">pass</span>
|
|
</span><span id="__span-6-23"><a id="__codelineno-6-23" name="__codelineno-6-23" href="#__codelineno-6-23"></a>
|
|
</span><span id="__span-6-24"><a id="__codelineno-6-24" name="__codelineno-6-24" href="#__codelineno-6-24"></a> <span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">start_up</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
</span><span id="__span-6-25"><a id="__codelineno-6-25" name="__codelineno-6-25" href="#__codelineno-6-25"></a> <span class="k">pass</span>
|
|
</span></code></pre></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Tip</p>
|
|
<p>See <a href="https://huggingface.co/spaces/fastrtc/talk-to-gemini">Talk To Gemini</a> and <a href="https://huggingface.co/spaces/fastrtc/talk-to-openai">Talk To OpenAI</a> for complete examples of <code>AsyncStreamHandler</code>s.</p>
|
|
</div>
|
|
<h2 id="text-to-speech">Text To Speech</h2>
|
|
<p>You can use an on-device text to speech model if you have the <code>tts</code> extra installed.
|
|
Import the <code>get_tts_model</code> function and call it with the model name you want to use.
|
|
At the moment, the only model supported is <code>kokoro</code>.</p>
|
|
<p>The <code>get_tts_model</code> function returns an object with three methods:</p>
|
|
<ul>
|
|
<li><code>tts</code>: Synchronous text to speech.</li>
|
|
<li><code>stream_tts_sync</code>: Synchronous text to speech streaming.</li>
|
|
<li><code>stream_tts</code>: Asynchronous text to speech streaming.</li>
|
|
</ul>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_tts_model</span>
|
|
</span><span id="__span-7-2"><a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a>
|
|
</span><span id="__span-7-3"><a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="n">model</span> <span class="o">=</span> <span class="n">get_tts_model</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="s2">"kokoro"</span><span class="p">)</span>
|
|
</span><span id="__span-7-4"><a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a>
|
|
</span><span id="__span-7-5"><a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a><span class="k">for</span> <span class="n">audio</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">stream_tts_sync</span><span class="p">(</span><span class="s2">"Hello, world!"</span><span class="p">):</span>
|
|
</span><span id="__span-7-6"><a id="__codelineno-7-6" name="__codelineno-7-6" href="#__codelineno-7-6"></a> <span class="k">yield</span> <span class="n">audio</span>
|
|
</span><span id="__span-7-7"><a id="__codelineno-7-7" name="__codelineno-7-7" href="#__codelineno-7-7"></a>
|
|
</span><span id="__span-7-8"><a id="__codelineno-7-8" name="__codelineno-7-8" href="#__codelineno-7-8"></a><span class="k">async</span> <span class="k">for</span> <span class="n">audio</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">stream_tts</span><span class="p">(</span><span class="s2">"Hello, world!"</span><span class="p">):</span>
|
|
</span><span id="__span-7-9"><a id="__codelineno-7-9" name="__codelineno-7-9" href="#__codelineno-7-9"></a> <span class="k">yield</span> <span class="n">audio</span>
|
|
</span><span id="__span-7-10"><a id="__codelineno-7-10" name="__codelineno-7-10" href="#__codelineno-7-10"></a>
|
|
</span><span id="__span-7-11"><a id="__codelineno-7-11" name="__codelineno-7-11" href="#__codelineno-7-11"></a><span class="n">audio</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">tts</span><span class="p">(</span><span class="s2">"Hello, world!"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Tip</p>
|
|
<p>You can customize the audio by passing in an instance of <code>KokoroTTSOptions</code> to the method.
|
|
See <a href="https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md">here</a> for a list of available voices.
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">KokoroTTSOptions</span><span class="p">,</span> <span class="n">get_tts_model</span>
|
|
</span><span id="__span-8-2"><a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a>
|
|
</span><span id="__span-8-3"><a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a><span class="n">model</span> <span class="o">=</span> <span class="n">get_tts_model</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="s2">"kokoro"</span><span class="p">)</span>
|
|
</span><span id="__span-8-4"><a id="__codelineno-8-4" name="__codelineno-8-4" href="#__codelineno-8-4"></a>
|
|
</span><span id="__span-8-5"><a id="__codelineno-8-5" name="__codelineno-8-5" href="#__codelineno-8-5"></a><span class="n">options</span> <span class="o">=</span> <span class="n">KokoroTTSOptions</span><span class="p">(</span>
|
|
</span><span id="__span-8-6"><a id="__codelineno-8-6" name="__codelineno-8-6" href="#__codelineno-8-6"></a> <span class="n">voice</span><span class="o">=</span><span class="s2">"af_heart"</span><span class="p">,</span>
|
|
</span><span id="__span-8-7"><a id="__codelineno-8-7" name="__codelineno-8-7" href="#__codelineno-8-7"></a> <span class="n">speed</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
|
|
</span><span id="__span-8-8"><a id="__codelineno-8-8" name="__codelineno-8-8" href="#__codelineno-8-8"></a> <span class="n">lang</span><span class="o">=</span><span class="s2">"en-us"</span>
|
|
</span><span id="__span-8-9"><a id="__codelineno-8-9" name="__codelineno-8-9" href="#__codelineno-8-9"></a><span class="p">)</span>
|
|
</span><span id="__span-8-10"><a id="__codelineno-8-10" name="__codelineno-8-10" href="#__codelineno-8-10"></a>
|
|
</span><span id="__span-8-11"><a id="__codelineno-8-11" name="__codelineno-8-11" href="#__codelineno-8-11"></a><span class="n">audio</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">tts</span><span class="p">(</span><span class="s2">"Hello, world!"</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</div>
|
|
<h2 id="speech-to-text">Speech To Text</h2>
|
|
<p>You can use an on-device speech to text model if you have the <code>stt</code> or <code>stopword</code> extra installed.
|
|
Import the <code>get_stt_model</code> function and call it with the model name you want to use.
|
|
At the moment, the only models supported are <code>moonshine/base</code> and <code>moonshine/tiny</code>.</p>
|
|
<p>The <code>get_stt_model</code> function returns an object with the following method:</p>
|
|
<ul>
|
|
<li><code>stt</code>: Synchronous speech to text.</li>
|
|
</ul>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-9-1"><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_stt_model</span>
|
|
</span><span id="__span-9-2"><a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a>
|
|
</span><span id="__span-9-3"><a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="n">model</span> <span class="o">=</span> <span class="n">get_stt_model</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="s2">"moonshine/base"</span><span class="p">)</span>
|
|
</span><span id="__span-9-4"><a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a>
|
|
</span><span id="__span-9-5"><a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a><span class="n">audio</span> <span class="o">=</span> <span class="p">(</span><span class="mi">16000</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="o">-</span><span class="mi">32768</span><span class="p">,</span> <span class="mi">32768</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">16000</span><span class="p">)))</span>
|
|
</span><span id="__span-9-6"><a id="__codelineno-9-6" name="__codelineno-9-6" href="#__codelineno-9-6"></a><span class="n">text</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">stt</span><span class="p">(</span><span class="n">audio</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Example</p>
|
|
<p>See <a href="https://huggingface.co/spaces/fastrtc/llm-voice-chat">LLM Voice Chat</a> for an example of using the <code>stt</code> method in a <code>ReplyOnPause</code> handler.</p>
|
|
</div>
|
|
<div class="admonition warning">
|
|
<p class="admonition-title">English Only</p>
|
|
<p>The <code>stt</code> model is currently only supported for English.</p>
|
|
</div>
|
|
<h2 id="requesting-inputs">Requesting Inputs</h2>
|
|
<p>In <code>ReplyOnPause</code> and <code>ReplyOnStopWords</code>, any additional input data is automatically passed to your generator. For <code>StreamHandler</code>s, you must manually request the input data from the client.</p>
|
|
<p>You can do this by calling <code>await self.wait_for_args()</code> (for <code>AsyncStreamHandler</code>s) in either the <code>emit</code> or <code>receive</code> methods. For a <code>StreamHandler</code>, you can call <code>self.wait_for_args_sync()</code>.</p>
|
|
<p>Once the arguments have arrived, you can access the additional input values via the <code>latest_args</code> property of the <code>StreamHandler</code>. <code>latest_args</code> is a list storing each of the values. The 0th index is the dummy string <code>__webrtc_value__</code>.</p>
|
|
<h2 id="considerations-for-telephone-use">Considerations for Telephone Use</h2>
|
|
<p>In order for your handler to work over the phone, you must make sure that your handler is not expecting any additional input data besides the audio.</p>
|
|
<p>If you call <code>await self.wait_for_args()</code> during a phone call, your stream will wait forever for additional input data that never arrives.</p>
|
|
<p>The stream handlers have a <code>phone_mode</code> property that is set to <code>True</code> if the stream is running over the phone. You can use this property to determine if you should wait for additional input data.</p>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-10-1"><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">emit</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
</span><span id="__span-10-2"><a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">phone_mode</span><span class="p">:</span>
|
|
</span><span id="__span-10-3"><a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a> <span class="bp">self</span><span class="o">.</span><span class="n">latest_args</span> <span class="o">=</span> <span class="p">[</span><span class="kc">None</span><span class="p">]</span>
|
|
</span><span id="__span-10-4"><a id="__codelineno-10-4" name="__codelineno-10-4" href="#__codelineno-10-4"></a> <span class="k">else</span><span class="p">:</span>
|
|
</span><span id="__span-10-5"><a id="__codelineno-10-5" name="__codelineno-10-5" href="#__codelineno-10-5"></a> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">wait_for_args</span><span class="p">()</span>
|
|
</span></code></pre></div>
|
|
<h3 id="replyonpause-and-telephone-use"><code>ReplyOnPause</code> and telephone use</h3>
|
|
<p>The generator you pass to <code>ReplyOnPause</code> must have default arguments for all arguments except audio.</p>
|
|
<p>If you yield <code>AdditionalOutputs</code>, they will be passed in as the input arguments to the generator the next time it is called.</p>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Tip</p>
|
|
<p>See <a href="https://huggingface.co/spaces/fastrtc/talk-to-claude">Talk To Claude</a> for an example of a <code>ReplyOnPause</code> handler that is compatible with telephone usage. Notice how the input chatbot history is yielded as an <code>AdditionalOutputs</code> on each invocation.</p>
|
|
</div>
|
|
<h2 id="telephone-integration">Telephone Integration</h2>
|
|
<p>You can integrate a <code>Stream</code> with a SIP provider like Twilio to set up your own phone number for your application.</p>
|
|
<h3 id="setup-process">Setup Process</h3>
|
|
<ol>
|
|
<li>
|
|
<p><strong>Create a Twilio Account</strong>: Sign up for a <a href="https://login.twilio.com/u/signup">Twilio</a> account and purchase a phone number with voice capabilities. With a trial account, only the phone number you used during registration will be able to connect to your <code>Stream</code>.</p>
|
|
</li>
|
|
<li>
|
|
<p><strong>Mount Your Stream</strong>: Add your <code>Stream</code> to a FastAPI app using <code>stream.mount(app)</code> and run the server.</p>
|
|
</li>
|
|
<li>
|
|
<p><strong>Configure Twilio Webhook</strong>: Point your Twilio phone number to your webhook URL.</p>
|
|
</li>
|
|
</ol>
|
|
<h3 id="configuring-twilio">Configuring Twilio</h3>
|
|
<p>To configure your Twilio phone number:</p>
|
|
<ol>
|
|
<li>In your Twilio dashboard, navigate to <code>Manage</code> → <code>TwiML Apps</code> in the left sidebar</li>
|
|
<li>Click <code>Create TwiML App</code></li>
|
|
<li>Set the <code>Voice URL</code> to your FastAPI app's URL with <code>/telephone/incoming</code> appended (e.g., <code>https://your-app-url.com/telephone/incoming</code>)</li>
|
|
</ol>
|
|
<p><img alt="Twilio TwiML Apps Navigation" src="https://github.com/user-attachments/assets/9cd7b7de-d3e6-4fc8-9e50-ffe946d19c73" />
|
|
<img alt="Twilio Voice URL Configuration" src="https://github.com/user-attachments/assets/b8490e59-9f2c-4bb4-af59-a304100a5eaf" /></p>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Local Development with Ngrok</p>
|
|
<p>For local development, use <a href="https://ngrok.com/">ngrok</a> to expose your local server:
|
|
<div class="language-bash highlight"><pre><span></span><code><span id="__span-11-1"><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a>ngrok<span class="w"> </span>http<span class="w"> </span><port>
|
|
</span></code></pre></div>
|
|
<p>Then set your Twilio Voice URL to <code>https://your-ngrok-subdomain.ngrok.io/telephone/incoming</code></p>
|
|
</div>
|
|
<h3 id="code-example">Code Example</h3>
|
|
<p>Here's a simple example of setting up a Twilio endpoint:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-12-1"><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">ReplyOnPause</span>
|
|
</span><span id="__span-12-2"><a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastapi</span><span class="w"> </span><span class="kn">import</span> <span class="n">FastAPI</span>
|
|
</span><span id="__span-12-3"><a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a>
|
|
</span><span id="__span-12-4"><a id="__codelineno-12-4" name="__codelineno-12-4" href="#__codelineno-12-4"></a><span class="k">def</span><span class="w"> </span><span class="nf">echo</span><span class="p">(</span><span class="n">audio</span><span class="p">):</span>
|
|
</span><span id="__span-12-5"><a id="__codelineno-12-5" name="__codelineno-12-5" href="#__codelineno-12-5"></a> <span class="k">yield</span> <span class="n">audio</span>
|
|
</span><span id="__span-12-6"><a id="__codelineno-12-6" name="__codelineno-12-6" href="#__codelineno-12-6"></a>
|
|
</span><span id="__span-12-7"><a id="__codelineno-12-7" name="__codelineno-12-7" href="#__codelineno-12-7"></a><span class="n">app</span> <span class="o">=</span> <span class="n">FastAPI</span><span class="p">()</span>
|
|
</span><span id="__span-12-8"><a id="__codelineno-12-8" name="__codelineno-12-8" href="#__codelineno-12-8"></a>
|
|
</span><span id="__span-12-9"><a id="__codelineno-12-9" name="__codelineno-12-9" href="#__codelineno-12-9"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span><span class="n">ReplyOnPause</span><span class="p">(</span><span class="n">echo</span><span class="p">),</span> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span><span class="p">)</span>
|
|
</span><span id="__span-12-10"><a id="__codelineno-12-10" name="__codelineno-12-10" href="#__codelineno-12-10"></a><span class="n">stream</span><span class="o">.</span><span class="n">mount</span><span class="p">(</span><span class="n">app</span><span class="p">)</span>
|
|
</span><span id="__span-12-11"><a id="__codelineno-12-11" name="__codelineno-12-11" href="#__codelineno-12-11"></a>
|
|
</span><span id="__span-12-12"><a id="__codelineno-12-12" name="__codelineno-12-12" href="#__codelineno-12-12"></a><span class="c1"># run with `uvicorn main:app`</span>
|
|
</span></code></pre></div>
|
|
<h3 id="outbound-calls-with-twilio">Outbound calls with Twilio</h3>
|
|
<p>Here's a simple example to call someone using the twilio-python module:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-13-1"><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="n">app</span> <span class="o">=</span> <span class="n">FastAPI</span><span class="p">()</span>
|
|
</span><span id="__span-13-2"><a id="__codelineno-13-2" name="__codelineno-13-2" href="#__codelineno-13-2"></a>
|
|
</span><span id="__span-13-3"><a id="__codelineno-13-3" name="__codelineno-13-3" href="#__codelineno-13-3"></a><span class="nd">@app</span><span class="o">.</span><span class="n">post</span><span class="p">(</span><span class="s2">"/call"</span><span class="p">)</span>
|
|
</span><span id="__span-13-4"><a id="__codelineno-13-4" name="__codelineno-13-4" href="#__codelineno-13-4"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">start_call</span><span class="p">(</span><span class="n">req</span><span class="p">:</span> <span class="n">Request</span><span class="p">):</span>
|
|
</span><span id="__span-13-5"><a id="__codelineno-13-5" name="__codelineno-13-5" href="#__codelineno-13-5"></a> <span class="n">body</span> <span class="o">=</span> <span class="k">await</span> <span class="n">req</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
|
</span><span id="__span-13-6"><a id="__codelineno-13-6" name="__codelineno-13-6" href="#__codelineno-13-6"></a> <span class="n">from_no</span> <span class="o">=</span> <span class="n">body</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"from"</span><span class="p">)</span>
|
|
</span><span id="__span-13-7"><a id="__codelineno-13-7" name="__codelineno-13-7" href="#__codelineno-13-7"></a> <span class="n">to_no</span> <span class="o">=</span> <span class="n">body</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"to"</span><span class="p">)</span>
|
|
</span><span id="__span-13-8"><a id="__codelineno-13-8" name="__codelineno-13-8" href="#__codelineno-13-8"></a> <span class="n">account_sid</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getenv</span><span class="p">(</span><span class="s2">"TWILIO_ACCOUNT_SID"</span><span class="p">)</span>
|
|
</span><span id="__span-13-9"><a id="__codelineno-13-9" name="__codelineno-13-9" href="#__codelineno-13-9"></a> <span class="n">auth_token</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getenv</span><span class="p">(</span><span class="s2">"TWILIO_AUTH_TOKEN"</span><span class="p">)</span>
|
|
</span><span id="__span-13-10"><a id="__codelineno-13-10" name="__codelineno-13-10" href="#__codelineno-13-10"></a> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">account_sid</span><span class="p">,</span> <span class="n">auth_token</span><span class="p">)</span>
|
|
</span><span id="__span-13-11"><a id="__codelineno-13-11" name="__codelineno-13-11" href="#__codelineno-13-11"></a>
|
|
</span><span id="__span-13-12"><a id="__codelineno-13-12" name="__codelineno-13-12" href="#__codelineno-13-12"></a> <span class="c1"># Use the public URL of your application</span>
|
|
</span><span id="__span-13-13"><a id="__codelineno-13-13" name="__codelineno-13-13" href="#__codelineno-13-13"></a> <span class="c1"># here we're using ngrok to expose an app</span>
|
|
</span><span id="__span-13-14"><a id="__codelineno-13-14" name="__codelineno-13-14" href="#__codelineno-13-14"></a> <span class="c1"># running locally</span>
|
|
</span><span id="__span-13-15"><a id="__codelineno-13-15" name="__codelineno-13-15" href="#__codelineno-13-15"></a> <span class="n">call</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">calls</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
|
</span><span id="__span-13-16"><a id="__codelineno-13-16" name="__codelineno-13-16" href="#__codelineno-13-16"></a> <span class="n">to</span><span class="o">=</span><span class="n">to_no</span><span class="p">,</span>
|
|
</span><span id="__span-13-17"><a id="__codelineno-13-17" name="__codelineno-13-17" href="#__codelineno-13-17"></a> <span class="n">from_</span><span class="o">=</span><span class="n">from_no</span><span class="p">,</span>
|
|
</span><span id="__span-13-18"><a id="__codelineno-13-18" name="__codelineno-13-18" href="#__codelineno-13-18"></a> <span class="n">url</span><span class="o">=</span><span class="s2">"https://[your_ngrok_subdomain].ngrok.app/incoming-call"</span>
|
|
</span><span id="__span-13-19"><a id="__codelineno-13-19" name="__codelineno-13-19" href="#__codelineno-13-19"></a> <span class="p">)</span>
|
|
</span><span id="__span-13-20"><a id="__codelineno-13-20" name="__codelineno-13-20" href="#__codelineno-13-20"></a>
|
|
</span><span id="__span-13-21"><a id="__codelineno-13-21" name="__codelineno-13-21" href="#__codelineno-13-21"></a> <span class="k">return</span> <span class="p">{</span><span class="s2">"sid"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">call</span><span class="o">.</span><span class="n">sid</span><span class="si">}</span><span class="s2">"</span><span class="p">}</span>
|
|
</span><span id="__span-13-22"><a id="__codelineno-13-22" name="__codelineno-13-22" href="#__codelineno-13-22"></a>
|
|
</span><span id="__span-13-23"><a id="__codelineno-13-23" name="__codelineno-13-23" href="#__codelineno-13-23"></a><span class="nd">@app</span><span class="o">.</span><span class="n">api_route</span><span class="p">(</span><span class="s2">"/incoming-call"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"GET"</span><span class="p">,</span> <span class="s2">"POST"</span><span class="p">])</span>
|
|
</span><span id="__span-13-24"><a id="__codelineno-13-24" name="__codelineno-13-24" href="#__codelineno-13-24"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">handle_incoming_call</span><span class="p">(</span><span class="n">req</span><span class="p">:</span> <span class="n">Request</span><span class="p">):</span>
|
|
</span><span id="__span-13-25"><a id="__codelineno-13-25" name="__codelineno-13-25" href="#__codelineno-13-25"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">twilio.twiml.voice_response</span><span class="w"> </span><span class="kn">import</span> <span class="n">VoiceResponse</span><span class="p">,</span> <span class="n">Connect</span>
|
|
</span><span id="__span-13-26"><a id="__codelineno-13-26" name="__codelineno-13-26" href="#__codelineno-13-26"></a> <span class="n">response</span> <span class="o">=</span> <span class="n">VoiceResponse</span><span class="p">()</span>
|
|
</span><span id="__span-13-27"><a id="__codelineno-13-27" name="__codelineno-13-27" href="#__codelineno-13-27"></a> <span class="n">response</span><span class="o">.</span><span class="n">say</span><span class="p">(</span><span class="s2">"Connecting to AI assistant"</span><span class="p">)</span>
|
|
</span><span id="__span-13-28"><a id="__codelineno-13-28" name="__codelineno-13-28" href="#__codelineno-13-28"></a> <span class="n">connect</span> <span class="o">=</span> <span class="n">Connect</span><span class="p">()</span>
|
|
</span><span id="__span-13-29"><a id="__codelineno-13-29" name="__codelineno-13-29" href="#__codelineno-13-29"></a> <span class="n">connect</span><span class="o">.</span><span class="n">stream</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="sa">f</span><span class="s1">'wss://</span><span class="si">{</span><span class="n">req</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">hostname</span><span class="si">}</span><span class="s1">/media-stream'</span><span class="p">)</span>
|
|
</span><span id="__span-13-30"><a id="__codelineno-13-30" name="__codelineno-13-30" href="#__codelineno-13-30"></a> <span class="n">response</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">connect</span><span class="p">)</span>
|
|
</span><span id="__span-13-31"><a id="__codelineno-13-31" name="__codelineno-13-31" href="#__codelineno-13-31"></a> <span class="k">return</span> <span class="n">HTMLResponse</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="n">media_type</span><span class="o">=</span><span class="s2">"application/xml"</span><span class="p">)</span>
|
|
</span><span id="__span-13-32"><a id="__codelineno-13-32" name="__codelineno-13-32" href="#__codelineno-13-32"></a>
|
|
</span><span id="__span-13-33"><a id="__codelineno-13-33" name="__codelineno-13-33" href="#__codelineno-13-33"></a><span class="nd">@app</span><span class="o">.</span><span class="n">websocket</span><span class="p">(</span><span class="s2">"/media-stream"</span><span class="p">)</span>
|
|
</span><span id="__span-13-34"><a id="__codelineno-13-34" name="__codelineno-13-34" href="#__codelineno-13-34"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">handle_media_stream</span><span class="p">(</span><span class="n">websocket</span><span class="p">:</span> <span class="n">WebSocket</span><span class="p">):</span>
|
|
</span><span id="__span-13-35"><a id="__codelineno-13-35" name="__codelineno-13-35" href="#__codelineno-13-35"></a> <span class="c1"># stream is a FastRTC stream defined elsewhere</span>
|
|
</span><span id="__span-13-36"><a id="__codelineno-13-36" name="__codelineno-13-36" href="#__codelineno-13-36"></a> <span class="k">await</span> <span class="n">stream</span><span class="o">.</span><span class="n">telephone_handler</span><span class="p">(</span><span class="n">websocket</span><span class="p">)</span>
|
|
</span><span id="__span-13-37"><a id="__codelineno-13-37" name="__codelineno-13-37" href="#__codelineno-13-37"></a>
|
|
</span><span id="__span-13-38"><a id="__codelineno-13-38" name="__codelineno-13-38" href="#__codelineno-13-38"></a><span class="n">app</span> <span class="o">=</span> <span class="n">gr</span><span class="o">.</span><span class="n">mount_gradio_app</span><span class="p">(</span><span class="n">app</span><span class="p">,</span> <span class="n">stream</span><span class="o">.</span><span class="n">ui</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="s2">"/"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"base": "../..", "features": ["content.code.copy", "content.code.annotate", "navigation.indexes"], "search": "../../assets/javascripts/workers/search.d50fe291.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.13a4f30d.min.js"></script>
|
|
|
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
|
|
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |