Files
gradio-webrtc/userguide/audio-video/index.html
2025-06-17 12:04:47 +00:00

956 lines
24 KiB
HTML

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="canonical" href="https://fastrtc.org/userguide/audio-video/">
<link rel="prev" href="../video/">
<link rel="next" href="../gradio/">
<link rel="icon" href="../../fastrtc_logo.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.14">
<title>Audio-Video Streaming - FastRTC</title>
<link rel="stylesheet" href="../../assets/stylesheets/main.342714a4.min.css">
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../../stylesheets/extra.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="fastrtc-dark" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#audio-video-streaming" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="FastRTC"
class="md-header__button md-logo" aria-label="FastRTC" data-md-component="logo">
<img src="../../fastrtc_logo.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
FastRTC
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Audio-Video Streaming
</span>
</div>
</div>
</div>
<div style="display: flex; align-items: center; margin-right: 1rem;">
<a href="https://hf.co/fastrtc" target="_blank" rel="noopener noreferrer">
<img src="/hf-logo.svg"
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/hf-logo.svg';"
style="height: 24px; margin-right: 10px;">
</a>
<a href="https://gradio.app" target="_blank" rel="noopener noreferrer">
<img src="/gradio-logo.svg"
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/gradio-logo.svg';"
style="height: 24px; margin-right: 10px;">
</a>
<a href="https://discord.gg/TSWU7HyaYu" target="_blank" rel="noopener noreferrer">
<img src="/Discord-Symbol-White.svg" style="height: 16px; margin-right: 10px;">
</a>
</div>
<div class="md-header__source">
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
fastrtc
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="FastRTC" class="md-nav__button md-logo" aria-label="FastRTC" data-md-component="logo">
<img src="../../fastrtc_logo.png" alt="logo">
</a>
FastRTC
</label>
<div class="md-nav__source">
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
fastrtc
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../.." class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
User Guide
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
User Guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../streams/" class="md-nav__link">
<span class="md-ellipsis">
Core Concepts
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../audio/" class="md-nav__link">
<span class="md-ellipsis">
Audio Streaming
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../video/" class="md-nav__link">
<span class="md-ellipsis">
Video Streaming
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Audio-Video Streaming
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../gradio/" class="md-nav__link">
<span class="md-ellipsis">
Gradio
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../api/" class="md-nav__link">
<span class="md-ellipsis">
API
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../cookbook/" class="md-nav__link">
<span class="md-ellipsis">
Cookbook
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../deployment/" class="md-nav__link">
<span class="md-ellipsis">
Deployment
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../advanced-configuration/" class="md-nav__link">
<span class="md-ellipsis">
Advanced Configuration
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
Plugin Ecosystem
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
Plugin Ecosystem
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../text_to_speech_gallery/" class="md-nav__link">
<span class="md-ellipsis">
Text-to-Speech Gallery
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../speech_to_text_gallery/" class="md-nav__link">
<span class="md-ellipsis">
Speech-to-Text Gallery
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../turn_taking_gallery/" class="md-nav__link">
<span class="md-ellipsis">
Turn-taking Gallery
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../utils/" class="md-nav__link">
<span class="md-ellipsis">
Utils
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../faq/" class="md-nav__link">
<span class="md-ellipsis">
Frequently Asked Questions
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
<label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
<span class="md-ellipsis">
API Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_9">
<span class="md-nav__icon md-icon"></span>
API Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../reference/stream/" class="md-nav__link">
<span class="md-ellipsis">
Stream
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/reply_on_pause/" class="md-nav__link">
<span class="md-ellipsis">
Pause Detection Handlers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/stream_handlers/" class="md-nav__link">
<span class="md-ellipsis">
Stream Handlers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/utils/" class="md-nav__link">
<span class="md-ellipsis">
Utils
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/credentials/" class="md-nav__link">
<span class="md-ellipsis">
TURN Credentials
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="audio-video-streaming">Audio-Video Streaming</h1>
<p>You can simultaneously stream audio and video using <code>AudioVideoStreamHandler</code> or <code>AsyncAudioVideoStreamHandler</code>.
They are identical to the audio <code>StreamHandlers</code> with the addition of <code>video_receive</code> and <code>video_emit</code> methods which take and return a <code>numpy</code> array, respectively.</p>
<p>Here is an example of the video handling functions for connecting with the Gemini multimodal API. In this case, we simply reflect the webcam feed back to the user but every second we'll send the latest webcam frame (and an additional image component) to the Gemini server.</p>
<p>Please see the "Gemini Audio Video Chat" example in the <a href="../../cookbook">cookbook</a> for the complete code.</p>
<div class="language-python highlight"><span class="filename">Async Gemini Video Handling</span><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">video_receive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">frame</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Send video frames to the server&quot;&quot;&quot;</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="p">:</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="c1"># send image every 1 second</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="c1"># otherwise we flood the API</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="k">if</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_frame_time</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="bp">self</span><span class="o">.</span><span class="n">last_frame_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">send</span><span class="p">(</span><span class="n">encode_image</span><span class="p">(</span><span class="n">frame</span><span class="p">))</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">latest_args</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">send</span><span class="p">(</span><span class="n">encode_image</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">latest_args</span><span class="p">[</span><span class="mi">2</span><span class="p">]))</span>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a> <span class="bp">self</span><span class="o">.</span><span class="n">video_queue</span><span class="o">.</span><span class="n">put_nowait</span><span class="p">(</span><span class="n">frame</span><span class="p">)</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">video_emit</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">VideoEmitType</span><span class="p">:</span>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return video frames to the client&quot;&quot;&quot;</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a> <span class="k">return</span> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">video_queue</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
</span></code></pre></div>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../..", "features": ["content.code.copy", "content.code.annotate", "navigation.indexes"], "search": "../../assets/javascripts/workers/search.d50fe291.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.13a4f30d.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
</body>
</html>