mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-04 17:39:23 +08:00
1118 lines
32 KiB
HTML
1118 lines
32 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
<link rel="canonical" href="https://fastrtc.org/speech_to_text_gallery/">
|
|
|
|
|
|
<link rel="prev" href="../text_to_speech_gallery/">
|
|
|
|
|
|
<link rel="next" href="../turn_taking_gallery/">
|
|
|
|
|
|
<link rel="icon" href="../fastrtc_logo.png">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.14">
|
|
|
|
|
|
|
|
<title>Speech-to-Text Gallery - FastRTC</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../assets/stylesheets/main.342714a4.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../assets/stylesheets/palette.06af60db.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../stylesheets/extra.css">
|
|
|
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
|
|
|
|
<script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="fastrtc-dark" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#how-to-add-your-own-stt-model" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href=".." title="FastRTC"
|
|
class="md-header__button md-logo" aria-label="FastRTC" data-md-component="logo">
|
|
|
|
<img src="../fastrtc_logo.png" alt="logo">
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
FastRTC
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Speech-to-Text Gallery
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<div style="display: flex; align-items: center; margin-right: 1rem;">
|
|
<a href="https://hf.co/fastrtc" target="_blank" rel="noopener noreferrer">
|
|
<img src="/hf-logo.svg"
|
|
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/hf-logo.svg';"
|
|
style="height: 24px; margin-right: 10px;">
|
|
</a>
|
|
<a href="https://gradio.app" target="_blank" rel="noopener noreferrer">
|
|
<img src="/gradio-logo.svg"
|
|
onerror="this.onerror=null; this.src='https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/gradio-logo.svg';"
|
|
style="height: 24px; margin-right: 10px;">
|
|
</a>
|
|
<a href="https://discord.gg/TSWU7HyaYu" target="_blank" rel="noopener noreferrer">
|
|
<img src="/Discord-Symbol-White.svg" style="height: 16px; margin-right: 10px;">
|
|
</a>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
fastrtc
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href=".." title="FastRTC" class="md-nav__button md-logo" aria-label="FastRTC" data-md-component="logo">
|
|
|
|
<img src="../fastrtc_logo.png" alt="logo">
|
|
|
|
</a>
|
|
FastRTC
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/gradio-app/fastrtc" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
fastrtc
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href=".." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Home
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
User Guide
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
User Guide
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/streams/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Core Concepts
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/audio/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Audio Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/video/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Video Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/audio-video/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Audio-Video Streaming
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/gradio/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Gradio
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../userguide/api/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
API
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../cookbook/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Cookbook
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../deployment/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Deployment
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../advanced-configuration/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Advanced Configuration
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Plugin Ecosystem
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_6">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Plugin Ecosystem
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../text_to_speech_gallery/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Text-to-Speech Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Speech-to-Text Gallery
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Speech-to-Text Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#how-to-add-your-own-stt-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
How to add your own STT model
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../turn_taking_gallery/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Turn-taking Gallery
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../utils/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Utils
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../faq/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Frequently Asked Questions
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
API Reference
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_9">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
API Reference
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reference/stream/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Stream
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reference/reply_on_pause/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Pause Detection Handlers
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reference/stream_handlers/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Stream Handlers
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reference/utils/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Utils
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reference/credentials/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
TURN Credentials
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#how-to-add-your-own-stt-model" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
How to add your own STT model
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<h1>Speech-to-Text Gallery</h1>
|
|
|
|
<style>
|
|
.tag-button {
|
|
cursor: pointer;
|
|
opacity: 0.5;
|
|
transition: opacity 0.2s ease;
|
|
}
|
|
|
|
.tag-button > code {
|
|
color: var(--supernova);
|
|
}
|
|
|
|
.tag-button.active {
|
|
opacity: 1;
|
|
}
|
|
</style>
|
|
|
|
<p>A collection of Speech-to-Text models ready to use with FastRTC. Click on the tags below to find the STT model you're looking for!</p>
|
|
<div class="admonition tip">
|
|
<p class="admonition-title">Note</p>
|
|
<p>The model you want to use does not have to be in the gallery. This is just a collection of models with a common interface that are easy to "plug and play" into your FastRTC app. But You can use any model you want without having to do any special setup. Simply use it from your stream handler!</p>
|
|
</div>
|
|
<div class="tag-buttons">
|
|
<button class="tag-button" data-tag="pytorch"><code>pytorch</code></button>
|
|
</div>
|
|
|
|
<script>
|
|
function filterCards() {
|
|
const activeButtons = document.querySelectorAll('.tag-button.active');
|
|
const selectedTags = Array.from(activeButtons).map(button => button.getAttribute('data-tag'));
|
|
const cards = document.querySelectorAll('.grid.cards > ul > li > p[data-tags]');
|
|
|
|
cards.forEach(card => {
|
|
const cardTags = card.getAttribute('data-tags').split(',');
|
|
const shouldShow = selectedTags.length === 0 || selectedTags.some(tag => cardTags.includes(tag));
|
|
card.parentElement.style.display = shouldShow ? 'block' : 'none';
|
|
});
|
|
}
|
|
document.querySelectorAll('.tag-button').forEach(button => {
|
|
button.addEventListener('click', () => {
|
|
button.classList.toggle('active');
|
|
filterCards();
|
|
});
|
|
});
|
|
</script>
|
|
|
|
<div class="grid cards">
|
|
<ul>
|
|
<li>
|
|
<p data-tags="pytorch"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> distil-whisper-FastRTC</p>
|
|
<hr />
|
|
<p>Description:
|
|
<a href="https://github.com/huggingface/distil-whisper">Distil-whisper</a> from Hugging Face wrapped in a pypi package for plug and play!</p>
|
|
<p>Install Instructions
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">distil</span><span class="o">-</span><span class="n">whisper</span><span class="o">-</span><span class="n">fastrtc</span>
|
|
</span></code></pre></div>
|
|
Use it the same way you would the native fastRTC TTS model!</p>
|
|
<p><a href="https://huggingface.co/spaces/Codeblockz/llm-voice-chat/"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13.22 19.03a.75.75 0 0 1 0-1.06L18.19 13H3.75a.75.75 0 0 1 0-1.5h14.44l-4.97-4.97a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215l6.25 6.25a.75.75 0 0 1 0 1.06l-6.25 6.25a.75.75 0 0 1-1.06 0"/></svg></span> Demo</a></p>
|
|
<p><a href="https://github.com/Codeblockz/distil-whisper-FastRTC"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
|
|
</li>
|
|
<li>
|
|
<p data-tags="sherpa-onnx"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> Kroko-ASR </p>
|
|
<hr />
|
|
<p>Description
|
|
<a href="https://huggingface.co/Banafo/Kroko-ASR">Kroko-ASR</a> is a lightweight TTS model </p>
|
|
<p>Install Instructions
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">fastrtc</span><span class="o">-</span><span class="n">kroko</span>
|
|
</span></code></pre></div>
|
|
Check out the fastRTC-Kroko docs for examples!</p>
|
|
<p><a href="https://github.com/sgarg26/fastrtc-kroko"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
|
|
</li>
|
|
<li>
|
|
<p data-tags="whisper-cpp"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> fastrtc-whisper-cpp </p>
|
|
<hr />
|
|
<p>Description:
|
|
<a href="https://huggingface.co/ggerganov/whisper.cpp">whisper.cpp</a> is the ggml version of OpenAI's Whisper model. </p>
|
|
<p>Install Instructions
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">fastrtc</span><span class="o">-</span><span class="n">whisper</span><span class="o">-</span><span class="n">cpp</span>
|
|
</span></code></pre></div>
|
|
Check out the fastrtc-whisper-cpp docs for examples!</p>
|
|
<p><a href="https://github.com/mahimairaja/fastrtc-whisper-cpp"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
|
|
</li>
|
|
<li>
|
|
<p data-tags="pytorch"><img alt="🗣" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f5e3.svg" title=":speaking_head:" /><img alt="👀" class="twemoji lg middle" src="https://cdn.jsdelivr.net/gh/jdecked/twemoji@15.1.0/assets/svg/1f440.svg" title=":eyes:" /> <strong>Your STT Model</strong></p>
|
|
<hr />
|
|
<p>Description</p>
|
|
<p>Install Instructions</p>
|
|
<p>Usage</p>
|
|
<p><a href="Your demo here"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13.22 19.03a.75.75 0 0 1 0-1.06L18.19 13H3.75a.75.75 0 0 1 0-1.5h14.44l-4.97-4.97a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215l6.25 6.25a.75.75 0 0 1 0 1.06l-6.25 6.25a.75.75 0 0 1-1.06 0"/></svg></span> Demo</a></p>
|
|
<p><a href="Code here"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg></span> Repository</a></p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<h2 id="how-to-add-your-own-stt-model">How to add your own STT model</h2>
|
|
<ol>
|
|
<li>
|
|
<p>Your model can be implemented in <strong>any</strong> framework you want but it must implement the <code>STTModel</code> protocol.</p>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="k">class</span><span class="w"> </span><span class="nc">STTModel</span><span class="p">(</span><span class="n">Protocol</span><span class="p">):</span>
|
|
</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a> <span class="k">def</span><span class="w"> </span><span class="nf">stt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">audio</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int16</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">]])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
|
</span></code></pre></div>
|
|
<ul>
|
|
<li>
|
|
<p>The <code>stt</code> method should take in an audio tuple <code>(sample_rate, audio_array)</code> and return a string of the transcribed text.</p>
|
|
</li>
|
|
<li>
|
|
<p>The <code>audio</code> tuple should be of the form <code>(sample_rate, audio_array)</code> where <code>sample_rate</code> is the sample rate of the audio array and <code>audio_array</code> is a numpy array of the audio data. It can be of type <code>np.int16</code> or <code>np.float32</code>.</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p>Once you have your model implemented, you can use it in your handler!</p>
|
|
<div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">fastrtc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Stream</span><span class="p">,</span> <span class="n">AdditionalOutputs</span><span class="p">,</span> <span class="n">ReplyOnPause</span>
|
|
</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">your_model</span><span class="w"> </span><span class="kn">import</span> <span class="n">YourModel</span>
|
|
</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a>
|
|
</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="n">model</span> <span class="o">=</span> <span class="n">YourModel</span><span class="p">()</span> <span class="c1"># implement the STTModel protocol</span>
|
|
</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a>
|
|
</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a><span class="k">def</span><span class="w"> </span><span class="nf">echo</span><span class="p">(</span><span class="n">audio</span><span class="p">):</span>
|
|
</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a> <span class="n">text</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">stt</span><span class="p">(</span><span class="n">audio</span><span class="p">)</span>
|
|
</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a> <span class="k">yield</span> <span class="n">AdditionalOutputs</span><span class="p">(</span><span class="n">text</span><span class="p">)</span>
|
|
</span><span id="__span-4-9"><a id="__codelineno-4-9" name="__codelineno-4-9" href="#__codelineno-4-9"></a>
|
|
</span><span id="__span-4-10"><a id="__codelineno-4-10" name="__codelineno-4-10" href="#__codelineno-4-10"></a><span class="n">stream</span> <span class="o">=</span> <span class="n">Stream</span><span class="p">(</span><span class="n">ReplyOnPause</span><span class="p">(</span><span class="n">echo</span><span class="p">),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">"send-receive"</span><span class="p">,</span> <span class="n">modality</span><span class="o">=</span><span class="s2">"audio"</span><span class="p">,</span>
|
|
</span><span id="__span-4-11"><a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a> <span class="n">additional_outputs</span><span class="o">=</span><span class="p">[</span><span class="n">gr</span><span class="o">.</span><span class="n">Textbox</span><span class="p">(</span><span class="n">label</span><span class="o">=</span><span class="s2">"Transcription"</span><span class="p">)],</span>
|
|
</span><span id="__span-4-12"><a id="__codelineno-4-12" name="__codelineno-4-12" href="#__codelineno-4-12"></a> <span class="n">additional_outputs_handler</span><span class="o">=</span><span class="k">lambda</span> <span class="n">old</span><span class="p">,</span><span class="n">new</span><span class="p">:</span><span class="n">old</span> <span class="o">+</span> <span class="n">new</span><span class="p">)</span>
|
|
</span><span id="__span-4-13"><a id="__codelineno-4-13" name="__codelineno-4-13" href="#__codelineno-4-13"></a><span class="n">stream</span><span class="o">.</span><span class="n">ui</span><span class="o">.</span><span class="n">launch</span><span class="p">()</span>
|
|
</span></code></pre></div>
|
|
</li>
|
|
<li>
|
|
<p>Open a <a href="https://github.com/freddyaboulton/fastrtc/edit/main/docs/speech_to_text_gallery.md">PR</a> to add your model to the gallery! Ideally your model package should be pip installable so others can try it out easily.</p>
|
|
</li>
|
|
</ol>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"base": "..", "features": ["content.code.copy", "content.code.annotate", "navigation.indexes"], "search": "../assets/javascripts/workers/search.d50fe291.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../assets/javascripts/bundle.13a4f30d.min.js"></script>
|
|
|
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
|
|
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |