Cloudflare TURN integration (#264)

* Turn integration

* Add code:

* type hint

* Fix typehint

* add code

* format

* WIP

* trickle ice

* bump version

* Better docs

* Modify

* code

* Mute icon for whisper

* Add code

* llama 4 demo

* code

* OpenAI interruptions

* fix docs
Freddy Boulton
2025-04-09 09:36:51 -04:00
committed by GitHub
parent f70b27bd41
commit 837330dcd8
37 changed files with 2914 additions and 780 deletions

View File

@@ -33,7 +33,7 @@ jobs:
path: .cache
restore-keys: |
mkdocs-material-
- - run: pip install mkdocs-material mkdocs-llmstxt
- run: pip install mkdocs-material mkdocs-llmstxt==0.1.0
- name: Build docs
run: mkdocs build

View File

@@ -1,6 +1,10 @@
from .credentials import (
get_cloudflare_turn_credentials,
get_cloudflare_turn_credentials_async,
get_hf_turn_credentials,
get_hf_turn_credentials_async,
get_turn_credentials,
get_turn_credentials_async,
get_twilio_turn_credentials,
)
from .pause_detection import (
@@ -70,6 +74,10 @@ __all__ = [
"Warning",
"get_tts_model",
"KokoroTTSOptions",
"get_cloudflare_turn_credentials_async",
"get_hf_turn_credentials_async",
"get_turn_credentials_async",
"get_cloudflare_turn_credentials",
"wait_for_item",
"UIArgs",
"ModelOptions",

View File

@@ -1,29 +1,268 @@
import os
import warnings
from typing import Literal
- import requests
import httpx
CLOUDFLARE_FASTRTC_TURN_URL = "https://turn.fastrtc.org/credentials"
async_httpx_client = httpx.AsyncClient()
- def get_hf_turn_credentials(token=None):
def _format_response(response):
if response.is_success:
return response.json()
else:
raise Exception(
f"Failed to get TURN credentials: {response.status_code} {response.text}"
)
def get_hf_turn_credentials(token=None, ttl=600):
"""Retrieves TURN credentials from Hugging Face (deprecated).
This function fetches TURN server credentials using a Hugging Face token.
It is deprecated and `get_cloudflare_turn_credentials` should be used instead.
Args:
token (str, optional): Hugging Face API token. Defaults to None, in which
case the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
Returns:
dict: A dictionary containing the TURN credentials.
Raises:
ValueError: If no token is provided and the HF_TOKEN environment variable
is not set.
Exception: If the request to the TURN server fails.
"""
warnings.warn(
"get_hf_turn_credentials is deprecated. Use get_cloudflare_turn_credentials instead.",
UserWarning,
)
if token is None:
token = os.getenv("HF_TOKEN")
- credentials = requests.get(
- "https://fastrtc-turn-server-login.hf.space/credentials",
- headers={"X-HF-Access-Token": token},
if token is None:
raise ValueError(
"HF_TOKEN environment variable must be set or token must be provided to use get_hf_turn_credentials"
)
response = httpx.get(
CLOUDFLARE_FASTRTC_TURN_URL,
headers={
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
},
params={"ttl": ttl},
)
- if not credentials.status_code == 200:
- raise ValueError("Failed to get credentials from HF turn server")
- return {
- "iceServers": [
- {
- "urls": "turn:gradio-turn.com:80",
- **credentials.json(),
return _format_response(response)
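For reference, a minimal sketch (not part of this diff) of calling the deprecated helper and its replacement; it assumes HF_TOKEN is set in the environment:

import warnings

from fastrtc import get_cloudflare_turn_credentials, get_hf_turn_credentials

# The old helper still works but now emits a UserWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    creds = get_hf_turn_credentials(ttl=600)
    assert any(issubclass(w.category, UserWarning) for w in caught)

# Preferred going forward: the Cloudflare helper with the same ttl parameter.
creds = get_cloudflare_turn_credentials(ttl=600)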
async def get_hf_turn_credentials_async(
token=None, ttl=600, client: httpx.AsyncClient | None = None
):
"""Asynchronously retrieves TURN credentials from Hugging Face (deprecated).
This function asynchronously fetches TURN server credentials using a Hugging Face
token. It is deprecated and `get_cloudflare_turn_credentials_async` should be
used instead.
Args:
token (str, optional): Hugging Face API token. Defaults to None, in which
case the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
client (httpx.AsyncClient | None, optional): An existing httpx async client
to use for the request. If None, a default client is used. Defaults to None.
Returns:
dict: A dictionary containing the TURN credentials.
Raises:
ValueError: If no token is provided and the HF_TOKEN environment variable
is not set.
Exception: If the request to the TURN server fails.
"""
warnings.warn(
"get_hf_turn_credentials_async is deprecated. Use get_cloudflare_turn_credentials_async instead.",
UserWarning,
)
if client is None:
client = async_httpx_client
if token is None:
token = os.getenv("HF_TOKEN")
if token is None:
raise ValueError(
"HF_TOKEN environment variable must be set or token must be provided to use get_hf_turn_credentials"
)
response = await client.get(
"https://turn.fastrtc.org/credentials",
headers={"Authorization": f"Bearer {token}"},
params={"ttl": ttl},
)
return _format_response(response)
def get_cloudflare_turn_credentials(
turn_key_id=None, turn_key_api_token=None, hf_token=None, ttl=600
):
"""Retrieves TURN credentials from Cloudflare or Hugging Face.
Fetches TURN server credentials either directly from Cloudflare using API keys
or via the Hugging Face TURN endpoint using an HF token. The HF token method
takes precedence if provided.
Args:
turn_key_id (str, optional): Cloudflare TURN key ID. Defaults to None,
in which case the CLOUDFLARE_TURN_KEY_ID environment variable is used.
turn_key_api_token (str, optional): Cloudflare TURN key API token.
Defaults to None, in which case the CLOUDFLARE_TURN_KEY_API_TOKEN
environment variable is used.
hf_token (str, optional): Hugging Face API token. If provided, this method
is used instead of Cloudflare keys. Defaults to None, in which case
the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
Returns:
dict: A dictionary containing the TURN credentials (ICE servers).
Raises:
ValueError: If neither HF token nor Cloudflare keys (either as arguments
or environment variables) are provided.
Exception: If the request to the credential server fails.
"""
if hf_token is None:
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
response = httpx.get(
CLOUDFLARE_FASTRTC_TURN_URL,
headers={"Authorization": f"Bearer {hf_token}"},
params={"ttl": ttl},
)
return _format_response(response)
else:
if turn_key_id is None or turn_key_api_token is None:
turn_key_id = os.getenv("CLOUDFLARE_TURN_KEY_ID")
turn_key_api_token = os.getenv("CLOUDFLARE_TURN_KEY_API_TOKEN")
if turn_key_id is None or turn_key_api_token is None:
raise ValueError(
"HF_TOKEN or CLOUDFLARE_TURN_KEY_ID and CLOUDFLARE_TURN_KEY_API_TOKEN must be set to use get_cloudflare_turn_credentials_sync"
)
response = httpx.post(
f"https://rtc.live.cloudflare.com/v1/turn/keys/{turn_key_id}/credentials/generate-ice-servers",
headers={
"Authorization": f"Bearer {turn_key_api_token}",
"Content-Type": "application/json",
},
- ]
- }
json={"ttl": ttl},
)
if response.is_success:
return response.json()
else:
raise Exception(
f"Failed to get TURN credentials: {response.status_code} {response.text}"
)
async def get_cloudflare_turn_credentials_async(
turn_key_id=None,
turn_key_api_token=None,
hf_token=None,
ttl=600,
client: httpx.AsyncClient | None = None,
):
"""Asynchronously retrieves TURN credentials from Cloudflare or Hugging Face.
Asynchronously fetches TURN server credentials either directly from Cloudflare
using API keys or via the Hugging Face TURN endpoint using an HF token. The HF
token method takes precedence if provided.
Args:
turn_key_id (str, optional): Cloudflare TURN key ID. Defaults to None,
in which case the CLOUDFLARE_TURN_KEY_ID environment variable is used.
turn_key_api_token (str, optional): Cloudflare TURN key API token.
Defaults to None, in which case the CLOUDFLARE_TURN_KEY_API_TOKEN
environment variable is used.
hf_token (str, optional): Hugging Face API token. If provided, this method
is used instead of Cloudflare keys. Defaults to None, in which case
the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
client (httpx.AsyncClient | None, optional): An existing httpx async client
to use for the request. If None, a shared default client is used.
Defaults to None.
Returns:
dict: A dictionary containing the TURN credentials (ICE servers).
Raises:
ValueError: If neither HF token nor Cloudflare keys (either as arguments
or environment variables) are provided.
Exception: If the request to the credential server fails.
"""
if client is None:
client = async_httpx_client
if hf_token is None:
hf_token = os.getenv("HF_TOKEN", "").strip()
if hf_token is not None:
response = await client.get(
CLOUDFLARE_FASTRTC_TURN_URL,
headers={"Authorization": f"Bearer {hf_token}"},
params={"ttl": ttl},
)
return _format_response(response)
else:
if turn_key_id is None or turn_key_api_token is None:
turn_key_id = os.getenv("CLOUDFLARE_TURN_KEY_ID")
turn_key_api_token = os.getenv("CLOUDFLARE_TURN_KEY_API_TOKEN")
if turn_key_id is None or turn_key_api_token is None:
raise ValueError(
"HF_TOKEN or CLOUDFLARE_TURN_KEY_ID and CLOUDFLARE_TURN_KEY_API_TOKEN must be set to use get_cloudflare_turn_credentials"
)
response = await client.post(
f"https://rtc.live.cloudflare.com/v1/turn/keys/{turn_key_id}/credentials/generate-ice-servers",
headers={
"Authorization": f"Bearer {turn_key_api_token}",
"Content-Type": "application/json",
},
json={"ttl": ttl},
)
if response.is_success:
return response.json()
else:
raise Exception(
f"Failed to get TURN credentials: {response.status_code} {response.text}"
)
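Taken together, a minimal usage sketch for the new Cloudflare helpers (illustrative; assumes HF_TOKEN or the CLOUDFLARE_TURN_KEY_ID/CLOUDFLARE_TURN_KEY_API_TOKEN pair is set):

import asyncio

from fastrtc import (
    get_cloudflare_turn_credentials,
    get_cloudflare_turn_credentials_async,
)

# Synchronous: returns a dict of ICE servers for RTCPeerConnection.
ice_servers = get_cloudflare_turn_credentials(ttl=600)

# Asynchronous: the same result without blocking the event loop.
async def main() -> dict:
    return await get_cloudflare_turn_credentials_async(ttl=600)

print(asyncio.run(main()))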
def get_twilio_turn_credentials(twilio_sid=None, twilio_token=None):
"""Retrieves TURN credentials from Twilio.
Uses the Twilio REST API to generate temporary TURN credentials. Requires
the `twilio` package to be installed.
Args:
twilio_sid (str, optional): Twilio Account SID. Defaults to None, in which
case the TWILIO_ACCOUNT_SID environment variable is used.
twilio_token (str, optional): Twilio Auth Token. Defaults to None, in which
case the TWILIO_AUTH_TOKEN environment variable is used.
Returns:
dict: A dictionary containing the TURN credentials formatted for WebRTC,
including 'iceServers' and 'iceTransportPolicy'.
Raises:
ImportError: If the `twilio` package is not installed.
ValueError: If Twilio credentials (SID and token) are not provided either
as arguments or environment variables.
TwilioRestException: If the Twilio API request fails.
"""
try:
from twilio.rest import Client
except ImportError:
@@ -43,10 +282,105 @@ def get_twilio_turn_credentials(twilio_sid=None, twilio_token=None):
}
- def get_turn_credentials(method: Literal["hf", "twilio"] = "hf", **kwargs):
def get_turn_credentials(
method: Literal["hf", "twilio", "cloudflare"] = "cloudflare", **kwargs
):
"""Retrieves TURN credentials from the specified provider.
Acts as a dispatcher function to call the appropriate credential retrieval
function based on the method specified.
Args:
method (Literal["hf", "twilio", "cloudflare"], optional): The provider
to use. 'hf' uses the deprecated Hugging Face endpoint. 'cloudflare'
uses either Cloudflare keys or the HF endpoint. 'twilio' uses the
Twilio API. Defaults to "cloudflare".
**kwargs: Additional keyword arguments passed directly to the underlying
provider-specific function (e.g., `token`, `ttl` for 'hf';
`twilio_sid`, `twilio_token` for 'twilio'; `turn_key_id`,
`turn_key_api_token`, `hf_token`, `ttl` for 'cloudflare').
Returns:
dict: A dictionary containing the TURN credentials from the chosen provider.
Raises:
ValueError: If an invalid method is specified.
Also raises exceptions from the underlying provider functions (see their
docstrings).
"""
if method == "hf":
- return get_hf_turn_credentials(**kwargs)
warnings.warn(
"Method 'hf' is deprecated. Use 'cloudflare' instead.", UserWarning
)
# Ensure only relevant kwargs are passed
hf_kwargs = {k: v for k, v in kwargs.items() if k in ["token", "ttl"]}
return get_hf_turn_credentials(**hf_kwargs)
elif method == "cloudflare":
# Ensure only relevant kwargs are passed
cf_kwargs = {
k: v
for k, v in kwargs.items()
if k in ["turn_key_id", "turn_key_api_token", "hf_token", "ttl"]
}
return get_cloudflare_turn_credentials(**cf_kwargs)
elif method == "twilio":
- return get_twilio_turn_credentials(**kwargs)
# Ensure only relevant kwargs are passed
twilio_kwargs = {
k: v for k, v in kwargs.items() if k in ["twilio_sid", "twilio_token"]
}
return get_twilio_turn_credentials(**twilio_kwargs)
else:
raise ValueError("Invalid method. Must be 'hf' or 'twilio'")
raise ValueError("Invalid method. Must be 'hf', 'twilio', or 'cloudflare'")
async def get_turn_credentials_async(
method: Literal["hf", "twilio", "cloudflare"] = "cloudflare", **kwargs
):
"""Asynchronously retrieves TURN credentials from the specified provider.
Acts as an async dispatcher function to call the appropriate async credential
retrieval function based on the method specified.
Args:
method (Literal["hf", "twilio", "cloudflare"], optional): The provider
to use. 'hf' uses the deprecated Hugging Face endpoint. 'cloudflare'
uses either Cloudflare keys or the HF endpoint. 'twilio' is not
supported asynchronously by this function yet. Defaults to "cloudflare".
**kwargs: Additional keyword arguments passed directly to the underlying
provider-specific async function (e.g., `token`, `ttl`, `client` for 'hf';
`turn_key_id`, `turn_key_api_token`, `hf_token`, `ttl`, `client` for
'cloudflare').
Returns:
dict: A dictionary containing the TURN credentials from the chosen provider.
Raises:
ValueError: If an invalid method is specified.
NotImplementedError: If method 'twilio' is requested (async retrieval for
Twilio is not supported here yet).
Also raises exceptions from the underlying provider functions (see their
docstrings).
"""
if method == "hf":
warnings.warn(
"Method 'hf' is deprecated. Use 'cloudflare' instead.", UserWarning
)
# Ensure only relevant kwargs are passed
hf_kwargs = {k: v for k, v in kwargs.items() if k in ["token", "ttl", "client"]}
return await get_hf_turn_credentials_async(**hf_kwargs)
elif method == "cloudflare":
# Ensure only relevant kwargs are passed
cf_kwargs = {
k: v
for k, v in kwargs.items()
if k in ["turn_key_id", "turn_key_api_token", "hf_token", "ttl", "client"]
}
return await get_cloudflare_turn_credentials_async(**cf_kwargs)
elif method == "twilio":
# Twilio client library doesn't have a standard async interface for this.
# You might need to run the sync version in an executor or use a different library.
raise NotImplementedError(
"Async retrieval for Twilio credentials is not implemented."
)
else:
raise ValueError("Invalid method. Must be 'hf', 'twilio', or 'cloudflare'")

View File

@@ -1,3 +1,4 @@
import inspect
import logging
from collections.abc import Callable
from contextlib import AbstractAsyncContextManager
@@ -9,6 +10,7 @@ from typing import (
cast,
)
import anyio
import gradio as gr
from fastapi import FastAPI, Request, WebSocket
from fastapi.responses import HTMLResponse
@@ -18,6 +20,7 @@ from pydantic import BaseModel
from typing_extensions import NotRequired
from .tracks import HandlerType, StreamHandlerImpl
from .utils import RTCConfigurationCallable
from .webrtc import WebRTC
from .webrtc_connection_mixin import WebRTCConnectionMixin
from .websocket import WebSocketHandler
@@ -98,7 +101,7 @@ class Stream(WebRTCConnectionMixin):
time_limit: float | None = None,
allow_extra_tracks: bool = False,
rtp_params: dict[str, Any] | None = None,
- rtc_configuration: dict[str, Any] | None = None,
rtc_configuration: RTCConfigurationCallable | None = None,
track_constraints: dict[str, Any] | None = None,
additional_inputs: list[Component] | None = None,
additional_outputs: list[Component] | None = None,
@@ -116,7 +119,7 @@ class Stream(WebRTCConnectionMixin):
time_limit: Maximum execution time for the handler function in seconds.
allow_extra_tracks: If True, allows connections with tracks not matching the modality.
rtp_params: Optional dictionary of RTP encoding parameters.
- rtc_configuration: Optional dictionary for RTCPeerConnection configuration (e.g., ICE servers).
rtc_configuration: Optional Callable or dictionary for RTCPeerConnection configuration (e.g., ICE servers).
Required when deploying on Colab or Spaces.
track_constraints: Optional dictionary of constraints for media tracks (e.g., resolution, frame rate).
additional_inputs: Optional list of extra Gradio input components.
@@ -749,6 +752,15 @@ class Stream(WebRTCConnectionMixin):
body.model_dump(), set_outputs=self.set_additional_outputs(body.webrtc_id)
)
async def get_rtc_configuration(self):
if inspect.isfunction(self.rtc_configuration):
if inspect.iscoroutinefunction(self.rtc_configuration):
return await self.rtc_configuration()
else:
return await anyio.to_thread.run_sync(self.rtc_configuration)  # type: ignore
else:
return self.rtc_configuration
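With this change, rtc_configuration may be a plain dict or a sync/async callable that get_rtc_configuration resolves per request, so every client can receive fresh TTL-limited credentials. A sketch with a placeholder handler:

from fastrtc import ReplyOnPause, Stream, get_cloudflare_turn_credentials_async

def echo(audio):
    yield audio  # placeholder handler body

stream = Stream(
    ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    # Passed as a callable: resolved on each request rather than once at startup.
    rtc_configuration=get_cloudflare_turn_credentials_async,
)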
async def handle_incoming_call(self, request: Request):
"""
Handle incoming telephone calls (e.g., via Twilio).

File diff suppressed because it is too large

View File

@@ -7,7 +7,7 @@ import json
import logging
import tempfile
import traceback
from collections.abc import Callable
from collections.abc import Callable, Coroutine
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any, Literal, Protocol, TypedDict, cast
@@ -486,3 +486,15 @@ async def wait_for_item(queue: asyncio.Queue, timeout: float = 0.1) -> Any:
return await asyncio.wait_for(queue.get(), timeout=timeout)
except (TimeoutError, asyncio.TimeoutError):
return None
RTCConfigurationCallable = (
Callable[[], dict[str, Any]]
| Callable[[], Coroutine[Any, Any, dict[str, Any]]]
| Callable[[str | None, str | None, str | None], dict[str, Any]]
| Callable[
[str | None, str | None, str | None],
Coroutine[Any, Any, dict[str, Any]],
]
| dict[str, Any]
)
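Sketch of values the alias admits (the three-argument callable forms are also accepted but omitted here):

from typing import Any

from fastrtc import (
    get_cloudflare_turn_credentials,
    get_cloudflare_turn_credentials_async,
)

# All of the following satisfy RTCConfigurationCallable:
static_config: dict[str, Any] = {"iceServers": []}     # plain dict
sync_factory = get_cloudflare_turn_credentials         # () -> dict
async_factory = get_cloudflare_turn_credentials_async  # () -> awaitable dict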

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import inspect
import logging
from collections.abc import Callable, Iterable, Sequence
from typing import (
@@ -14,6 +15,8 @@ from typing import (
cast,
)
import anyio
import anyio.to_thread
from gradio import wasm_utils
from gradio.components.base import Component, server
from gradio_client import handle_file
@@ -26,6 +29,7 @@ from .tracks import (
VideoEventHandler,
VideoStreamHandler,
)
from .utils import RTCConfigurationCallable
from .webrtc_connection_mixin import WebRTCConnectionMixin
if TYPE_CHECKING:
@@ -77,7 +81,7 @@ class WebRTC(Component, WebRTCConnectionMixin):
render: bool = True,
key: int | str | None = None,
mirror_webcam: bool = True,
- rtc_configuration: dict[str, Any] | None = None,
rtc_configuration: dict[str, Any] | None | RTCConfigurationCallable = None,
track_constraints: dict[str, Any] | None = None,
time_limit: float | None = None,
allow_extra_tracks: bool = False,
@@ -359,6 +363,19 @@ class WebRTC(Component, WebRTCConnectionMixin):
concurrency_id=concurrency_id,
)
@server
async def turn(self, _):
try:
if inspect.isfunction(self.rtc_configuration):
if inspect.iscoroutinefunction(self.rtc_configuration):
return await self.rtc_configuration()
else:
return await anyio.to_thread.run_sync(self.rtc_configuration)
else:
return self.rtc_configuration or {}
except Exception as e:
return {"error": str(e)}
@server
async def offer(self, body):
return await self.handle_offer(

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to Gemini using Google's multimodal API
- tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to Gemini (Gradio UI)
- tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -14,7 +14,7 @@ from fastapi.responses import HTMLResponse
from fastrtc import (
AsyncStreamHandler,
Stream,
- get_twilio_turn_credentials,
get_cloudflare_turn_credentials_async,
wait_for_item,
)
from google import genai
@@ -117,7 +117,7 @@ stream = Stream(
modality="audio",
mode="send-receive",
handler=GeminiHandler(),
- rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
concurrency_limit=5 if get_space() else None,
time_limit=90 if get_space() else None,
additional_inputs=[
@@ -160,7 +160,7 @@ async def _(body: InputData):
@app.get("/")
async def index():
- rtc_config = get_twilio_turn_credentials() if get_space() else None
rtc_config = await get_cloudflare_turn_credentials_async() if get_space() else None
html_content = (current_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)

View File

@@ -98,6 +98,11 @@
font-weight: 600;
cursor: pointer;
transition: all 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
button:hover {
@@ -134,7 +139,6 @@
align-items: center;
justify-content: center;
gap: 12px;
- min-width: 180px;
}
.pulse-circle {
@@ -171,6 +175,23 @@
background-color: #ffd700;
color: black;
}
/* Add styles for the mute toggle */
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
flex-shrink: 0;
}
.mute-toggle svg {
display: block;
}
#start-button {
margin-left: auto;
margin-right: auto;
}
</style>
</head>
@@ -221,6 +242,11 @@
let dataChannel;
let isRecording = false;
let webrtc_id;
let isMuted = false;
let analyser_input, dataArray_input;
let analyser, dataArray;
let source_input = null;
let source_output = null;
const startButton = document.getElementById('start-button');
const apiKeyInput = document.getElementById('api-key');
@@ -235,7 +261,28 @@
boxContainer.appendChild(box);
}
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
startButton.innerHTML = '';
startButton.onclick = null;
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
@@ -243,15 +290,28 @@
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
- startButton.innerHTML = `
- <div class="pulse-container">
- <div class="pulse-circle"></div>
- <span>Stop Recording</span>
- </div>
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Recording</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
startButton.innerHTML = 'Start Recording';
startButton.disabled = false;
}
}
@@ -267,6 +327,23 @@
}, 5000);
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
async function setupWebRTC() {
const config = __RTC_CONFIGURATION__;
peerConnection = new RTCPeerConnection(config);
@@ -288,58 +365,74 @@
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => peerConnection.addTrack(track, stream));
// Update audio visualization setup
- audioContext = new AudioContext();
if (!audioContext || audioContext.state === 'closed') {
audioContext = new AudioContext();
}
if (source_input) {
try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting previous input source:", e); }
source_input = null;
}
source_input = audioContext.createMediaStreamSource(stream);
analyser_input = audioContext.createAnalyser();
- const source = audioContext.createMediaStreamSource(stream);
- source.connect(analyser_input);
source_input.connect(analyser_input);
analyser_input.fftSize = 64;
dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
- function updateAudioLevel() {
- analyser_input.getByteFrequencyData(dataArray_input);
- const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
- const audioLevel = average / 255;
- const pulseCircle = document.querySelector('.pulse-circle');
- if (pulseCircle) {
- console.log("audioLevel", audioLevel);
- pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
- }
- animationId = requestAnimationFrame(updateAudioLevel);
- }
- updateAudioLevel();
// Add connection state change listener
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
if (analyser_input) updateAudioLevel();
if (analyser) updateVisualization();
} else if (['disconnected', 'failed', 'closed'].includes(peerConnection.connectionState)) {
// Explicitly stop animations if connection drops unexpectedly
// Note: stopWebRTC() handles the normal stop case
}
updateButtonState();
});
- // Handle incoming audio
- peerConnection.addEventListener('track', (evt) => {
- if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
- audioOutput.srcObject = evt.streams[0];
- audioOutput.play();
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
- // Set up audio visualization on the output stream
- audioContext = new AudioContext();
- analyser = audioContext.createAnalyser();
- const source = audioContext.createMediaStreamSource(evt.streams[0]);
- source.connect(analyser);
- analyser.fftSize = 2048;
- dataArray = new Uint8Array(analyser.frequencyBinCount);
- updateVisualization();
peerConnection.addEventListener('track', (evt) => {
if (evt.track.kind === 'audio' && audioOutput) {
if (audioOutput.srcObject !== evt.streams[0]) {
audioOutput.srcObject = evt.streams[0];
audioOutput.play().catch(e => console.error("Audio play failed:", e));
if (!audioContext || audioContext.state === 'closed') {
console.warn("AudioContext not ready for output track analysis.");
return;
}
if (source_output) {
try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting previous output source:", e); }
source_output = null;
}
source_output = audioContext.createMediaStreamSource(evt.streams[0]);
analyser = audioContext.createAnalyser();
source_output.connect(analyser);
analyser.fftSize = 2048;
dataArray = new Uint8Array(analyser.frequencyBinCount);
updateVisualization();
}
}
});
// Create data channel for messages
dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = (event) => {
const eventJson = JSON.parse(event.data);
@@ -360,24 +453,9 @@
}
};
// Create and send offer
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
- await new Promise((resolve) => {
- if (peerConnection.iceGatheringState === "complete") {
- resolve();
- } else {
- const checkState = () => {
- if (peerConnection.iceGatheringState === "complete") {
- peerConnection.removeEventListener("icegatheringstatechange", checkState);
- resolve();
- }
- };
- peerConnection.addEventListener("icegatheringstatechange", checkState);
- }
- });
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@@ -394,7 +472,7 @@
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
: serverResponse.meta.error);
- stop();
stopWebRTC();
startButton.textContent = 'Start Recording';
return;
}
@@ -404,13 +482,17 @@
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('Failed to establish connection. Please try again.');
- stop();
stopWebRTC();
startButton.textContent = 'Start Recording';
}
}
function updateVisualization() {
- if (!analyser) return;
if (!analyser || !peerConnection || !['connected', 'connecting'].includes(peerConnection.connectionState)) {
const bars = document.querySelectorAll('.box');
bars.forEach(bar => bar.style.transform = 'scaleY(0.1)');
return;
}
analyser.getByteFrequencyData(dataArray);
const bars = document.querySelectorAll('.box');
@@ -420,32 +502,114 @@
bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
}
- animationId = requestAnimationFrame(updateVisualization);
requestAnimationFrame(updateVisualization);
}
function updateAudioLevel() {
if (!analyser_input || !peerConnection || !['connected', 'connecting'].includes(peerConnection.connectionState)) {
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1);
}
return;
}
analyser_input.getByteFrequencyData(dataArray_input);
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
const audioLevel = average / 255;
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
requestAnimationFrame(updateAudioLevel);
}
function stopWebRTC() {
console.log("Running stopWebRTC");
if (peerConnection) {
- peerConnection.close();
peerConnection.getSenders().forEach(sender => {
if (sender.track) {
sender.track.stop();
}
});
peerConnection.ontrack = null;
peerConnection.onicegatheringstatechange = null;
peerConnection.onconnectionstatechange = null;
if (dataChannel) {
dataChannel.onmessage = null;
try { dataChannel.close(); } catch (e) { console.warn("Error closing data channel:", e); }
dataChannel = null;
}
try { peerConnection.close(); } catch (e) { console.warn("Error closing peer connection:", e); }
peerConnection = null;
}
- if (animationId) {
- cancelAnimationFrame(animationId);
if (audioOutput) {
audioOutput.pause();
audioOutput.srcObject = null;
}
- if (audioContext) {
- audioContext.close();
if (source_input) {
try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting input source:", e); }
source_input = null;
}
if (source_output) {
try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting output source:", e); }
source_output = null;
}
if (audioContext && audioContext.state !== 'closed') {
audioContext.close().then(() => {
console.log("AudioContext closed successfully.");
audioContext = null;
}).catch(e => {
console.error("Error closing AudioContext:", e);
audioContext = null;
});
} else {
audioContext = null;
}
analyser_input = null;
dataArray_input = null;
analyser = null;
dataArray = null;
isMuted = false;
isRecording = false;
updateButtonState();
const bars = document.querySelectorAll('.box');
bars.forEach(bar => bar.style.transform = 'scaleY(0.1)');
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1);
}
}
- startButton.addEventListener('click', () => {
- if (!isRecording) {
- setupWebRTC();
- startButton.classList.add('recording');
- } else {
- stopWebRTC();
- startButton.classList.remove('recording');
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stopWebRTC();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
if (!apiKeyInput.value) {
showError("Please enter your API Key.");
return;
}
setupWebRTC();
isRecording = true;
updateButtonState();
}
- isRecording = !isRecording;
});
updateButtonState();
</script>
</body>

View File

@@ -1,4 +1,4 @@
- fastrtc
fastrtc[vad]==0.0.20.rc2
python-dotenv
google-genai
- twilio

View File

@@ -0,0 +1,15 @@
---
title: Talk to Llama 4
emoji: 🦙
colorFrom: purple
colorTo: red
sdk: gradio
sdk_version: 5.23.3
app_file: app.py
pinned: false
license: mit
short_description: Talk to Llama 4 using Groq + Cloudflare
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

demo/talk_to_llama4/app.py (new file, 120 lines)
View File

@@ -0,0 +1,120 @@
import json
import os
from pathlib import Path
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
audio_to_bytes,
get_cloudflare_turn_credentials_async,
get_current_context,
get_tts_model,
)
from groq import Groq
from numpy.typing import NDArray
curr_dir = Path(__file__).parent
load_dotenv()
tts_model = get_tts_model()
groq = Groq(api_key=os.getenv("GROQ_API_KEY"))
conversations: dict[str, list[dict[str, str]]] = {}
def response(user_audio: tuple[int, NDArray[np.int16]]):
context = get_current_context()
if context.webrtc_id not in conversations:
conversations[context.webrtc_id] = [
{
"role": "system",
"content": (
"You are a helpful assistant that can answer questions and help with tasks."
'Please return a short (that will be converted to audio using a text-to-speech model) response and long response to this question. They can be the same if appropriate. Please return in JSON format\n\n{"short":, "long"}\n\n'
),
}
]
messages = conversations[context.webrtc_id]
transcription = groq.audio.transcriptions.create(
file=("audio.wav", audio_to_bytes(user_audio)),
model="distil-whisper-large-v3-en",
response_format="verbose_json",
)
print(transcription.text)
messages.append({"role": "user", "content": transcription.text})
completion = groq.chat.completions.create( # type: ignore
model="meta-llama/llama-4-scout-17b-16e-instruct",
messages=messages, # type: ignore
temperature=1,
max_completion_tokens=1024,
top_p=1,
stream=False,
response_format={"type": "json_object"},
stop=None,
)
response = completion.choices[0].message.content
response = json.loads(response)
short_response = response["short"]
long_response = response["long"]
messages.append({"role": "assistant", "content": long_response})
conversations[context.webrtc_id] = messages
yield from tts_model.stream_tts_sync(short_response)
yield AdditionalOutputs(messages)
stream = Stream(
ReplyOnPause(response),
modality="audio",
mode="send-receive",
additional_outputs=[gr.Chatbot(type="messages")],
additional_outputs_handler=lambda old, new: new,
rtc_configuration=get_cloudflare_turn_credentials_async,
)
app = FastAPI()
stream.mount(app)
@app.get("/")
async def _():
rtc_config = await get_cloudflare_turn_credentials_async()
html_content = (curr_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)
@app.get("/outputs")
async def _(webrtc_id: str):
async def output_stream():
async for output in stream.output_stream(webrtc_id):
state = output.args[0]
for msg in state[-2:]:
data = {
"message": msg,
}
yield f"event: output\ndata: {json.dumps(data)}\n\n"
return StreamingResponse(output_stream(), media_type="text/event-stream")
if __name__ == "__main__":
import os
if (mode := os.getenv("MODE")) == "UI":
stream.ui.launch(server_port=7860)
elif mode == "PHONE":
raise ValueError("Phone mode not supported")
else:
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)

View File

@@ -0,0 +1,839 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Talk to Llama 4</title>
<style>
:root {
--color-primary: #3b82f6;
--color-secondary: #f97316;
--color-background: #0f172a;
--color-surface: #1e293b;
--color-text: #f1f5f9;
--color-message-user: #334155;
--color-message-assistant: #1e40af;
--gradient-primary: linear-gradient(135deg, #3b82f6, #8b5cf6);
--gradient-secondary: linear-gradient(135deg, #f97316, #ec4899);
--boxSize: 8px;
--gutter: 4px;
}
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background-color: var(--color-background);
color: var(--color-text);
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
padding: 2rem 1rem;
background-image:
radial-gradient(circle at 25% 25%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
radial-gradient(circle at 75% 75%, rgba(249, 115, 22, 0.1) 0%, transparent 50%);
}
.header-container {
display: flex;
align-items: center;
gap: 2rem;
margin-bottom: 2rem;
width: 100%;
max-width: 800px;
animation: fadeIn 1s ease-out;
}
.header {
text-align: left;
}
.header h1 {
font-size: 2.5rem;
margin-bottom: 0.5rem;
background: var(--gradient-primary);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 800;
}
.header h2 {
font-size: 1.2rem;
font-weight: 400;
color: rgba(241, 245, 249, 0.8);
margin-bottom: 1rem;
}
.logo {
width: 120px;
height: 120px;
background: var(--color-surface);
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 15px 30px rgba(0, 0, 0, 0.3);
position: relative;
overflow: hidden;
animation: float 6s ease-in-out infinite;
flex-shrink: 0;
}
.logo::before {
content: "";
position: absolute;
width: 200%;
height: 200%;
background: var(--gradient-secondary);
opacity: 0.2;
animation: rotate 10s linear infinite;
}
.logo img {
width: 75%;
height: 75%;
object-fit: contain;
z-index: 2;
}
.container {
width: 100%;
max-width: 800px;
background-color: var(--color-surface);
border-radius: 1rem;
box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
overflow: hidden;
animation: slideUp 0.5s ease-out;
}
.chat-container {
height: 400px;
overflow-y: auto;
padding: 1.5rem;
display: flex;
flex-direction: column;
gap: 1rem;
scroll-behavior: smooth;
}
.message {
max-width: 80%;
padding: 1rem;
border-radius: 1rem;
line-height: 1.5;
animation: fadeIn 0.3s ease-out;
}
.message.user {
background-color: var(--color-message-user);
color: var(--color-text);
align-self: flex-end;
border-bottom-right-radius: 0.25rem;
}
.message.assistant {
background-color: var(--color-message-assistant);
color: var(--color-text);
align-self: flex-start;
border-bottom-left-radius: 0.25rem;
}
.wave-visualizer {
height: 100px;
padding: 1rem;
background-color: rgba(30, 41, 59, 0.8);
display: flex;
align-items: center;
justify-content: center;
position: relative;
overflow: hidden;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
.box-container {
display: flex;
justify-content: space-between;
align-items: center;
width: 100%;
height: 64px;
padding: 0 1rem;
}
.box {
height: 100%;
width: var(--boxSize);
background: var(--gradient-primary);
border-radius: 4px;
transform: scaleY(0.1);
transition: transform 0.05s ease;
}
.controls {
display: flex;
justify-content: center;
align-items: center;
padding: 1.5rem;
gap: 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
#start-button {
display: flex;
align-items: center;
justify-content: center;
background: var(--gradient-primary);
color: white;
border: none;
border-radius: 9999px;
padding: 0.75rem 1.5rem;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 14px rgba(59, 130, 246, 0.4);
}
#start-button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(59, 130, 246, 0.6);
}
#start-button:active {
transform: translateY(1px);
}
.icon-with-spinner {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.spinner {
width: 20px;
height: 20px;
border: 2px solid white;
border-top-color: transparent;
border-radius: 50%;
animation: spin 1s linear infinite;
flex-shrink: 0;
}
.pulse-container {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
}
.pulse-circle {
width: 20px;
height: 20px;
border-radius: 50%;
background: var(--color-secondary);
opacity: 0.85;
flex-shrink: 0;
transform: scale(var(--audio-level, 1));
transition: transform 0.1s ease;
}
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
margin-left: 12px;
flex-shrink: 0;
filter: drop-shadow(0 4px 6px rgba(0, 0, 0, 0.2));
}
.mute-toggle svg {
width: 100%;
height: 100%;
stroke: white;
}
.typing-indicator {
padding: 0.5rem 1rem;
display: inline-flex;
align-items: center;
background-color: var(--color-message-assistant);
border-radius: 1rem;
align-self: flex-start;
margin-bottom: 0.5rem;
display: none;
animation: fadeIn 0.3s ease-out;
}
.dots {
display: inline-flex;
gap: 4px;
}
.dot {
width: 8px;
height: 8px;
background-color: white;
border-radius: 50%;
animation: bounce 1.5s infinite;
opacity: 0.7;
}
.dot:nth-child(2) {
animation-delay: 0.15s;
}
.dot:nth-child(3) {
animation-delay: 0.3s;
}
.toast {
position: fixed;
top: 20px;
left: 50%;
transform: translateX(-50%);
padding: 1rem 1.5rem;
border-radius: 0.5rem;
font-size: 0.875rem;
z-index: 1000;
display: none;
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.3);
animation: slideDown 0.3s ease-out;
}
.toast.error {
background-color: #ef4444;
color: white;
}
.toast.warning {
background-color: #f59e0b;
color: black;
}
#audio-output {
display: none;
}
@keyframes float {
0%,
100% {
transform: translateY(0);
}
50% {
transform: translateY(-10px);
}
}
@keyframes rotate {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
@keyframes bounce {
0%,
100% {
transform: translateY(0);
}
50% {
transform: translateY(-4px);
}
}
@keyframes fadeIn {
from {
opacity: 0;
}
to {
opacity: 1;
}
}
@keyframes slideUp {
from {
opacity: 0;
transform: translateY(20px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
@keyframes slideDown {
from {
opacity: 0;
transform: translate(-50%, -20px);
}
to {
opacity: 1;
transform: translate(-50%, 0);
}
}
</style>
</head>
<body>
<div id="error-toast" class="toast"></div>
<div class="header-container">
<div class="logo">
<img src="https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/Video%26Audio%20huggy.png"
alt="LLaMA Logo">
</div>
<div class="header">
<h1>Talk to Llama 4</h1>
<h2>Experience seamless real-time conversation thanks to Cloudflare and Hugging Face's FastRTC.</h2>
</div>
</div>
<div class="container">
<div class="chat-container" id="chat-messages">
<!-- Messages will appear here -->
</div>
<div class="typing-indicator" id="typing-indicator">
<div class="dots">
<div class="dot"></div>
<div class="dot"></div>
<div class="dot"></div>
</div>
</div>
<div class="wave-visualizer">
<div class="box-container" id="box-container">
<!-- Boxes will be dynamically added here -->
</div>
</div>
<div class="controls">
<button id="start-button">Start Conversation</button>
</div>
</div>
<audio id="audio-output"></audio>
<script>
let peerConnection;
let webrtc_id;
const startButton = document.getElementById('start-button');
const chatMessages = document.getElementById('chat-messages');
const boxContainer = document.getElementById('box-container');
const typingIndicator = document.getElementById('typing-indicator');
const audioOutput = document.getElementById('audio-output');
let audioLevel = 0;
let animationFrame_input, animationFrame_output;
let audioContext_input, audioContext_output;
let analyser_input, dataArray_input;
let analyser_output, dataArray_output;
let audioSource_input, audioSource_output;
let messages = [];
let eventSource;
let isMuted = false;
// Create wave visualizer boxes
const numBars = 32;
for (let i = 0; i < numBars; i++) {
const box = document.createElement('div');
box.className = 'box';
boxContainer.appendChild(box);
}
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
}
startButton.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
startButton.textContent = 'Start Conversation';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
// Input audio context for pulse circle
audioContext_input = new (window.AudioContext || window.webkitAudioContext)();
analyser_input = audioContext_input.createAnalyser();
audioSource_input = audioContext_input.createMediaStreamSource(stream);
audioSource_input.connect(analyser_input);
analyser_input.fftSize = 64;
dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
function updateAudioLevel() {
// Update input audio visualization (pulse circle)
analyser_input.getByteFrequencyData(dataArray_input);
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
audioLevel = average / 255;
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
animationFrame_input = requestAnimationFrame(updateAudioLevel);
}
updateAudioLevel();
}
function setupOutputVisualization(stream) {
// Create separate audio context for output visualization
audioContext_output = new (window.AudioContext || window.webkitAudioContext)();
analyser_output = audioContext_output.createAnalyser();
audioSource_output = audioContext_output.createMediaStreamSource(stream);
audioSource_output.connect(analyser_output);
analyser_output.fftSize = 2048;
dataArray_output = new Uint8Array(analyser_output.frequencyBinCount);
function updateVisualization() {
// Update output audio visualization (wave bars)
analyser_output.getByteFrequencyData(dataArray_output);
const boxes = document.querySelectorAll('.box');
for (let i = 0; i < boxes.length; i++) {
const index = Math.floor(i * dataArray_output.length / boxes.length);
const value = dataArray_output[index] / 255;
boxes[i].style.transform = `scaleY(${Math.max(0.1, value * 1.5)})`;
}
animationFrame_output = requestAnimationFrame(updateVisualization);
}
updateVisualization();
}
// Reset wave visualization bars to minimum height
function resetVisualization() {
const boxes = document.querySelectorAll('.box');
boxes.forEach(box => box.style.transform = 'scaleY(0.1)');
}
function showError(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
toast.className = 'toast error';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}
function handleMessage(event) {
const eventJson = JSON.parse(event.data);
if (eventJson.type === "error") {
showError(eventJson.message);
} else if (eventJson.type === "send_input") {
fetch('/input_hook', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
webrtc_id: webrtc_id,
chatbot: messages,
state: messages
})
});
} else if (eventJson.type === "log") {
if (eventJson.data === "pause_detected") {
typingIndicator.style.display = 'block';
chatMessages.scrollTop = chatMessages.scrollHeight;
} else if (eventJson.data === "response_starting") {
typingIndicator.style.display = 'none';
}
}
}
async function setupWebRTC() {
const config = __RTC_CONFIGURATION__;
peerConnection = new RTCPeerConnection(config);
const timeoutId = setTimeout(() => {
const toast = document.getElementById('error-toast');
toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
toast.className = 'toast warning';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}, 5000);
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: true
});
setupAudioVisualization(stream);
stream.getTracks().forEach(track => {
peerConnection.addTrack(track, stream);
});
// Add this listener to handle incoming audio track
peerConnection.addEventListener('track', (event) => {
if (event.track.kind === 'audio') {
console.log("Received audio track from server");
if (audioOutput) {
audioOutput.srcObject = event.streams[0];
audioOutput.play().catch(e => console.error("Audio play failed:", e));
}
// Set up visualization for output audio with separate context
setupOutputVisualization(event.streams[0]);
}
});
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = handleMessage;
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
} else if (['closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
stop();
}
updateButtonState();
});
webrtc_id = Math.random().toString(36).substring(7);
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
sdp: peerConnection.localDescription.sdp,
type: peerConnection.localDescription.type,
webrtc_id: webrtc_id
})
});
const serverResponse = await response.json();
if (serverResponse.status === 'failed') {
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
: serverResponse.meta.error);
stop();
return;
}
await peerConnection.setRemoteDescription(serverResponse);
eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
eventSource.addEventListener("output", (event) => {
const eventJson = JSON.parse(event.data);
console.log(eventJson);
messages.push(eventJson.message);
addMessage(eventJson.message.role, eventJson.audio ?? eventJson.message.content);
})
} catch (err) {
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('Failed to establish connection. Please try again.');
stop();
}
}
function addMessage(role, content) {
const messageDiv = document.createElement('div');
messageDiv.classList.add('message', role);
messageDiv.textContent = content;
chatMessages.appendChild(messageDiv);
chatMessages.scrollTop = chatMessages.scrollHeight;
}
function stop() {
if (eventSource) {
eventSource.close();
eventSource = null;
}
if (animationFrame_input) {
cancelAnimationFrame(animationFrame_input);
animationFrame_input = null;
}
if (animationFrame_output) {
cancelAnimationFrame(animationFrame_output);
animationFrame_output = null;
}
if (audioContext_input) {
audioContext_input.close().catch(e => console.error("Error closing input AudioContext:", e));
audioContext_input = null;
analyser_input = null;
dataArray_input = null;
audioSource_input = null;
}
if (audioContext_output) {
audioContext_output.close().catch(e => console.error("Error closing output AudioContext:", e));
audioContext_output = null;
analyser_output = null;
dataArray_output = null;
audioSource_output = null;
}
if (audioOutput) {
audioOutput.pause();
audioOutput.srcObject = null;
}
// Reset visualization
resetVisualization();
if (peerConnection) {
if (peerConnection.getTransceivers) {
peerConnection.getTransceivers().forEach(transceiver => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
peerConnection.onicecandidate = null;
peerConnection.ondatachannel = null;
peerConnection.onconnectionstatechange = null;
peerConnection.close();
peerConnection = null;
}
isMuted = false;
updateButtonState();
audioLevel = 0;
}
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
messages = [];
chatMessages.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>
</body>
</html>

View File

@@ -0,0 +1,3 @@
fastrtc[vad, tts]==0.0.20.rc2
groq
python-dotenv

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to OpenAI using their multimodal API
- tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to OpenAI (Gradio UI)
- tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -54,6 +54,9 @@ class OpenAIHandler(AsyncStreamHandler):
)
self.connection = conn
async for event in self.connection:
# Handle interruptions
if event.type == "input_audio_buffer.speech_started":
self.clear_queue()
if event.type == "response.audio_transcript.done":
await self.output_queue.put(AdditionalOutputs(event))
if event.type == "response.audio.delta":

View File

@@ -67,16 +67,21 @@
}
button {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 10px;
padding: 12px 24px;
background-color: transparent;
color: #ffffff;
border: 1px solid #ffffff;
padding: 12px 24px;
font-family: inherit;
font-size: 16px;
cursor: pointer;
transition: all 0.3s;
text-transform: uppercase;
letter-spacing: 1px;
position: relative;
}
button:hover {
@@ -116,9 +121,7 @@
.pulse-container {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.pulse-circle {
@@ -128,10 +131,47 @@
background-color: #ffffff;
opacity: 0.2;
flex-shrink: 0;
transform: translateX(-0%) scale(var(--audio-level, 1));
transform: scale(var(--audio-level, 1));
transition: transform 0.1s ease;
}
/* Fix button layout */
button {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 10px;
padding: 12px 24px;
background-color: transparent;
color: #ffffff;
border: 1px solid #ffffff;
font-family: inherit;
font-size: 16px;
cursor: pointer;
transition: all 0.3s;
text-transform: uppercase;
letter-spacing: 1px;
position: relative;
}
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
flex-shrink: 0;
}
.mute-toggle svg {
display: block;
width: 100%;
height: 100%;
}
#start-button {
margin-left: auto;
margin-right: auto;
}
/* Add styles for toast notifications */
.toast {
position: fixed;
@@ -177,6 +217,7 @@
<script>
let peerConnection;
let webrtc_id;
let isMuted = false;
const audioOutput = document.getElementById('audio-output');
const startButton = document.getElementById('start-button');
const chatMessages = document.getElementById('chat-messages');
@@ -185,27 +226,82 @@
let animationFrame;
let audioContext, analyser, audioSource;
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const button = document.getElementById('start-button');
// Clear previous content
button.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
button.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
const spinner = document.createElement('div');
spinner.className = 'spinner';
const text = document.createElement('span');
text.textContent = 'Connecting...';
button.appendChild(spinner);
button.appendChild(text);
} else if (peerConnection && peerConnection.connectionState === 'connected') {
button.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
</div>
`;
// Create pulse circle
const pulseCircle = document.createElement('div');
pulseCircle.className = 'pulse-circle';
// Create mic icon
const micIcon = document.createElement('div');
micIcon.className = 'mute-toggle';
micIcon.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
micIcon.addEventListener('click', toggleMute);
// Create text
const text = document.createElement('span');
text.textContent = 'Stop Conversation';
// Add elements in correct order
button.appendChild(pulseCircle);
button.appendChild(micIcon);
button.appendChild(text);
} else {
button.innerHTML = 'Start Conversation';
const text = document.createElement('span');
text.textContent = 'Start Conversation';
button.appendChild(text);
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -276,6 +372,21 @@
}
});
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = (event) => {
const eventJson = JSON.parse(event.data);
@@ -287,20 +398,6 @@
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
@@ -388,7 +485,12 @@
audioLevel = 0;
}
startButton.addEventListener('click', () => {
startButton.addEventListener('click', (event) => {
// Skip if clicking the mute toggle
if (event.target.closest('.mute-toggle')) {
return;
}
console.log('clicked');
console.log(peerConnection, peerConnection?.connectionState);
if (!peerConnection || peerConnection.connectionState !== 'connected') {

View File

@@ -1,4 +1,4 @@
fastrtc[vad]
fastrtc[vad]==0.0.20.rc2
openai
twilio
python-dotenv

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Llama 3.2 - SambaNova API
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Llama 3.2 - SambaNova API (Gradio)
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -13,8 +13,8 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
get_cloudflare_turn_credentials_async,
get_stt_model,
get_twilio_turn_credentials,
)
from gradio.utils import get_space
from pydantic import BaseModel
@@ -75,7 +75,7 @@ stream = Stream(
additional_outputs=[chatbot, state],
additional_outputs_handler=lambda *a: (a[2], a[3]),
concurrency_limit=20 if get_space() else None,
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
rtc_configuration=get_cloudflare_turn_credentials_async,
)
app = FastAPI()
@@ -95,7 +95,9 @@ class InputData(BaseModel):
@app.get("/")
async def _():
rtc_config = get_twilio_turn_credentials() if get_space() else None
rtc_config = await get_cloudflare_turn_credentials_async(
hf_token=os.getenv("HF_TOKEN_ALT")
)
html_content = (curr_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)

View File

@@ -72,13 +72,17 @@
background-color: #0066cc;
color: white;
border: none;
padding: 12px 24px;
padding: 12px 18px;
font-family: inherit;
font-size: 14px;
cursor: pointer;
transition: all 0.3s;
border-radius: 4px;
font-weight: 500;
display: inline-flex;
align-items: center;
justify-content: center;
gap: 8px;
}
button:hover {
@@ -94,7 +98,6 @@
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.spinner {
@@ -118,7 +121,6 @@
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.pulse-circle {
@@ -200,6 +202,23 @@
background-color: #ffd700;
color: black;
}
/* Styles for the mute toggle icon */
.mute-toggle {
width: 20px;
height: 20px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
.mute-toggle svg {
width: 100%;
height: 100%;
stroke: white;
}
</style>
</head>
@@ -239,28 +258,82 @@
let audioContext, analyser, audioSource;
let messages = [];
let eventSource;
let isMuted = false;
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const button = document.getElementById('start-button');
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
}
startButton.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
button.innerHTML = `
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
button.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
</div>
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
button.innerHTML = 'Start Conversation';
startButton.textContent = 'Start Conversation';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -378,6 +451,8 @@
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
} else if (['closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
stop();
}
updateButtonState();
});
@@ -448,9 +523,10 @@
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
if (audioContext) {
audioContext.close();
audioContext.close().catch(e => console.error("Error closing AudioContext:", e));
audioContext = null;
analyser = null;
audioSource = null;
@@ -464,22 +540,33 @@
});
}
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.stop) sender.track.stop();
});
}
peerConnection.onicecandidate = null;
peerConnection.ondatachannel = null;
peerConnection.onconnectionstatechange = null;
peerConnection.close();
peerConnection = null;
console.log("Peer connection closed.");
}
isMuted = false;
updateButtonState();
audioLevel = 0;
}
startButton.addEventListener('click', () => {
if (!peerConnection || peerConnection.connectionState !== 'connected') {
setupWebRTC();
} else {
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
messages = [];
chatMessages.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>

View File

@@ -1,4 +1,4 @@
fastrtc[vad, stt]
fastrtc[vad, stt]==0.0.20.rc2
python-dotenv
huggingface_hub>=0.29.0
twilio

View File

@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Transcribe audio in realtime with Whisper
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

View File

@@ -12,8 +12,7 @@ tags:
- webrtc
- websocket
- gradio
- secret|TWILIO_ACCOUNT_SID
- secret|TWILIO_AUTH_TOKEN
- secret|HF_TOKEN
- secret|GROQ_API_KEY
title: Whisper Realtime Transcription (Gradio UI)
---

View File

@@ -9,14 +9,21 @@
:root {
--primary-gradient: linear-gradient(135deg, #f9a45c 0%, #e66465 100%);
--background-cream: #faf8f5;
--background-cream-end: #f7f5f2;
/* Slightly warmer end color for body gradient */
--text-dark: #2d2d2d;
--transcript-bg: #ffffff;
/* White background for transcript area */
--transcript-border: #e0e0e0;
/* Light border for transcript items */
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
margin: 0;
padding: 0;
background-color: var(--background-cream);
/* Apply a subtle vertical gradient to the body */
background: linear-gradient(to bottom, var(--background-cream), var(--background-cream-end));
color: var(--text-dark);
min-height: 100vh;
}
@@ -43,18 +50,26 @@
.container {
max-width: 1000px;
margin: 1.5rem auto;
margin: 2.5rem auto;
/* Increased top/bottom margin */
padding: 0 2rem;
}
.transcript-container {
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
border-radius: 12px;
/* Slightly larger radius */
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.08);
/* Enhanced shadow */
padding: 1.5rem;
height: 300px;
height: 350px;
/* Increased height */
overflow-y: auto;
margin-bottom: 1.5rem;
border: 1px solid rgba(0, 0, 0, 0.1);
margin-bottom: 2rem;
/* Increased margin */
border: 1px solid rgba(0, 0, 0, 0.05);
/* Softer border */
background-color: var(--transcript-bg);
/* Use the new variable */
}
.controls {
@@ -73,6 +88,8 @@
transition: all 0.2s ease;
font-weight: 500;
min-width: 180px;
position: relative;
padding-right: 50px;
}
button:hover {
@@ -86,22 +103,39 @@
/* Transcript text styling */
.transcript-container p {
margin: 0.4rem 0;
padding: 0.6rem;
margin: 0.6rem 0;
/* Increased vertical margin */
padding: 0.8rem 1rem;
/* Increased padding */
background: var(--background-cream);
border-radius: 4px;
line-height: 1.4;
font-size: 0.95rem;
/* Use the lighter cream for contrast */
border-radius: 6px;
/* Slightly larger radius */
line-height: 1.5;
/* Improved line spacing */
font-size: 0.98rem;
/* Slightly larger font */
border-left: 3px solid var(--transcript-border);
/* Add a subtle left border */
transition: background-color 0.2s ease;
/* Smooth hover effect */
}
/* Custom scrollbar - made thinner */
.transcript-container p:hover {
background-color: #fdfbf9;
/* Slightly change background on hover */
}
/* Custom scrollbar - update track color */
.transcript-container::-webkit-scrollbar {
width: 6px;
width: 8px;
/* Slightly wider scrollbar */
}
.transcript-container::-webkit-scrollbar-track {
background: var(--background-cream);
border-radius: 3px;
background: var(--background-cream-end);
/* Match body end gradient */
border-radius: 4px;
}
.transcript-container::-webkit-scrollbar-thumb {
@@ -176,6 +210,40 @@
transition: transform 0.1s ease;
}
/* Styles for the mute button */
.mute-toggle {
position: absolute;
right: 10px;
top: 50%;
transform: translateY(-50%);
width: 24px;
height: 24px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
}
.mute-toggle svg {
width: 20px;
height: 20px;
stroke: white;
}
/* Adjust layout for button content when mute is present */
.button-content {
display: flex;
align-items: center;
justify-content: center;
width: calc(100% - 40px);
margin-right: 40px;
}
.icon-with-spinner,
.pulse-container {
width: 100%;
}
@keyframes spin {
to {
transform: rotate(360deg);
@@ -206,10 +274,29 @@
let audioContext, analyser, audioSource;
let audioLevel = 0;
let animationFrame;
let isMuted = false;
const startButton = document.getElementById('start-button');
const transcriptDiv = document.getElementById('transcript');
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function showError(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
@@ -241,25 +328,63 @@
}
function updateButtonState() {
// Remove existing mute listener if present
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
existingMuteButton.remove();
}
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
<div class="button-content">
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
startButton.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
<div class="button-content">
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
</div>
</div>
<div class="mute-toggle" title="${isMuted ? 'Unmute' : 'Mute'}">
${isMuted ? micMutedIconSVG : micIconSVG}
</div>
`;
startButton.disabled = false;
const muteButton = startButton.querySelector('.mute-toggle');
if (muteButton) {
muteButton.addEventListener('click', toggleMute);
}
} else {
startButton.innerHTML = 'Start Recording';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -321,6 +446,21 @@
updateButtonState();
});
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
// Create data channel for messages
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = handleMessage;
@@ -329,20 +469,6 @@
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
webrtc_id = Math.random().toString(36).substring(7);
const response = await fetch('/webrtc/offer', {
@@ -392,41 +518,45 @@
function stop() {
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
if (audioContext) {
audioContext.close();
audioContext.close().catch(e => console.error("Error closing AudioContext:", e));
audioContext = null;
analyser = null;
audioSource = null;
}
if (peerConnection) {
if (peerConnection.getTransceivers) {
peerConnection.getTransceivers().forEach(transceiver => {
if (transceiver.stop) {
transceiver.stop();
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track) {
sender.track.stop();
console.log(`Track ${sender.track.id} stopped.`);
}
});
}
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.stop) sender.track.stop();
});
}
setTimeout(() => {
peerConnection.close();
}, 500);
peerConnection.close();
peerConnection = null;
console.log("Peer connection closed.");
}
audioLevel = 0;
isMuted = false;
updateButtonState();
}
startButton.addEventListener('click', () => {
if (startButton.textContent === 'Start Recording') {
setupWebRTC();
} else {
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
transcriptDiv.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>

View File

@@ -1,4 +1,3 @@
fastrtc[vad]
fastrtc[vad]==0.0.20.rc2
groq
python-dotenv
twilio
python-dotenv

View File

@@ -3,27 +3,75 @@ When deploying in cloud environments with firewalls (like Hugging Face Spaces, R
!!! tip
The `rtc_configuration` parameter of the `Stream` class can also be passed to the [`WebRTC`](../userguide/gradio) component directly if you're building a standalone Gradio app.
## Community Server
Hugging Face graciously provides a TURN server for the community.
## Cloudflare Calls API
Cloudflare also offers a managed TURN server with [Cloudflare Calls](https://www.cloudflare.com/en-au/developer-platform/products/cloudflare-calls/).
### With a Hugging Face Token
Cloudflare and Hugging Face have partnered to allow you to stream 10 GB of WebRTC traffic per month for free with a Hugging Face account!
```python
from fastrtc import Stream, get_cloudflare_turn_credentials_async
# Make sure the HF_TOKEN environment variable is set,
# or pass the token explicitly via a callable with all arguments bound (as below).
# Make sure you don't commit your token to git!
TOKEN = "hf_..."
async def get_credentials():
return await get_cloudflare_turn_credentials_async(hf_token=TOKEN)
stream = Stream(
handler=...,
rtc_configuration=get_credentials,
modality="audio",
mode="send-receive",
)
```
### With a Cloudflare API Token
Once you have exhausted your monthly quota, you can create a **free** Cloudflare account.
Create an [account](https://developers.cloudflare.com/fundamentals/setup/account/create-account/) and head to the [Calls section in your dashboard](https://dash.cloudflare.com/?to=/:account/calls).
Choose `Create -> TURN App`, give it a name (like `fastrtc-demo`), and then hit the Create button.
Take note of the Turn Token ID (often exported as `TURN_KEY_ID`) and API Token (exported as `TURN_KEY_API_TOKEN`).
You can then connect from the WebRTC component like so:
```python
from fastrtc import Stream, get_cloudflare_turn_credentials_async
# Make sure the TURN_KEY_ID and TURN_KEY_API_TOKEN environment variables are set
stream = Stream(
handler=...,
rtc_configuration=get_cloudflare_turn_credentials_async,
modality="audio",
mode="send-receive",
)
```
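If you would rather fetch the credentials once at startup (for example, to embed them in a server-rendered page), you can call the sync helper eagerly and pass the resulting dict; a minimal sketch, assuming the same environment variables and a 24-hour TTL:
```python
from fastrtc import Stream, get_cloudflare_turn_credentials

# TURN_KEY_ID and TURN_KEY_API_TOKEN must be set in the environment.
# Credentials expire after the TTL, so long-lived servers should prefer
# passing the callable itself, so fresh credentials are fetched per connection.
credentials = get_cloudflare_turn_credentials(ttl=86400)

stream = Stream(
    handler=...,
    rtc_configuration=credentials,
    modality="audio",
    mode="send-receive",
)
```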
## Community Server (Deprecated)
Hugging Face graciously provides 10 GB of TURN traffic through Cloudflare's global network.
In order to use it, you need to first create a Hugging Face account by going to [huggingface.co](https://huggingface.co/).
Then navigate to this [space](https://huggingface.co/spaces/fastrtc/turn-server-login) and follow the instructions on the page. You just have to click the "Log in" button and then the "Sign Up" button.
![turn_login](https://github.com/user-attachments/assets/cefa8dec-487e-47d8-bb96-1a14a701f6e5)
Then you can create an [access token](https://huggingface.co/docs/hub/en/security-tokens).
Then you can use the `get_hf_turn_credentials` helper to get your credentials:
```python
from fastrtc import get_hf_turn_credentials, Stream
# Pass a valid access token for your Hugging Face account
# or set the HF_TOKEN environment variable
credentials = get_hf_turn_credentials(token=None)
# Make sure the HF_TOKEN environment variable is set
Stream(
handler=...,
rtc_configuration=credentials,
rtc_configuration=get_hf_turn_credentials,
modality="audio",
mode="send-receive"
)
@@ -31,8 +79,7 @@ Stream(
!!! warning
This is a shared resource, so we make no latency/availability guarantees.
For more robust options, see the Cloudflare option above or the Twilio and self-hosting options below.
This function is now deprecated. Please use `get_cloudflare_turn_credentials` instead.
## Twilio API
@@ -78,50 +125,6 @@ Stream(
rtc_configuration = get_twilio_turn_credentials()
```
## Cloudflare Calls API
Cloudflare also offers a managed TURN server with [Cloudflare Calls](https://www.cloudflare.com/en-au/developer-platform/products/cloudflare-calls/).
Create a **free** [account](https://developers.cloudflare.com/fundamentals/setup/account/create-account/) and head to the [Calls section in your dashboard](https://dash.cloudflare.com/?to=/:account/calls).
Choose `Create -> TURN App`, give it a name (like `fastrtc-demo`), and then hit the Create button.
Take note of the Turn Token ID (often exported as `TURN_KEY_ID`) and API Token (exported as `TURN_KEY_API_TOKEN`).
You can then connect from the WebRTC component like so:
```python
from fastrtc import Stream
import requests
import os
turn_key_id = os.environ.get("TURN_KEY_ID")
turn_key_api_token = os.environ.get("TURN_KEY_API_TOKEN")
ttl = 86400 # Can modify TTL, here it's set to 24 hours
response = requests.post(
f"https://rtc.live.cloudflare.com/v1/turn/keys/{turn_key_id}/credentials/generate-ice-servers",
headers={
"Authorization": f"Bearer {turn_key_api_token}",
"Content-Type": "application/json",
},
json={"ttl": ttl},
)
if response.ok:
rtc_configuration = response.json()
else:
raise Exception(
f"Failed to get TURN credentials: {response.status_code} {response.text}"
)
stream = Stream(
handler=...,
rtc_configuration=rtc_configuration,
modality="audio",
mode="send-receive",
)
```
## Self Hosting
We have developed a script that can automatically deploy a TURN server to Amazon Web Services (AWS). You can follow the instructions [here](https://github.com/freddyaboulton/turn-server-deploy) or this guide.
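Once your server is up, you can hand its details to the `Stream` class as a standard WebRTC ICE configuration. A minimal sketch, assuming a self-hosted coturn server at `turn.example.com` with static credentials (the hostname, username, and credential below are placeholders):
```python
from fastrtc import Stream

# Standard RTCConfiguration-style dict; replace with your server's details
rtc_configuration = {
    "iceServers": [
        {
            "urls": ["turn:turn.example.com:3478?transport=udp"],
            "username": "your-username",
            "credential": "your-credential",
        }
    ]
}

stream = Stream(
    handler=...,
    rtc_configuration=rtc_configuration,
    modality="audio",
    mode="send-receive",
)
```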

View File

@@ -0,0 +1,267 @@
# TURN Credential Utils
## `get_turn_credentials_async`
```python
async def get_turn_credentials_async(
method: Literal["hf", "twilio", "cloudflare"] = "cloudflare",
**kwargs
):
```
Retrieves TURN credentials from the specified provider.
This function can be passed directly to the `Stream` class, in which case it will be called for each
unique WebRTC connection made via the Gradio UI. When mounting to FastAPI, call it yourself and
return the credentials to the frontend client; for example, in the index route you can call this
function and embed the credentials in the source of `index.html` (see the sketch after the example below).
See the FastRTC spaces at hf.co/fastrtc for a complete example.
Acts as a dispatcher function to call the appropriate credential retrieval
function based on the method specified.
Args:
```
method: Literal["hf", "twilio", "cloudflare"] | None
The provider to use. 'hf' uses the deprecated Hugging Face endpoint.
'cloudflare' uses either Cloudflare keys or the HF endpoint.
'twilio' uses the Twilio API. Defaults to "cloudflare".
**kwargs:
Additional keyword arguments passed directly to the underlying
provider-specific function (e.g., `token`, `ttl` for 'hf';
`twilio_sid`, `twilio_token` for 'twilio'; `turn_key_id`,
`turn_key_api_token`, `hf_token`, `ttl` for 'cloudflare').
```
Returns:
```
dict:
A dictionary containing the TURN credentials from the chosen provider.
```
Raises:
```
ValueError:
If an invalid method is specified.
Also raises exceptions from the underlying provider functions (see their
docstrings).
```
Example
```python
>>> from fastrtc import get_turn_credentials_async, Stream
>>> credentials = await get_turn_credentials_async()
>>> print(credentials)
>>> # Can pass directly to stream class
>>> stream = Stream(..., rtc_configuration=get_turn_credentials_async)
```
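For the FastAPI mounting pattern described above, a minimal sketch (the `__RTC_CONFIGURATION__` placeholder and the `index.html` layout mirror the FastRTC demo spaces and are assumptions about your own app):
```python
import json
from pathlib import Path

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastrtc import get_turn_credentials_async

app = FastAPI()
curr_dir = Path(__file__).parent


@app.get("/")
async def index():
    # Fetch fresh credentials and embed them in the page served to the client
    rtc_config = await get_turn_credentials_async()
    html = (curr_dir / "index.html").read_text()
    html = html.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
    return HTMLResponse(content=html)
```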
## `get_turn_credentials`
```python
def get_turn_credentials(
method: Literal["hf", "twilio", "cloudflare"] = "cloudflare",
**kwargs
):
```
Retrieves TURN credentials from the specified provider.
This function can be passed directly to the `Stream` class, in which case it will be called for each
unique WebRTC connection made via the Gradio UI. When mounting to FastAPI, call it yourself and
return the credentials to the frontend client; for example, in the index route you can call this
function and embed the credentials in the source of `index.html` (the async sketch above applies equally here).
See the FastRTC spaces at hf.co/fastrtc for a complete example.
Acts as a dispatcher function to call the appropriate credential retrieval
function based on the method specified.
Args:
```
method: Literal["hf", "twilio", "cloudflare"] | None
The provider to use. 'hf' uses the deprecated Hugging Face endpoint.
'cloudflare' uses either Cloudflare keys or the HF endpoint.
'twilio' uses the Twilio API. Defaults to "cloudflare".
**kwargs:
Additional keyword arguments passed directly to the underlying
provider-specific function (e.g., `token`, `ttl` for 'hf';
`twilio_sid`, `twilio_token` for 'twilio'; `turn_key_id`,
`turn_key_api_token`, `hf_token`, `ttl` for 'cloudflare').
```
Returns:
```
dict:
A dictionary containing the TURN credentials from the chosen provider.
```
Raises:
```
ValueError:
If an invalid method is specified.
Also raises exceptions from the underlying provider functions (see their
docstrings).
```
Example
```python
>>> from fastrtc import get_turn_credentials, Stream
>>> credentials = get_turn_credentials()
>>> print(credentials)
>>> # Can pass directly to stream class
>>> stream = Stream(..., rtc_configuration=get_turn_credentials)
```
## `get_cloudflare_turn_credentials_async`
```python
async def get_cloudflare_turn_credentials_async(
turn_key_id=None,
turn_key_api_token=None,
hf_token=None,
ttl=600,
client: httpx.AsyncClient | None = None,
):
```
Asynchronously retrieves TURN credentials from Cloudflare or Hugging Face.
Asynchronously fetches TURN server credentials either directly from Cloudflare
using API keys or via the Hugging Face TURN endpoint using an HF token. The HF
token method takes precedence if provided.
Args:
```
turn_key_id (str, optional):
Cloudflare TURN key ID. Defaults to None,
in which case the CLOUDFLARE_TURN_KEY_ID environment variable is used.
turn_key_api_token (str, optional):
Cloudflare TURN key API token.
Defaults to None, in which case the CLOUDFLARE_TURN_KEY_API_TOKEN
environment variable is used.
hf_token (str, optional):
Hugging Face API token. If provided, this method
is used instead of Cloudflare keys.
Defaults to None, in which case the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
client (httpx.AsyncClient | None, optional): An existing httpx async client
to use for the request. If None, a new client is created per request.
Defaults to None.
```
Returns:
```
dict: A dictionary containing the TURN credentials (ICE servers).
```
Raises:
```
ValueError: If neither HF token nor Cloudflare keys (either as arguments
or environment variables) are provided.
Exception: If the request to the credential server fails.
```
Example
```python
>>> from fastrtc import get_cloudflare_turn_credentials_async, Stream
>>> credentials = await get_cloudflare_turn_credentials_async()
>>> print(credentials)
>>> # Can pass directly to stream class
>>> stream = Stream(..., rtc_configuration=get_cloudflare_turn_credentials_async)
```
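If you fetch credentials frequently (for example, once per connection), you can reuse a single `httpx.AsyncClient` rather than creating one per request; a minimal sketch:
```python
import httpx

from fastrtc import get_cloudflare_turn_credentials_async

# One shared client avoids re-establishing connections on every call
client = httpx.AsyncClient()


async def get_credentials():
    return await get_cloudflare_turn_credentials_async(client=client)

# get_credentials can then be passed as rtc_configuration to Stream
```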
## `get_cloudflare_turn_credentials`
```python
def get_cloudflare_turn_credentials(
turn_key_id=None,
turn_key_api_token=None,
hf_token=None,
ttl=600,
client: httpx.AsyncClient | None = None,
):
```
Retrieves TURN credentials from Cloudflare or Hugging Face.
Fetches TURN server credentials either directly from Cloudflare using API keys
or via the Hugging Face TURN endpoint using an HF token. The HF token method
takes precedence if provided.
Args:
```
turn_key_id (str, optional):
Cloudflare TURN key ID. Defaults to None,
in which case the CLOUDFLARE_TURN_KEY_ID environment variable is used.
turn_key_api_token (str, optional):
Cloudflare TURN key API token.
Defaults to None, in which case the CLOUDFLARE_TURN_KEY_API_TOKEN
environment variable is used.
hf_token (str, optional):
Hugging Face API token. If provided, this method
is used instead of Cloudflare keys.
Defaults to None, in which case the HF_TOKEN environment variable is used.
ttl (int, optional): Time-to-live for the credentials in seconds.
Defaults to 600.
client (httpx.AsyncClient | None, optional): An existing httpx async client
to use for the request. If None, a new client is created per request.
Defaults to None.
```
Returns:
```
dict: A dictionary containing the TURN credentials (ICE servers).
```
Raises:
```
ValueError: If neither HF token nor Cloudflare keys (either as arguments
or environment variables) are provided.
Exception: If the request to the credential server fails.
```
Example
```python
>>> from fastrtc import get_cloudflare_turn_credentials, Stream
>>> credentials = get_cloudflare_turn_credentials()
>>> print(credentials)
>>> # Can pass directly to stream class
>>> stream = Stream(..., rtc_configuration=get_cloudflare_turn_credentials)
```
## `get_twilio_turn_credentials`
```python
def get_twilio_turn_credentials(
twilio_sid=None,
twilio_token=None):
```
Retrieves TURN credentials from Twilio.
Uses the Twilio REST API to generate temporary TURN credentials. Requires
the `twilio` package to be installed.
Args:
```
twilio_sid (str, optional):
Twilio Account SID. Defaults to None, in which
case the TWILIO_ACCOUNT_SID environment variable is used.
twilio_token (str, optional):
Twilio Auth Token. Defaults to None, in which
case the TWILIO_AUTH_TOKEN environment variable is used.
```
Returns:
```
dict:
A dictionary containing the TURN credentials formatted for WebRTC,
including 'iceServers' and 'iceTransportPolicy'.
```
Raises:
```
ImportError: If the `twilio` package is not installed.
ValueError: If Twilio credentials (SID and token) are not provided either
as arguments or environment variables.
TwilioRestException: If the Twilio API request fails.
```
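Example (a minimal usage sketch, assuming the `TWILIO_ACCOUNT_SID` and `TWILIO_AUTH_TOKEN` environment variables are set):
```python
>>> from fastrtc import get_twilio_turn_credentials, Stream
>>> credentials = get_twilio_turn_credentials()
>>> print(credentials)
>>> # Can pass directly to stream class
>>> stream = Stream(..., rtc_configuration=credentials)
```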

View File

@@ -23,6 +23,7 @@
export let width: number | undefined;
export let server: {
offer: (body: any) => Promise<any>;
turn: () => Promise<any>;
};
export let container = false;

View File

@@ -73,6 +73,7 @@
export let server: {
offer: (body: any) => Promise<any>;
turn: () => Promise<any>;
};
let stream_state: "open" | "closed" | "waiting" = "closed";
@@ -148,7 +149,17 @@
}
_webrtc_id = Math.random().toString(36).substring(2);
value = _webrtc_id;
stream_state = "waiting";
await server.turn().then((rtc_configuration_) => {
if (rtc_configuration_.error) {
dispatch("error", rtc_configuration_.error);
return;
}
rtc_configuration = rtc_configuration_;
console.info("rtc_configuration", rtc_configuration_);
});
pc = new RTCPeerConnection(rtc_configuration);
console.info("created");
pc.addEventListener("connectionstatechange", async (event) => {
switch (pc.connectionState) {
case "connected":
@@ -173,7 +184,6 @@
break;
}
});
stream_state = "waiting";
stream = null;
try {

View File

@@ -40,6 +40,14 @@
$: if (value === "start_webrtc_stream") {
_webrtc_id = Math.random().toString(36).substring(2);
server
.turn()
.then((rtc_configuration_) => {
rtc_configuration = rtc_configuration_;
})
.catch((error) => {
dispatch("error", error);
});
value = _webrtc_id;
pc = new RTCPeerConnection(rtc_configuration);
pc.addEventListener("connectionstatechange", async (event) => {

View File

@@ -39,6 +39,7 @@ nav:
- Pause Detection Handlers: reference/reply_on_pause.md
- Stream Handlers: reference/stream_handlers.md
- Utils: reference/utils.md
- TURN Credentials: reference/credentials.md
extra_javascript:
- https://cdn.jsdelivr.net/npm/marked/marked.min.js

View File

@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
[project]
name = "fastrtc"
version = "0.0.19"
version = "0.0.20.rc2"
description = "The realtime communication library for Python"
readme = "README.md"
license = "MIT"
@@ -84,7 +84,7 @@ packages = ["/backend/fastrtc"]
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope="function"
asyncio_default_fixture_loop_scope = "function"
[tool.ruff]
src = ["demo", "backend/fastrtc", "test"]
@@ -99,19 +99,10 @@ skip-magic-trailing-comma = false
line-ending = "auto"
[tool.ruff.lint]
select = [
"E",
"F",
"W",
"Q",
"I",
"UP",
]
select = ["E", "F", "W", "Q", "I", "UP"]
# These can be turned on when the framework is more mature (Too many errors right now)
exclude = [
"D",
]
exclude = ["D"]
# Avoid enforcing line-length violations (`E501`)
ignore = ["E501"]
@@ -125,13 +116,10 @@ convention = "google"
[tool.pyright]
include = ["backend/fastrtc"]
exclude = [
"**/__pycache__",
"**/*.pyi",
]
exclude = ["**/__pycache__", "**/*.pyi"]
reportMissingImports = false
reportMissingTypeStubs = false
pythonVersion = "3.10"
pythonPlatform = "Linux"
pythonPlatform = "Linux"