Cloudflare TURN integration (#264)

* Turn integration

* Add code:

* type hint

* Fix typehint

* add code

* format

* WIP

* trickle ice

* bump version

* Better docs

* Modify

* code

* Mute icon for whisper

* Add code

* llama 4 demo

* code

* OpenAI interruptions

* fix docs
Freddy Boulton
2025-04-09 09:36:51 -04:00
committed by GitHub
parent f70b27bd41
commit 837330dcd8
37 changed files with 2914 additions and 780 deletions


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to Gemini using Google's multimodal API
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to Gemini (Gradio UI)
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -14,7 +14,7 @@ from fastapi.responses import HTMLResponse
from fastrtc import (
AsyncStreamHandler,
Stream,
get_twilio_turn_credentials,
get_cloudflare_turn_credentials_async,
wait_for_item,
)
from google import genai
@@ -117,7 +117,7 @@ stream = Stream(
modality="audio",
mode="send-receive",
handler=GeminiHandler(),
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
concurrency_limit=5 if get_space() else None,
time_limit=90 if get_space() else None,
additional_inputs=[
@@ -160,7 +160,7 @@ async def _(body: InputData):
@app.get("/")
async def index():
rtc_config = get_twilio_turn_credentials() if get_space() else None
rtc_config = await get_cloudflare_turn_credentials_async() if get_space() else None
html_content = (current_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)
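The hunks above switch this demo from Twilio to Cloudflare TURN in both places credentials are needed: the Stream constructor and the HTML-templating route. A minimal self-contained sketch of the resulting pattern (the echo handler is an illustrative assumption; the helper names come from this diff, and an HF_TOKEN secret is assumed per the updated Space tags):

import numpy as np
from fastrtc import ReplyOnPause, Stream, get_cloudflare_turn_credentials_async
from gradio.utils import get_space

def echo(audio: tuple[int, np.ndarray]):
    # Placeholder handler for the sketch: play the caller's audio back.
    yield audio

stream = Stream(
    ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    # Pass the async helper itself so fresh, short-lived Cloudflare TURN
    # credentials are minted for each connection.
    rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
)

async def client_rtc_config():
    # For a hand-rolled HTML client, resolve credentials once and inline
    # them, as the updated index() route does above.
    return await get_cloudflare_turn_credentials_async() if get_space() else None

Passing the callable rather than a pre-fetched dict is presumably deliberate: Cloudflare TURN credentials are short-lived, so a dict resolved once at startup would eventually expire.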


@@ -98,6 +98,11 @@
font-weight: 600;
cursor: pointer;
transition: all 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
button:hover {
@@ -134,7 +139,6 @@
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.pulse-circle {
@@ -171,6 +175,23 @@
background-color: #ffd700;
color: black;
}
/* Add styles for the mute toggle */
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
flex-shrink: 0;
}
.mute-toggle svg {
display: block;
}
#start-button {
margin-left: auto;
margin-right: auto;
}
</style>
</head>
@@ -221,6 +242,11 @@
let dataChannel;
let isRecording = false;
let webrtc_id;
let isMuted = false;
let analyser_input, dataArray_input;
let analyser, dataArray;
let source_input = null;
let source_output = null;
const startButton = document.getElementById('start-button');
const apiKeyInput = document.getElementById('api-key');
@@ -235,7 +261,28 @@
boxContainer.appendChild(box);
}
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
startButton.innerHTML = '';
startButton.onclick = null;
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
@@ -243,15 +290,28 @@
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
startButton.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
</div>
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Recording</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
startButton.innerHTML = 'Start Recording';
startButton.disabled = false;
}
}
@@ -267,6 +327,23 @@
}, 5000);
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
async function setupWebRTC() {
const config = __RTC_CONFIGURATION__;
peerConnection = new RTCPeerConnection(config);
@@ -288,58 +365,74 @@
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => peerConnection.addTrack(track, stream));
// Update audio visualization setup
audioContext = new AudioContext();
if (!audioContext || audioContext.state === 'closed') {
audioContext = new AudioContext();
}
if (source_input) {
try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting previous input source:", e); }
source_input = null;
}
source_input = audioContext.createMediaStreamSource(stream);
analyser_input = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(stream);
source.connect(analyser_input);
source_input.connect(analyser_input);
analyser_input.fftSize = 64;
dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
function updateAudioLevel() {
analyser_input.getByteFrequencyData(dataArray_input);
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
const audioLevel = average / 255;
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
console.log("audioLevel", audioLevel);
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
animationId = requestAnimationFrame(updateAudioLevel);
}
updateAudioLevel();
// Add connection state change listener
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
if (analyser_input) updateAudioLevel();
if (analyser) updateVisualization();
} else if (['disconnected', 'failed', 'closed'].includes(peerConnection.connectionState)) {
// Explicitly stop animations if connection drops unexpectedly
// Note: stopWebRTC() handles the normal stop case
}
updateButtonState();
});
// Handle incoming audio
peerConnection.addEventListener('track', (evt) => {
if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
audioOutput.srcObject = evt.streams[0];
audioOutput.play();
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
// Set up audio visualization on the output stream
audioContext = new AudioContext();
analyser = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(evt.streams[0]);
source.connect(analyser);
analyser.fftSize = 2048;
dataArray = new Uint8Array(analyser.frequencyBinCount);
updateVisualization();
peerConnection.addEventListener('track', (evt) => {
if (evt.track.kind === 'audio' && audioOutput) {
if (audioOutput.srcObject !== evt.streams[0]) {
audioOutput.srcObject = evt.streams[0];
audioOutput.play().catch(e => console.error("Audio play failed:", e));
if (!audioContext || audioContext.state === 'closed') {
console.warn("AudioContext not ready for output track analysis.");
return;
}
if (source_output) {
try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting previous output source:", e); }
source_output = null;
}
source_output = audioContext.createMediaStreamSource(evt.streams[0]);
analyser = audioContext.createAnalyser();
source_output.connect(analyser);
analyser.fftSize = 2048;
dataArray = new Uint8Array(analyser.frequencyBinCount);
updateVisualization();
}
}
});
// Create data channel for messages
dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = (event) => {
const eventJson = JSON.parse(event.data);
@@ -360,24 +453,9 @@
}
};
// Create and send offer
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@@ -394,7 +472,7 @@
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
: serverResponse.meta.error);
stop();
stopWebRTC();
startButton.textContent = 'Start Recording';
return;
}
@@ -404,13 +482,17 @@
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('Failed to establish connection. Please try again.');
stop();
stopWebRTC();
startButton.textContent = 'Start Recording';
}
}
function updateVisualization() {
if (!analyser) return;
if (!analyser || !peerConnection || !['connected', 'connecting'].includes(peerConnection.connectionState)) {
const bars = document.querySelectorAll('.box');
bars.forEach(bar => bar.style.transform = 'scaleY(0.1)');
return;
}
analyser.getByteFrequencyData(dataArray);
const bars = document.querySelectorAll('.box');
@@ -420,32 +502,114 @@
bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
}
animationId = requestAnimationFrame(updateVisualization);
requestAnimationFrame(updateVisualization);
}
function updateAudioLevel() {
if (!analyser_input || !peerConnection || !['connected', 'connecting'].includes(peerConnection.connectionState)) {
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1);
}
return;
}
analyser_input.getByteFrequencyData(dataArray_input);
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
const audioLevel = average / 255;
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
requestAnimationFrame(updateAudioLevel);
}
function stopWebRTC() {
console.log("Running stopWebRTC");
if (peerConnection) {
peerConnection.close();
peerConnection.getSenders().forEach(sender => {
if (sender.track) {
sender.track.stop();
}
});
peerConnection.ontrack = null;
peerConnection.onicegatheringstatechange = null;
peerConnection.onconnectionstatechange = null;
if (dataChannel) {
dataChannel.onmessage = null;
try { dataChannel.close(); } catch (e) { console.warn("Error closing data channel:", e); }
dataChannel = null;
}
try { peerConnection.close(); } catch (e) { console.warn("Error closing peer connection:", e); }
peerConnection = null;
}
if (animationId) {
cancelAnimationFrame(animationId);
if (audioOutput) {
audioOutput.pause();
audioOutput.srcObject = null;
}
if (audioContext) {
audioContext.close();
if (source_input) {
try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting input source:", e); }
source_input = null;
}
if (source_output) {
try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting output source:", e); }
source_output = null;
}
if (audioContext && audioContext.state !== 'closed') {
audioContext.close().then(() => {
console.log("AudioContext closed successfully.");
audioContext = null;
}).catch(e => {
console.error("Error closing AudioContext:", e);
audioContext = null;
});
} else {
audioContext = null;
}
analyser_input = null;
dataArray_input = null;
analyser = null;
dataArray = null;
isMuted = false;
isRecording = false;
updateButtonState();
const bars = document.querySelectorAll('.box');
bars.forEach(bar => bar.style.transform = 'scaleY(0.1)');
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1);
}
}
startButton.addEventListener('click', () => {
if (!isRecording) {
setupWebRTC();
startButton.classList.add('recording');
} else {
stopWebRTC();
startButton.classList.remove('recording');
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stopWebRTC();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
if (!apiKeyInput.value) {
showError("Please enter your API Key.");
return;
}
setupWebRTC();
isRecording = true;
updateButtonState();
}
isRecording = !isRecording;
});
updateButtonState();
</script>
</body>
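The client rewrite in this file drops the old wait for iceGatheringState === "complete" and trickles ICE instead: the offer is POSTed immediately, and each candidate is then sent to /webrtc/offer as it is gathered. A sketch of the candidate message's shape, inferred from the fetch calls above (all field values are placeholders):

import json

ice_message = {
    "candidate": {  # RTCIceCandidate.toJSON() payload
        "candidate": "candidate:0 1 UDP 2122252543 192.0.2.1 54321 typ host",
        "sdpMid": "0",
        "sdpMLineIndex": 0,
    },
    "webrtc_id": "abc1234",   # same id as the initial offer
    "type": "ice-candidate",  # distinguishes candidates from the SDP offer
}
print(json.dumps(ice_message, indent=2))

Trickling candidates lets negotiation start before gathering finishes, which shortens connection setup, especially when TURN relays are involved.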


@@ -1,4 +1,4 @@
fastrtc
fastrtc[vad]==0.0.20.rc2
python-dotenv
google-genai
twilio


@@ -0,0 +1,15 @@
---
title: Talk to Llama 4
emoji: 🦙
colorFrom: purple
colorTo: red
sdk: gradio
sdk_version: 5.23.3
app_file: app.py
pinned: false
license: mit
short_description: Talk to Llama 4 using Groq + Cloudflare
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

demo/talk_to_llama4/app.py (new file, 120 lines)

@@ -0,0 +1,120 @@
import json
import os
from pathlib import Path
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
audio_to_bytes,
get_cloudflare_turn_credentials_async,
get_current_context,
get_tts_model,
)
from groq import Groq
from numpy.typing import NDArray
curr_dir = Path(__file__).parent
load_dotenv()
tts_model = get_tts_model()
groq = Groq(api_key=os.getenv("GROQ_API_KEY"))
conversations: dict[str, list[dict[str, str]]] = {}
def response(user_audio: tuple[int, NDArray[np.int16]]):
context = get_current_context()
if context.webrtc_id not in conversations:
conversations[context.webrtc_id] = [
{
"role": "system",
"content": (
"You are a helpful assistant that can answer questions and help with tasks."
'Please return a short (that will be converted to audio using a text-to-speech model) response and long response to this question. They can be the same if appropriate. Please return in JSON format\n\n{"short":, "long"}\n\n'
),
}
]
messages = conversations[context.webrtc_id]
transcription = groq.audio.transcriptions.create(
file=("audio.wav", audio_to_bytes(user_audio)),
model="distil-whisper-large-v3-en",
response_format="verbose_json",
)
print(transcription.text)
messages.append({"role": "user", "content": transcription.text})
completion = groq.chat.completions.create( # type: ignore
model="meta-llama/llama-4-scout-17b-16e-instruct",
messages=messages, # type: ignore
temperature=1,
max_completion_tokens=1024,
top_p=1,
stream=False,
response_format={"type": "json_object"},
stop=None,
)
response = completion.choices[0].message.content
response = json.loads(response)
short_response = response["short"]
long_response = response["long"]
messages.append({"role": "assistant", "content": long_response})
conversations[context.webrtc_id] = messages
yield from tts_model.stream_tts_sync(short_response)
yield AdditionalOutputs(messages)
stream = Stream(
ReplyOnPause(response),
modality="audio",
mode="send-receive",
additional_outputs=[gr.Chatbot(type="messages")],
additional_outputs_handler=lambda old, new: new,
rtc_configuration=get_cloudflare_turn_credentials_async,
)
app = FastAPI()
stream.mount(app)
@app.get("/")
async def _():
rtc_config = await get_cloudflare_turn_credentials_async()
html_content = (curr_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)
@app.get("/outputs")
async def _(webrtc_id: str):
async def output_stream():
async for output in stream.output_stream(webrtc_id):
state = output.args[0]
for msg in state[-2:]:
data = {
"message": msg,
}
yield f"event: output\ndata: {json.dumps(data)}\n\n"
return StreamingResponse(output_stream(), media_type="text/event-stream")
if __name__ == "__main__":
import os
if (mode := os.getenv("MODE")) == "UI":
stream.ui.launch(server_port=7860)
elif mode == "PHONE":
raise ValueError("Phone mode not supported")
else:
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
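The /outputs route above streams the running transcript as server-sent events keyed by webrtc_id. A quick way to watch that stream outside the browser, as a sketch assuming the default localhost:7860 bind above and the httpx client (which this demo does not itself use):

import asyncio
import httpx

async def read_outputs(webrtc_id: str) -> None:
    # Connect to the SSE endpoint defined above and print each event payload.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "GET", "http://localhost:7860/outputs", params={"webrtc_id": webrtc_id}
        ) as resp:
            async for line in resp.aiter_lines():
                if line.startswith("data: "):
                    print(line.removeprefix("data: "))

# asyncio.run(read_outputs("abc1234"))  # id of an active WebRTC session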


@@ -0,0 +1,839 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Talk to Llama 4</title>
<style>
:root {
--color-primary: #3b82f6;
--color-secondary: #f97316;
--color-background: #0f172a;
--color-surface: #1e293b;
--color-text: #f1f5f9;
--color-message-user: #334155;
--color-message-assistant: #1e40af;
--gradient-primary: linear-gradient(135deg, #3b82f6, #8b5cf6);
--gradient-secondary: linear-gradient(135deg, #f97316, #ec4899);
--boxSize: 8px;
--gutter: 4px;
}
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background-color: var(--color-background);
color: var(--color-text);
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
padding: 2rem 1rem;
background-image:
radial-gradient(circle at 25% 25%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
radial-gradient(circle at 75% 75%, rgba(249, 115, 22, 0.1) 0%, transparent 50%);
}
.header-container {
display: flex;
align-items: center;
gap: 2rem;
margin-bottom: 2rem;
width: 100%;
max-width: 800px;
animation: fadeIn 1s ease-out;
}
.header {
text-align: left;
}
.header h1 {
font-size: 2.5rem;
margin-bottom: 0.5rem;
background: var(--gradient-primary);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 800;
}
.header h2 {
font-size: 1.2rem;
font-weight: 400;
color: rgba(241, 245, 249, 0.8);
margin-bottom: 1rem;
}
.logo {
width: 120px;
height: 120px;
background: var(--color-surface);
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 15px 30px rgba(0, 0, 0, 0.3);
position: relative;
overflow: hidden;
animation: float 6s ease-in-out infinite;
flex-shrink: 0;
}
.logo::before {
content: "";
position: absolute;
width: 200%;
height: 200%;
background: var(--gradient-secondary);
opacity: 0.2;
animation: rotate 10s linear infinite;
}
.logo img {
width: 75%;
height: 75%;
object-fit: contain;
z-index: 2;
}
.container {
width: 100%;
max-width: 800px;
background-color: var(--color-surface);
border-radius: 1rem;
box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
overflow: hidden;
animation: slideUp 0.5s ease-out;
}
.chat-container {
height: 400px;
overflow-y: auto;
padding: 1.5rem;
display: flex;
flex-direction: column;
gap: 1rem;
scroll-behavior: smooth;
}
.message {
max-width: 80%;
padding: 1rem;
border-radius: 1rem;
line-height: 1.5;
animation: fadeIn 0.3s ease-out;
}
.message.user {
background-color: var(--color-message-user);
color: var(--color-text);
align-self: flex-end;
border-bottom-right-radius: 0.25rem;
}
.message.assistant {
background-color: var(--color-message-assistant);
color: var(--color-text);
align-self: flex-start;
border-bottom-left-radius: 0.25rem;
}
.wave-visualizer {
height: 100px;
padding: 1rem;
background-color: rgba(30, 41, 59, 0.8);
display: flex;
align-items: center;
justify-content: center;
position: relative;
overflow: hidden;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
.box-container {
display: flex;
justify-content: space-between;
align-items: center;
width: 100%;
height: 64px;
padding: 0 1rem;
}
.box {
height: 100%;
width: var(--boxSize);
background: var(--gradient-primary);
border-radius: 4px;
transform: scaleY(0.1);
transition: transform 0.05s ease;
}
.controls {
display: flex;
justify-content: center;
align-items: center;
padding: 1.5rem;
gap: 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
#start-button {
display: flex;
align-items: center;
justify-content: center;
background: var(--gradient-primary);
color: white;
border: none;
border-radius: 9999px;
padding: 0.75rem 1.5rem;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 14px rgba(59, 130, 246, 0.4);
}
#start-button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(59, 130, 246, 0.6);
}
#start-button:active {
transform: translateY(1px);
}
.icon-with-spinner {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.spinner {
width: 20px;
height: 20px;
border: 2px solid white;
border-top-color: transparent;
border-radius: 50%;
animation: spin 1s linear infinite;
flex-shrink: 0;
}
.pulse-container {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
}
.pulse-circle {
width: 20px;
height: 20px;
border-radius: 50%;
background: var(--color-secondary);
opacity: 0.85;
flex-shrink: 0;
transform: scale(var(--audio-level, 1));
transition: transform 0.1s ease;
}
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
margin-left: 12px;
flex-shrink: 0;
filter: drop-shadow(0 4px 6px rgba(0, 0, 0, 0.2));
}
.mute-toggle svg {
width: 100%;
height: 100%;
stroke: white;
}
.typing-indicator {
padding: 0.5rem 1rem;
display: inline-flex;
align-items: center;
background-color: var(--color-message-assistant);
border-radius: 1rem;
align-self: flex-start;
margin-bottom: 0.5rem;
display: none;
animation: fadeIn 0.3s ease-out;
}
.dots {
display: inline-flex;
gap: 4px;
}
.dot {
width: 8px;
height: 8px;
background-color: white;
border-radius: 50%;
animation: bounce 1.5s infinite;
opacity: 0.7;
}
.dot:nth-child(2) {
animation-delay: 0.15s;
}
.dot:nth-child(3) {
animation-delay: 0.3s;
}
.toast {
position: fixed;
top: 20px;
left: 50%;
transform: translateX(-50%);
padding: 1rem 1.5rem;
border-radius: 0.5rem;
font-size: 0.875rem;
z-index: 1000;
display: none;
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.3);
animation: slideDown 0.3s ease-out;
}
.toast.error {
background-color: #ef4444;
color: white;
}
.toast.warning {
background-color: #f59e0b;
color: black;
}
#audio-output {
display: none;
}
@keyframes float {
0%,
100% {
transform: translateY(0);
}
50% {
transform: translateY(-10px);
}
}
@keyframes rotate {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
@keyframes bounce {
0%,
100% {
transform: translateY(0);
}
50% {
transform: translateY(-4px);
}
}
@keyframes fadeIn {
from {
opacity: 0;
}
to {
opacity: 1;
}
}
@keyframes slideUp {
from {
opacity: 0;
transform: translateY(20px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
@keyframes slideDown {
from {
opacity: 0;
transform: translate(-50%, -20px);
}
to {
opacity: 1;
transform: translate(-50%, 0);
}
}
</style>
</head>
<body>
<div id="error-toast" class="toast"></div>
<div class="header-container">
<div class="logo">
<img src="https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/Video%26Audio%20huggy.png"
alt="LLaMA Logo">
</div>
<div class="header">
<h1>Talk to LLaMA 4</h1>
<h2>Experience seamless real-time conversation thanks to Cloudflare and Hugging Face's FastRTC.</h2>
</div>
</div>
<div class="container">
<div class="chat-container" id="chat-messages">
<!-- Messages will appear here -->
</div>
<div class="typing-indicator" id="typing-indicator">
<div class="dots">
<div class="dot"></div>
<div class="dot"></div>
<div class="dot"></div>
</div>
</div>
<div class="wave-visualizer">
<div class="box-container" id="box-container">
<!-- Boxes will be dynamically added here -->
</div>
</div>
<div class="controls">
<button id="start-button">Start Conversation</button>
</div>
</div>
<audio id="audio-output"></audio>
<script>
let peerConnection;
let webrtc_id;
const startButton = document.getElementById('start-button');
const chatMessages = document.getElementById('chat-messages');
const boxContainer = document.getElementById('box-container');
const typingIndicator = document.getElementById('typing-indicator');
const audioOutput = document.getElementById('audio-output');
let audioLevel = 0;
let animationFrame_input, animationFrame_output;
let audioContext_input, audioContext_output;
let analyser_input, dataArray_input;
let analyser_output, dataArray_output;
let audioSource_input, audioSource_output;
let messages = [];
let eventSource;
let isMuted = false;
// Create wave visualizer boxes
const numBars = 32;
for (let i = 0; i < numBars; i++) {
const box = document.createElement('div');
box.className = 'box';
boxContainer.appendChild(box);
}
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
}
startButton.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
startButton.textContent = 'Start Conversation';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
// Input audio context for pulse circle
audioContext_input = new (window.AudioContext || window.webkitAudioContext)();
analyser_input = audioContext_input.createAnalyser();
audioSource_input = audioContext_input.createMediaStreamSource(stream);
audioSource_input.connect(analyser_input);
analyser_input.fftSize = 64;
dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
function updateAudioLevel() {
// Update input audio visualization (pulse circle)
analyser_input.getByteFrequencyData(dataArray_input);
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
audioLevel = average / 255;
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
animationFrame_input = requestAnimationFrame(updateAudioLevel);
}
updateAudioLevel();
}
function setupOutputVisualization(stream) {
// Create separate audio context for output visualization
audioContext_output = new (window.AudioContext || window.webkitAudioContext)();
analyser_output = audioContext_output.createAnalyser();
audioSource_output = audioContext_output.createMediaStreamSource(stream);
audioSource_output.connect(analyser_output);
analyser_output.fftSize = 2048;
dataArray_output = new Uint8Array(analyser_output.frequencyBinCount);
function updateVisualization() {
// Update output audio visualization (wave bars)
analyser_output.getByteFrequencyData(dataArray_output);
const boxes = document.querySelectorAll('.box');
for (let i = 0; i < boxes.length; i++) {
const index = Math.floor(i * dataArray_output.length / boxes.length);
const value = dataArray_output[index] / 255;
boxes[i].style.transform = `scaleY(${Math.max(0.1, value * 1.5)})`;
}
animationFrame_output = requestAnimationFrame(updateVisualization);
}
updateVisualization();
}
// Reset wave visualization bars to minimum height
function resetVisualization() {
const boxes = document.querySelectorAll('.box');
boxes.forEach(box => box.style.transform = 'scaleY(0.1)');
}
function showError(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
toast.className = 'toast error';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}
function handleMessage(event) {
const eventJson = JSON.parse(event.data);
if (eventJson.type === "error") {
showError(eventJson.message);
} else if (eventJson.type === "send_input") {
fetch('/input_hook', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
webrtc_id: webrtc_id,
chatbot: messages,
state: messages
})
});
} else if (eventJson.type === "log") {
if (eventJson.data === "pause_detected") {
typingIndicator.style.display = 'block';
chatMessages.scrollTop = chatMessages.scrollHeight;
} else if (eventJson.data === "response_starting") {
typingIndicator.style.display = 'none';
}
}
}
async function setupWebRTC() {
const config = __RTC_CONFIGURATION__;
peerConnection = new RTCPeerConnection(config);
const timeoutId = setTimeout(() => {
const toast = document.getElementById('error-toast');
toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
toast.className = 'toast warning';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}, 5000);
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: true
});
setupAudioVisualization(stream);
stream.getTracks().forEach(track => {
peerConnection.addTrack(track, stream);
});
// Add this listener to handle incoming audio track
peerConnection.addEventListener('track', (event) => {
if (event.track.kind === 'audio') {
console.log("Received audio track from server");
if (audioOutput) {
audioOutput.srcObject = event.streams[0];
audioOutput.play().catch(e => console.error("Audio play failed:", e));
}
// Set up visualization for output audio with separate context
setupOutputVisualization(event.streams[0]);
}
});
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = handleMessage;
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
} else if (['closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
stop();
}
updateButtonState();
});
webrtc_id = Math.random().toString(36).substring(7);
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
sdp: peerConnection.localDescription.sdp,
type: peerConnection.localDescription.type,
webrtc_id: webrtc_id
})
});
const serverResponse = await response.json();
if (serverResponse.status === 'failed') {
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
: serverResponse.meta.error);
stop();
return;
}
await peerConnection.setRemoteDescription(serverResponse);
eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
eventSource.addEventListener("output", (event) => {
const eventJson = JSON.parse(event.data);
console.log(eventJson);
messages.push(eventJson.message);
addMessage(eventJson.message.role, eventJson.audio ?? eventJson.message.content);
})
} catch (err) {
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('Failed to establish connection. Please try again.');
stop();
}
}
function addMessage(role, content) {
const messageDiv = document.createElement('div');
messageDiv.classList.add('message', role);
messageDiv.textContent = content;
chatMessages.appendChild(messageDiv);
chatMessages.scrollTop = chatMessages.scrollHeight;
}
function stop() {
if (eventSource) {
eventSource.close();
eventSource = null;
}
if (animationFrame_input) {
cancelAnimationFrame(animationFrame_input);
animationFrame_input = null;
}
if (animationFrame_output) {
cancelAnimationFrame(animationFrame_output);
animationFrame_output = null;
}
if (audioContext_input) {
audioContext_input.close().catch(e => console.error("Error closing input AudioContext:", e));
audioContext_input = null;
analyser_input = null;
dataArray_input = null;
audioSource_input = null;
}
if (audioContext_output) {
audioContext_output.close().catch(e => console.error("Error closing output AudioContext:", e));
audioContext_output = null;
analyser_output = null;
dataArray_output = null;
audioSource_output = null;
}
if (audioOutput) {
audioOutput.pause();
audioOutput.srcObject = null;
}
// Reset visualization
resetVisualization();
if (peerConnection) {
if (peerConnection.getTransceivers) {
peerConnection.getTransceivers().forEach(transceiver => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
peerConnection.onicecandidate = null;
peerConnection.ondatachannel = null;
peerConnection.onconnectionstatechange = null;
peerConnection.close();
peerConnection = null;
}
isMuted = false;
updateButtonState();
audioLevel = 0;
}
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
messages = [];
chatMessages.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>
</body>
</html>


@@ -0,0 +1,3 @@
fastrtc[vad, tts]==0.0.20.rc2
groq
python-dotenv


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to OpenAI using their multimodal API
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Talk to OpenAI (Gradio UI)
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -54,6 +54,9 @@ class OpenAIHandler(AsyncStreamHandler):
)
self.connection = conn
async for event in self.connection:
# Handle interruptions
if event.type == "input_audio_buffer.speech_started":
self.clear_queue()
if event.type == "response.audio_transcript.done":
await self.output_queue.put(AdditionalOutputs(event))
if event.type == "response.audio.delta":
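The new speech_started branch implements the "OpenAI interruptions" item from the commit message: when the Realtime API reports that the user has begun speaking, any assistant audio still queued for playback is dropped so stale speech is never played. A framework-free toy of the same clear-on-interrupt idea (plain asyncio.Queue; clear_queue here is a stand-in for the handler method above):

import asyncio

def clear_queue(q: asyncio.Queue) -> None:
    # Drain everything still waiting so stale assistant audio never plays.
    while not q.empty():
        q.get_nowait()

async def main() -> None:
    out: asyncio.Queue = asyncio.Queue()
    for chunk in (b"assistant ", b"audio ", b"frames"):
        out.put_nowait(chunk)
    clear_queue(out)  # simulates input_audio_buffer.speech_started
    print("frames left after interruption:", out.qsize())  # -> 0

asyncio.run(main())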


@@ -67,16 +67,21 @@
}
button {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 10px;
padding: 12px 24px;
background-color: transparent;
color: #ffffff;
border: 1px solid #ffffff;
padding: 12px 24px;
font-family: inherit;
font-size: 16px;
cursor: pointer;
transition: all 0.3s;
text-transform: uppercase;
letter-spacing: 1px;
position: relative;
}
button:hover {
@@ -116,9 +121,7 @@
.pulse-container {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.pulse-circle {
@@ -128,10 +131,47 @@
background-color: #ffffff;
opacity: 0.2;
flex-shrink: 0;
transform: translateX(-0%) scale(var(--audio-level, 1));
transform: scale(var(--audio-level, 1));
transition: transform 0.1s ease;
}
/* Fix button layout */
button {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 10px;
padding: 12px 24px;
background-color: transparent;
color: #ffffff;
border: 1px solid #ffffff;
font-family: inherit;
font-size: 16px;
cursor: pointer;
transition: all 0.3s;
text-transform: uppercase;
letter-spacing: 1px;
position: relative;
}
.mute-toggle {
width: 24px;
height: 24px;
cursor: pointer;
flex-shrink: 0;
}
.mute-toggle svg {
display: block;
width: 100%;
height: 100%;
}
#start-button {
margin-left: auto;
margin-right: auto;
}
/* Add styles for toast notifications */
.toast {
position: fixed;
@@ -177,6 +217,7 @@
<script>
let peerConnection;
let webrtc_id;
let isMuted = false;
const audioOutput = document.getElementById('audio-output');
const startButton = document.getElementById('start-button');
const chatMessages = document.getElementById('chat-messages');
@@ -185,27 +226,82 @@
let animationFrame;
let audioContext, analyser, audioSource;
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const button = document.getElementById('start-button');
// Clear previous content
button.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
button.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
const spinner = document.createElement('div');
spinner.className = 'spinner';
const text = document.createElement('span');
text.textContent = 'Connecting...';
button.appendChild(spinner);
button.appendChild(text);
} else if (peerConnection && peerConnection.connectionState === 'connected') {
button.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
</div>
`;
// Create pulse circle
const pulseCircle = document.createElement('div');
pulseCircle.className = 'pulse-circle';
// Create mic icon
const micIcon = document.createElement('div');
micIcon.className = 'mute-toggle';
micIcon.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
micIcon.addEventListener('click', toggleMute);
// Create text
const text = document.createElement('span');
text.textContent = 'Stop Conversation';
// Add elements in correct order
button.appendChild(pulseCircle);
button.appendChild(micIcon);
button.appendChild(text);
} else {
button.innerHTML = 'Start Conversation';
const text = document.createElement('span');
text.textContent = 'Start Conversation';
button.appendChild(text);
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -276,6 +372,21 @@
}
});
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = (event) => {
const eventJson = JSON.parse(event.data);
@@ -287,20 +398,6 @@
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
@@ -388,7 +485,12 @@
audioLevel = 0;
}
startButton.addEventListener('click', () => {
startButton.addEventListener('click', (event) => {
// Skip if clicking the mute toggle
if (event.target.closest('.mute-toggle')) {
return;
}
console.log('clicked');
console.log(peerConnection, peerConnection?.connectionState);
if (!peerConnection || peerConnection.connectionState !== 'connected') {


@@ -1,4 +1,4 @@
fastrtc[vad]
fastrtc[vad]==0.0.20.rc2
openai
twilio
python-dotenv


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Llama 3.2 - SambaNova API
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Llama 3.2 - SambaNova API (Gradio)
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -13,8 +13,8 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
get_cloudflare_turn_credentials_async,
get_stt_model,
get_twilio_turn_credentials,
)
from gradio.utils import get_space
from pydantic import BaseModel
@@ -75,7 +75,7 @@ stream = Stream(
additional_outputs=[chatbot, state],
additional_outputs_handler=lambda *a: (a[2], a[3]),
concurrency_limit=20 if get_space() else None,
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
rtc_configuration=get_cloudflare_turn_credentials_async,
)
app = FastAPI()
@@ -95,7 +95,9 @@ class InputData(BaseModel):
@app.get("/")
async def _():
rtc_config = get_twilio_turn_credentials() if get_space() else None
rtc_config = await get_cloudflare_turn_credentials_async(
hf_token=os.getenv("HF_TOKEN_ALT")
)
html_content = (curr_dir / "index.html").read_text()
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)
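Unlike the other demos, this Space reads its token from HF_TOKEN_ALT, so the index route calls the helper with an explicit hf_token. If the same override were wanted for the per-connection credentials that Stream fetches (the diff passes the bare helper there, which would use the default token), a hypothetical zero-argument wrapper would do it:

import os
from fastrtc import get_cloudflare_turn_credentials_async

async def cloudflare_credentials():
    # Mint TURN credentials using the non-default secret name of this Space.
    return await get_cloudflare_turn_credentials_async(
        hf_token=os.getenv("HF_TOKEN_ALT")
    )

# stream = Stream(..., rtc_configuration=cloudflare_credentials)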


@@ -72,13 +72,17 @@
background-color: #0066cc;
color: white;
border: none;
padding: 12px 24px;
padding: 12px 18px;
font-family: inherit;
font-size: 14px;
cursor: pointer;
transition: all 0.3s;
border-radius: 4px;
font-weight: 500;
display: inline-flex;
align-items: center;
justify-content: center;
gap: 8px;
}
button:hover {
@@ -94,7 +98,6 @@
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.spinner {
@@ -118,7 +121,6 @@
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.pulse-circle {
@@ -200,6 +202,23 @@
background-color: #ffd700;
color: black;
}
/* Styles for the mute toggle icon */
.mute-toggle {
width: 20px;
height: 20px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
.mute-toggle svg {
width: 100%;
height: 100%;
stroke: white;
}
</style>
</head>
@@ -239,28 +258,82 @@
let audioContext, analyser, audioSource;
let messages = [];
let eventSource;
let isMuted = false;
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function updateButtonState() {
const button = document.getElementById('start-button');
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
}
startButton.innerHTML = '';
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
button.innerHTML = `
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
button.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
</div>
const pulseContainer = document.createElement('div');
pulseContainer.className = 'pulse-container';
pulseContainer.innerHTML = `
<div class="pulse-circle"></div>
<span>Stop Conversation</span>
`;
const muteToggle = document.createElement('div');
muteToggle.className = 'mute-toggle';
muteToggle.title = isMuted ? 'Unmute' : 'Mute';
muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
muteToggle.addEventListener('click', toggleMute);
startButton.appendChild(pulseContainer);
startButton.appendChild(muteToggle);
startButton.disabled = false;
} else {
button.innerHTML = 'Start Conversation';
startButton.textContent = 'Start Conversation';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -378,6 +451,8 @@
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
} else if (['closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
stop();
}
updateButtonState();
});
@@ -448,9 +523,10 @@
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
if (audioContext) {
audioContext.close();
audioContext.close().catch(e => console.error("Error closing AudioContext:", e));
audioContext = null;
analyser = null;
audioSource = null;
@@ -464,22 +540,33 @@
});
}
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.stop) sender.track.stop();
});
}
peerConnection.onicecandidate = null;
peerConnection.ondatachannel = null;
peerConnection.onconnectionstatechange = null;
peerConnection.close();
peerConnection = null;
console.log("Peer connection closed.");
}
isMuted = false;
updateButtonState();
audioLevel = 0;
}
startButton.addEventListener('click', () => {
if (!peerConnection || peerConnection.connectionState !== 'connected') {
setupWebRTC();
} else {
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
messages = [];
chatMessages.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>


@@ -1,4 +1,4 @@
fastrtc[vad, stt]
fastrtc[vad, stt]==0.0.20.rc2
python-dotenv
huggingface_hub>=0.29.0
twilio


@@ -9,7 +9,7 @@ app_file: app.py
pinned: false
license: mit
short_description: Transcribe audio in realtime with Whisper
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY]
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


@@ -12,8 +12,7 @@ tags:
- webrtc
- websocket
- gradio
- secret|TWILIO_ACCOUNT_SID
- secret|TWILIO_AUTH_TOKEN
- secret|HF_TOKEN
- secret|GROQ_API_KEY
title: Whisper Realtime Transcription (Gradio UI)
---


@@ -9,14 +9,21 @@
:root {
--primary-gradient: linear-gradient(135deg, #f9a45c 0%, #e66465 100%);
--background-cream: #faf8f5;
--background-cream-end: #f7f5f2;
/* Slightly warmer end color for body gradient */
--text-dark: #2d2d2d;
--transcript-bg: #ffffff;
/* White background for transcript area */
--transcript-border: #e0e0e0;
/* Light border for transcript items */
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
margin: 0;
padding: 0;
background-color: var(--background-cream);
/* Apply a subtle vertical gradient to the body */
background: linear-gradient(to bottom, var(--background-cream), var(--background-cream-end));
color: var(--text-dark);
min-height: 100vh;
}
@@ -43,18 +50,26 @@
.container {
max-width: 1000px;
margin: 1.5rem auto;
margin: 2.5rem auto;
/* Increased top/bottom margin */
padding: 0 2rem;
}
.transcript-container {
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
border-radius: 12px;
/* Slightly larger radius */
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.08);
/* Enhanced shadow */
padding: 1.5rem;
height: 300px;
height: 350px;
/* Increased height */
overflow-y: auto;
margin-bottom: 1.5rem;
border: 1px solid rgba(0, 0, 0, 0.1);
margin-bottom: 2rem;
/* Increased margin */
border: 1px solid rgba(0, 0, 0, 0.05);
/* Softer border */
background-color: var(--transcript-bg);
/* Use the new variable */
}
.controls {
@@ -73,6 +88,8 @@
transition: all 0.2s ease;
font-weight: 500;
min-width: 180px;
position: relative;
padding-right: 50px;
}
button:hover {
@@ -86,22 +103,39 @@
/* Transcript text styling */
.transcript-container p {
margin: 0.4rem 0;
padding: 0.6rem;
margin: 0.6rem 0;
/* Increased vertical margin */
padding: 0.8rem 1rem;
/* Increased padding */
background: var(--background-cream);
border-radius: 4px;
line-height: 1.4;
font-size: 0.95rem;
/* Use the lighter cream for contrast */
border-radius: 6px;
/* Slightly larger radius */
line-height: 1.5;
/* Improved line spacing */
font-size: 0.98rem;
/* Slightly larger font */
border-left: 3px solid var(--transcript-border);
/* Add a subtle left border */
transition: background-color 0.2s ease;
/* Smooth hover effect */
}
/* Custom scrollbar - made thinner */
.transcript-container p:hover {
background-color: #fdfbf9;
/* Slightly change background on hover */
}
/* Custom scrollbar - update track color */
.transcript-container::-webkit-scrollbar {
width: 6px;
width: 8px;
/* Slightly wider scrollbar */
}
.transcript-container::-webkit-scrollbar-track {
background: var(--background-cream);
border-radius: 3px;
background: var(--background-cream-end);
/* Match body end gradient */
border-radius: 4px;
}
.transcript-container::-webkit-scrollbar-thumb {
@@ -176,6 +210,40 @@
transition: transform 0.1s ease;
}
/* Styles for the mute button */
.mute-toggle {
position: absolute;
right: 10px;
top: 50%;
transform: translateY(-50%);
width: 24px;
height: 24px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
}
.mute-toggle svg {
width: 20px;
height: 20px;
stroke: white;
}
/* Adjust layout for button content when mute is present */
.button-content {
display: flex;
align-items: center;
justify-content: center;
width: calc(100% - 40px);
margin-right: 40px;
}
.icon-with-spinner,
.pulse-container {
width: 100%;
}
@keyframes spin {
to {
transform: rotate(360deg);
@@ -206,10 +274,29 @@
let audioContext, analyser, audioSource;
let audioLevel = 0;
let animationFrame;
let isMuted = false;
const startButton = document.getElementById('start-button');
const transcriptDiv = document.getElementById('transcript');
// SVG Icons
const micIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
</svg>`;
const micMutedIconSVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
<line x1="12" y1="19" x2="12" y2="23"></line>
<line x1="8" y1="23" x2="16" y2="23"></line>
<line x1="1" y1="1" x2="23" y2="23"></line>
</svg>`;
function showError(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
@@ -241,25 +328,63 @@
}
function updateButtonState() {
// Remove existing mute listener if present
const existingMuteButton = startButton.querySelector('.mute-toggle');
if (existingMuteButton) {
existingMuteButton.removeEventListener('click', toggleMute);
existingMuteButton.remove();
}
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
<div class="button-content">
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
</div>
`;
startButton.disabled = true;
} else if (peerConnection && peerConnection.connectionState === 'connected') {
startButton.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
<div class="button-content">
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
</div>
</div>
<div class="mute-toggle" title="${isMuted ? 'Unmute' : 'Mute'}">
${isMuted ? micMutedIconSVG : micIconSVG}
</div>
`;
startButton.disabled = false;
const muteButton = startButton.querySelector('.mute-toggle');
if (muteButton) {
muteButton.addEventListener('click', toggleMute);
}
} else {
startButton.innerHTML = 'Start Recording';
startButton.disabled = false;
}
}
function toggleMute(event) {
event.stopPropagation();
if (!peerConnection || peerConnection.connectionState !== 'connected') return;
isMuted = !isMuted;
console.log("Mute toggled:", isMuted);
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.kind === 'audio') {
sender.track.enabled = !isMuted;
console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
}
});
updateButtonState();
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
@@ -321,6 +446,21 @@
updateButtonState();
});
peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
console.debug("Sending ICE candidate", candidate);
fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
candidate: candidate.toJSON(),
webrtc_id: webrtc_id,
type: "ice-candidate",
})
})
}
};
// Create data channel for messages
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = handleMessage;
@@ -329,20 +469,6 @@
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
webrtc_id = Math.random().toString(36).substring(7);
const response = await fetch('/webrtc/offer', {
@@ -392,41 +518,45 @@
function stop() {
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
if (audioContext) {
audioContext.close();
audioContext.close().catch(e => console.error("Error closing AudioContext:", e));
audioContext = null;
analyser = null;
audioSource = null;
}
if (peerConnection) {
if (peerConnection.getTransceivers) {
peerConnection.getTransceivers().forEach(transceiver => {
if (transceiver.stop) {
transceiver.stop();
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track) {
sender.track.stop();
console.log(`Track ${sender.track.id} stopped.`);
}
});
}
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track && sender.track.stop) sender.track.stop();
});
}
setTimeout(() => {
peerConnection.close();
}, 500);
peerConnection.close();
peerConnection = null;
console.log("Peer connection closed.");
}
audioLevel = 0;
isMuted = false;
updateButtonState();
}
startButton.addEventListener('click', () => {
if (startButton.textContent === 'Start Recording') {
setupWebRTC();
} else {
startButton.addEventListener('click', (event) => {
if (event.target.closest('.mute-toggle')) {
return;
}
if (peerConnection && peerConnection.connectionState === 'connected') {
console.log("Stop button clicked");
stop();
} else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
console.log("Start button clicked");
transcriptDiv.innerHTML = '';
setupWebRTC();
updateButtonState();
}
});
</script>


@@ -1,4 +1,3 @@
fastrtc[vad]
fastrtc[vad]==0.0.20.rc2
groq
python-dotenv
twilio
python-dotenv