update v0.2.0

2026-02-05 18:09:23 +08:00 · 2025-04-01 16:04:53 +08:00
parent f0b43b63cd 99a8e78e5e
commit b6d3e51b2d
198 changed files with 27674 additions and 2392 deletions
--- a/demo/app.py
+++ b/demo/app.py
@@ -1,10 +1,17 @@
 import asyncio
 import base64
 from io import BytesIO
+import json
+import math
+import queue
+import time
+import uuid
+import threading

+from fastrtc.utils import Message
 import gradio as gr
 import numpy as np
-from gradio_webrtc import (
+from fastrtc import (
    AsyncAudioVideoStreamHandler,
    WebRTC,
    VideoEmitType,
@@ -26,6 +33,7 @@ def encode_image(data: np.ndarray) -> dict:
    base64_str = str(base64.b64encode(bytes_data), "utf-8")
    return {"mime_type": "image/jpeg", "data": base64_str}

+frame_queue = queue.Queue(maxsize=100)

 class VideoChatHandler(AsyncAudioVideoStreamHandler):
    def __init__(
@@ -38,7 +46,7 @@ class VideoChatHandler(AsyncAudioVideoStreamHandler):
            input_sample_rate=24000,
        )
        self.audio_queue = asyncio.Queue()
-        self.video_queue = asyncio.Queue()
+        self.video_queue = frame_queue
        self.quit = asyncio.Event()
        self.session = None
        self.last_frame_time = 0
@@ -50,6 +58,25 @@ class VideoChatHandler(AsyncAudioVideoStreamHandler):
            output_frame_size=self.output_frame_size,
        )
    
+    chat_id = ''
+    async def on_chat_datachannel(self, message: Message, channel):
+        """Handle a message received on the chat data channel (demo echo).
+
+        A ``stop_chat`` message clears the active chat id and replies with
+        ``{"type": "avatar_end"}``. Any other message has its ``data`` echoed
+        back in two halves, 5 seconds apart, as
+        ``{"type": "chat", "id": <chat id>, "message": <text>}`` replies that
+        share one id, followed by ``{"type": "avatar_end"}`` to mark the end of
+        the conversation turn. The second half is only sent if the chat id was
+        not cleared or replaced while waiting.
+        """
+        if message['type'] == 'stop_chat':
+            self.chat_id = ''
+            channel.send(json.dumps({'type': 'avatar_end'}))
+            return
+
+        # One id tags every chunk that belongs to the same utterance.
+        chat_id = uuid.uuid4().hex
+        self.chat_id = chat_id
+        data = message["data"]
+        half_len = len(data) // 2
+        channel.send(json.dumps({"type": "chat", "id": chat_id, "message": data[:half_len]}))
+        await asyncio.sleep(5)
+        # Skip the rest if this chat was stopped or superseded during the sleep.
+        if self.chat_id == chat_id:
+            channel.send(json.dumps({"type": "chat", "id": chat_id, "message": data[half_len:]}))
+            channel.send(json.dumps({'type': 'avatar_end'}))
+    
    async def video_receive(self, frame: np.ndarray):
        # if self.session:
        #     # send image every 1 second
@@ -61,10 +88,11 @@ class VideoChatHandler(AsyncAudioVideoStreamHandler):
        # print(frame.shape)
        newFrame = np.array(frame)
        newFrame[0:, :, 0] = 255 - newFrame[0:, :, 0]
-        self.video_queue.put_nowait(newFrame)
+        # self.video_queue.put_nowait(newFrame)
    
    async def video_emit(self) -> VideoEmitType:
-        return await self.video_queue.get()
+        # frame_queue is a thread-safe queue.Queue filled by a producer thread.
+        # Its get() blocks, so wait for it in a worker thread instead of
+        # stalling the event loop that also serves the audio callbacks.
+        return await asyncio.to_thread(frame_queue.get)

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        frame_size, array = frame
@@ -114,14 +142,35 @@ with gr.Blocks(css=css) as demo:
                },
            }
        )
+        handler = VideoChatHandler()
        webrtc.stream(
-            VideoChatHandler(),
+            handler,
            inputs=[webrtc],
            outputs=[webrtc],
-            time_limit=150,
+            time_limit=1500,
            concurrency_limit=2,
        )
-
+        def generate_frames(width=480, height=960, channels=3):
+            """Thread target: keep pushing random frames into ``frame_queue``.
+
+            Each frame is a ``(height, width, channels)`` uint8 array with values
+            drawn from [188, 256). The loop stops after the first exception,
+            which is printed.
+            """
+            frame_shape = (height, width, channels)
+            try:
+                while True:
+                    pixels = np.random.randint(188, 256, frame_shape, dtype=np.uint8)
+                    frame_queue.put(pixels)
+                    # Pause 30 ms between frames so the producer does not spin the CPU.
+                    time.sleep(0.03)
+            except Exception as e:
+                print(f"生成帧时出错: {e}")
+        thread = threading.Thread(target=generate_frames, daemon=True)
+        thread.start()

 if __name__ == "__main__":
    demo.launch()
+
+
+
--- a/demo/echo_audio/README.md
+++ b/demo/echo_audio/README.md
@@ -0,0 +1,15 @@
+---
+title: Echo Audio
+emoji: 🪩
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Simple echo stream - simplest FastRTC demo
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/echo_audio/app.py
+++ b/demo/echo_audio/app.py
@@ -0,0 +1,45 @@
+import numpy as np
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials
+from gradio.utils import get_space
+
+
+def detection(audio: tuple[int, np.ndarray]):
+    """Echo handler: yield the received audio back unchanged.
+
+    Any iterator that yields audio can be used here; see "LLM Voice Chat"
+    for a more complete example.
+    """
+    yield from (audio,)
+
+
+# Audio send-receive stream that replies with `detection` after a pause.
+# When get_space() is truthy (presumably: running on Hugging Face Spaces) it uses
+# Twilio TURN credentials and caps concurrency and session length; otherwise
+# those three settings are None.
+stream = Stream(
+    handler=ReplyOnPause(detection),
+    modality="audio",
+    mode="send-receive",
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+)
+
+app = FastAPI()
+
+# Mount the stream on the FastAPI app.
+stream.mount(app)
+
+
+@app.get("/")
+async def index():
+    """Redirect the root path to the UI: ``/ui`` locally, the hosted URL on a Space."""
+    if get_space():
+        target = "https://fastrtc-echo-audio.hf.space/ui/"
+    else:
+        target = "/ui"
+    return RedirectResponse(url=target)
+
+
+if __name__ == "__main__":
+    import os
+
+    # The MODE environment variable selects how the demo is served.
+    if (mode := os.getenv("MODE")) == "UI":
+        # Built-in UI only.
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(port=7860)
+    else:
+        import uvicorn
+
+        # Default: serve the FastAPI app (with the mounted stream) via uvicorn.
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/echo_audio/requirements.txt
+++ b/demo/echo_audio/requirements.txt
@@ -0,0 +1,3 @@
+fastrtc[vad]
+twilio
+python-dotenv
--- a/demo/gemini_audio_video/README.md
+++ b/demo/gemini_audio_video/README.md
@@ -0,0 +1,15 @@
+---
+title: Gemini Audio Video
+emoji: ♊️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Gemini understands audio and video!
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/gemini_audio_video/app.py
+++ b/demo/gemini_audio_video/app.py
@@ -0,0 +1,185 @@
+import asyncio
+import base64
+import os
+import time
+from io import BytesIO
+
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from fastrtc import (
+    AsyncAudioVideoStreamHandler,
+    Stream,
+    WebRTC,
+    get_twilio_turn_credentials,
+)
+from google import genai
+from gradio.utils import get_space
+from PIL import Image
+
+load_dotenv()
+
+
+def encode_audio(data: np.ndarray) -> dict:
+    """Wrap the array's raw bytes as a base64-encoded ``audio/pcm`` message dict."""
+    payload = base64.b64encode(data.tobytes()).decode("UTF-8")
+    return {"mime_type": "audio/pcm", "data": payload}
+
+
+def encode_image(data: np.ndarray) -> dict:
+    """Encode an image array as a base64 JPEG message dict."""
+    buffer = BytesIO()
+    with buffer:
+        Image.fromarray(data).save(buffer, "JPEG")
+        jpeg_bytes = buffer.getvalue()
+    encoded = base64.b64encode(jpeg_bytes).decode("utf-8")
+    return {"mime_type": "image/jpeg", "data": encoded}
+
+
+class GeminiHandler(AsyncAudioVideoStreamHandler):
+    """Audio/video stream handler that relays a session to the Gemini live API.
+
+    Incoming audio, and at most one video frame per second, are sent to the
+    Gemini session. Audio returned by Gemini is queued for ``emit``; every
+    incoming video frame is queued unchanged for ``video_emit``.
+    """
+
+    def __init__(
+        self,
+    ) -> None:
+        super().__init__(
+            "mono",
+            output_sample_rate=24000,
+            output_frame_size=480,
+            input_sample_rate=16000,
+        )
+        self.audio_queue = asyncio.Queue()
+        self.video_queue = asyncio.Queue()
+        self.quit = asyncio.Event()
+        self.session = None
+        self.last_frame_time = 0
+
+    def copy(self) -> "GeminiHandler":
+        """Return a fresh handler with its own queues and session state."""
+        return GeminiHandler()
+
+    async def start_up(self):
+        """Connect to the Gemini live API and queue returned audio until ``quit`` is set."""
+        client = genai.Client(
+            api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
+        )
+        config = {"response_modalities": ["AUDIO"]}
+        async with client.aio.live.connect(
+            model="gemini-2.0-flash-exp", config=config
+        ) as session:
+            self.session = session
+            print("set session")
+            while not self.quit.is_set():
+                turn = self.session.receive()
+                async for response in turn:
+                    if data := response.data:
+                        # Interpret the payload as int16 samples shaped (1, n).
+                        audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
+                        self.audio_queue.put_nowait(audio)
+
+    async def video_receive(self, frame: np.ndarray):
+        """Send at most one frame per second to Gemini; queue every frame for ``video_emit``."""
+        if self.session:
+            now = time.time()
+            if now - self.last_frame_time > 1:
+                self.last_frame_time = now
+                await self.session.send(input=encode_image(frame))
+                # latest_args[1] is sent as a second image whenever it is set
+                # (presumably the optional image input of the UI; verify against caller).
+                if self.latest_args[1] is not None:
+                    await self.session.send(input=encode_image(self.latest_args[1]))
+
+        self.video_queue.put_nowait(frame)
+
+    async def video_emit(self):
+        """Return the next queued video frame, waiting until one is available."""
+        return await self.video_queue.get()
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Encode an incoming audio frame and send it to Gemini once a session exists."""
+        _, array = frame
+        array = array.squeeze()
+        audio_message = encode_audio(array)
+        if self.session:
+            await self.session.send(input=audio_message)
+
+    async def emit(self):
+        """Return the next queued Gemini audio chunk as ``(output_sample_rate, array)``."""
+        array = await self.audio_queue.get()
+        return (self.output_sample_rate, array)
+
+    async def shutdown(self) -> None:
+        """Stop the receive loop and close the session's websocket, if a session exists."""
+        if self.session:
+            self.quit.set()
+            await self.session._websocket.close()
+            self.quit.clear()
+
+
+# Audio-video send-receive stream for the Gemini handler.
+# get_space() returns the Space id (or None), never the literal "spaces", so a
+# plain truthiness check is what enables TURN credentials when hosted.
+stream = Stream(
+    handler=GeminiHandler(),
+    modality="audio-video",
+    mode="send-receive",
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    time_limit=90 if get_space() else None,
+    additional_inputs=[
+        gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
+    ],
+    ui_args={
+        "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
+        "pulse_color": "rgb(255, 255, 255)",
+        "icon_button_color": "rgb(255, 255, 255)",
+        "title": "Gemini Audio Video Chat",
+    },
+)
+
+# Size cap for the video component (elem_id "video-source"); lengths need a unit.
+css = """
+#video-source {max-width: 600px !important; max-height: 600px !important;}
+"""
+
+# Custom Blocks layout: header, the WebRTC component and an optional image input.
+with gr.Blocks(css=css) as demo:
+    gr.HTML(
+        """
+    <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
+        <div style="background-color: var(--block-background-fill); border-radius: 8px">
+            <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
+        </div>
+        <div>
+            <h1>Gen AI SDK Voice Chat</h1>
+            <p>Speak with Gemini using real-time audio + video streaming</p>
+            <p>Powered by <a href="https://gradio.app/">Gradio</a> and <a href="https://freddyaboulton.github.io/gradio-webrtc/">WebRTC</a>⚡️</p>
+            <p>Get an API Key <a href="https://support.google.com/googleapi/answer/6158862?hl=en">here</a></p>
+        </div>
+    </div>
+    """
+    )
+    with gr.Row() as row:
+        with gr.Column():
+            # get_space() returns the Space id (or None), so test its truthiness.
+            webrtc = WebRTC(
+                label="Video Chat",
+                modality="audio-video",
+                mode="send-receive",
+                elem_id="video-source",
+                rtc_configuration=get_twilio_turn_credentials()
+                if get_space()
+                else None,
+                icon="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
+                pulse_color="rgb(255, 255, 255)",
+                icon_button_color="rgb(255, 255, 255)",
+            )
+        with gr.Column():
+            image_input = gr.Image(
+                label="Image", type="numpy", sources=["upload", "clipboard"]
+            )
+
+        webrtc.stream(
+            GeminiHandler(),
+            inputs=[webrtc, image_input],
+            outputs=[webrtc],
+            time_limit=60 if get_space() else None,
+            concurrency_limit=2 if get_space() else None,
+        )
+
+# Use the custom layout as the stream's UI.
+stream.ui = demo
+
+
+if __name__ == "__main__":
+    # Phone mode is rejected; every other MODE value (including "UI") serves the UI.
+    if os.getenv("MODE") == "PHONE":
+        raise ValueError("Phone mode not supported for this demo")
+    stream.ui.launch(server_port=7860)
--- a/demo/gemini_audio_video/requirements.txt
+++ b/demo/gemini_audio_video/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc
+python-dotenv
+google-genai
+twilio
--- a/demo/gemini_conversation/README.md
+++ b/demo/gemini_conversation/README.md
@@ -0,0 +1,15 @@
+---
+title: Gemini Talking to Gemini
+emoji: ♊️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.17.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Have two Gemini agents talk to each other
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/gemini_conversation/app.py
+++ b/demo/gemini_conversation/app.py
@@ -0,0 +1,232 @@
+import asyncio
+import base64
+import os
+from pathlib import Path
+from typing import AsyncGenerator
+
+import librosa
+import numpy as np
+from dotenv import load_dotenv
+from fastrtc import (
+    AsyncStreamHandler,
+    Stream,
+    get_tts_model,
+    wait_for_item,
+)
+from fastrtc.utils import audio_to_int16
+from google import genai
+from google.genai.types import (
+    Content,
+    LiveConnectConfig,
+    Part,
+    PrebuiltVoiceConfig,
+    SpeechConfig,
+    VoiceConfig,
+)
+
+load_dotenv()
+
+cur_dir = Path(__file__).parent
+
+SAMPLE_RATE = 24000
+
+tts_model = get_tts_model()
+
+
+class GeminiHandler(AsyncStreamHandler):
+    """Handler for the Gemini API.
+
+    Received audio is base64-encoded and streamed to a Gemini live session;
+    audio returned by Gemini is queued and handed back through ``emit``.
+    Subclasses change the agent's persona by overriding ``voice_name`` and
+    ``system_instruction_parts``.
+    """
+
+    # Name of the prebuilt Gemini voice used for replies.
+    voice_name = "Charon"
+    # Texts sent, one ``Part`` each, as the session's system instruction.
+    system_instruction_parts = ("You are a helpful assistant.",)
+
+    def __init__(
+        self,
+    ) -> None:
+        super().__init__(
+            expected_layout="mono",
+            output_sample_rate=24000,
+            output_frame_size=480,
+            input_sample_rate=24000,
+        )
+        self.input_queue: asyncio.Queue = asyncio.Queue()
+        self.output_queue: asyncio.Queue = asyncio.Queue()
+        self.quit: asyncio.Event = asyncio.Event()
+
+    def copy(self) -> "GeminiHandler":
+        """Return a fresh handler of the same kind."""
+        return GeminiHandler()
+
+    async def start_up(self):
+        """Open a Gemini live session and queue every audio chunk it returns."""
+        client = genai.Client(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            http_options={"api_version": "v1alpha"},
+        )
+
+        config = LiveConnectConfig(
+            response_modalities=["AUDIO"],  # type: ignore
+            speech_config=SpeechConfig(
+                voice_config=VoiceConfig(
+                    prebuilt_voice_config=PrebuiltVoiceConfig(
+                        voice_name=self.voice_name,
+                    )
+                )
+            ),
+            system_instruction=Content(
+                parts=[Part(text=text) for text in self.system_instruction_parts],
+                role="system",
+            ),
+        )
+        async with client.aio.live.connect(
+            model="gemini-2.0-flash-exp", config=config
+        ) as session:
+            async for audio in session.start_stream(
+                stream=self.stream(), mime_type="audio/pcm"
+            ):
+                if audio.data:
+                    array = np.frombuffer(audio.data, dtype=np.int16)
+                    self.output_queue.put_nowait((self.output_sample_rate, array))
+
+    async def stream(self) -> AsyncGenerator[bytes, None]:
+        """Yield queued input audio until ``quit`` is set."""
+        while not self.quit.is_set():
+            try:
+                # Short timeout so the loop re-checks ``quit`` even when idle.
+                audio = await asyncio.wait_for(self.input_queue.get(), 0.1)
+                yield audio
+            except (asyncio.TimeoutError, TimeoutError):
+                pass
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Base64-encode an incoming audio frame and queue it for ``stream``."""
+        _, array = frame
+        array = array.squeeze()
+        audio_message = base64.b64encode(array.tobytes()).decode("UTF-8")
+        self.input_queue.put_nowait(audio_message)
+
+    async def emit(self) -> tuple[int, np.ndarray] | None:
+        """Return the result of ``wait_for_item`` on the output queue."""
+        return await wait_for_item(self.output_queue)
+
+    def shutdown(self) -> None:
+        """Signal ``stream`` to stop yielding input."""
+        self.quit.set()
+
+
+class GeminiHandler2(GeminiHandler):
+    """Student agent: opens the conversation, then behaves like ``GeminiHandler``."""
+
+    voice_name = "Puck"
+    system_instruction_parts = (
+        "You are a cooking student who wants to learn how to make an omelette.",
+        "You are currently in the kitchen with a teacher who is helping you make an omelette.",
+        "Please wait for the teacher to tell you what to do next. Follow the teacher's instructions carefully.",
+    )
+
+    async def start_up(self):
+        """Queue a synthesized opening question, then run the shared session loop."""
+        sample_rate, greeting = tts_model.tts("Can you help me make an omelette?")
+        # Resample the TTS audio to the handler's output rate before queueing it.
+        greeting = librosa.resample(
+            greeting,
+            orig_sr=sample_rate,
+            target_sr=self.output_sample_rate,
+        )
+        greeting = audio_to_int16((self.output_sample_rate, greeting))
+        await self.output_queue.put((self.output_sample_rate, greeting))
+        await super().start_up()
+
+    def copy(self) -> "GeminiHandler2":
+        """Return a fresh handler of the same kind."""
+        return GeminiHandler2()
+
+
+# "Teacher" agent: audio send-receive stream driven by GeminiHandler.
+gemini_stream = Stream(
+    GeminiHandler(),
+    modality="audio",
+    mode="send-receive",
+    ui_args={
+        "title": "Gemini Teacher",
+        "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
+        "pulse_color": "rgb(74, 138, 213)",
+        "icon_button_color": "rgb(255, 255, 255)",
+    },
+)
+
+# "Student" agent: same setup, driven by GeminiHandler2 with a different pulse color.
+gemini_stream_2 = Stream(
+    GeminiHandler2(),
+    modality="audio",
+    mode="send-receive",
+    ui_args={
+        "title": "Gemini Student",
+        "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
+        "pulse_color": "rgb(132, 112, 196)",
+        "icon_button_color": "rgb(255, 255, 255)",
+    },
+)
+
+if __name__ == "__main__":
+    import gradio as gr
+    from gradio.utils import get_space
+
+    if not get_space():
+        with gr.Blocks() as demo:
+            gr.HTML(
+                """
+                <div style="display: flex; justify-content: center; align-items: center;">
+                <h1>Gemini Conversation</h1>                
+                </div>
+                """
+            )
+            gr.Markdown(
+                """# How to run this demo
+                
+                - Clone the repo - top right of the page click the vertical three dots and select "Clone repository"
+                - Open the repo in a terminal and install the dependencies
+                - Get a gemini API key [here](https://ai.google.dev/gemini-api/docs/api-key)
+                - Create a `.env` file in the root of the repo and add the following:
+                ```
+                GEMINI_API_KEY=<your_gemini_api_key>
+                ```
+                - Run the app with `python app.py`
+                - This will print the two URLs of the agents running locally
+                - Use ngrok to expose one agent to the internet. This is so that you can access it from your phone
+                - Use the ngrok URL to access the agent from your phone
+                - Now, start the "teacher gemini" agent first. Then, start the "student gemini" agent. The student gemini will start talking to the teacher gemini. And the teacher gemini will respond!
+
+                Important:
+                - Make sure the audio sources are not too close to each other or too loud. Sometimes that causes them to talk over each other.
+                - Feel free to modify the `system_instruction` to change the behavior of the agents.
+                - You can also modify the `voice_name` to change the voice of the agents.
+                - Have fun!
+                """
+            )
+        demo.launch()
+
+    import time
+
+    # Launch both agent UIs on separate ports without blocking the main thread.
+    _ = gemini_stream.ui.launch(server_port=7860, prevent_thread_lock=True)
+    _ = gemini_stream_2.ui.launch(server_port=7861, prevent_thread_lock=True)
+    # Keep the process alive until Ctrl+C, then close both UIs.
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        gemini_stream.ui.close()
+        gemini_stream_2.ui.close()
--- a/demo/hello_computer/README.md
+++ b/demo/hello_computer/README.md
@@ -0,0 +1,15 @@
+---
+title: Hello Computer
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Say computer before asking your question
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/hello_computer/README_gradio.md
+++ b/demo/hello_computer/README_gradio.md
@@ -0,0 +1,15 @@
+---
+title: Hello Computer (Gradio)
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Say computer (Gradio)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/hello_computer/app.py
+++ b/demo/hello_computer/app.py
@@ -0,0 +1,145 @@
+import base64
+import json
+import os
+from pathlib import Path
+
+import gradio as gr
+import huggingface_hub
+import numpy as np
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnStopWords,
+    Stream,
+    get_stt_model,
+    get_twilio_turn_credentials,
+)
+from gradio.utils import get_space
+from pydantic import BaseModel
+
+load_dotenv()
+
+curr_dir = Path(__file__).parent
+
+
+# Inference client configured for the "sambanova" provider; the API key is read
+# from the SAMBANOVA_API_KEY environment variable.
+client = huggingface_hub.InferenceClient(
+    api_key=os.environ.get("SAMBANOVA_API_KEY"),
+    provider="sambanova",
+)
+# Speech-to-text model used by `response` to transcribe incoming audio.
+model = get_stt_model()
+
+
+def response(
+    audio: tuple[int, np.ndarray],
+    gradio_chatbot: list[dict] | None = None,
+    conversation_state: list[dict] | None = None,
+):
+    """Transcribe the user's audio, query the LLM and stream chat updates.
+
+    Yields ``AdditionalOutputs(gradio_chatbot, conversation_state)`` twice:
+    once after the user's audio is added to the chatbot, and once after the
+    assistant's reply is appended to both lists.
+    """
+    if not gradio_chatbot:
+        gradio_chatbot = []
+    if not conversation_state:
+        conversation_state = []
+
+    transcript = model.stt(audio)
+    print("STT in handler", transcript)
+
+    rate, samples = audio
+    user_audio = gr.Audio((rate, samples.squeeze()))
+    gradio_chatbot.append({"role": "user", "content": user_audio})
+    yield AdditionalOutputs(gradio_chatbot, conversation_state)
+
+    # The LLM only sees the text transcript, not the audio component.
+    conversation_state.append({"role": "user", "content": transcript})
+
+    completion = client.chat.completions.create(
+        model="meta-llama/Llama-3.2-3B-Instruct",
+        messages=conversation_state,  # type: ignore
+        temperature=0.1,
+        top_p=0.1,
+    )
+    assistant_message = {
+        "role": "assistant",
+        "content": completion.choices[0].message.content,
+    }
+    conversation_state.append(assistant_message)
+    gradio_chatbot.append(assistant_message)
+
+    yield AdditionalOutputs(gradio_chatbot, conversation_state)
+
+
+# Chat history component and conversation state; both are passed to `response`
+# as additional inputs and updated from its additional outputs.
+chatbot = gr.Chatbot(type="messages", value=[])
+state = gr.State(value=[])
+# Send-only audio stream; `response` is wrapped in ReplyOnStopWords with the
+# stop word "computer".
+stream = Stream(
+    ReplyOnStopWords(
+        response,  # type: ignore
+        stop_words=["computer"],
+        input_sample_rate=16000,
+    ),
+    mode="send",
+    modality="audio",
+    additional_inputs=[chatbot, state],
+    additional_outputs=[chatbot, state],
+    # Keeps positional arguments 2 and 3 (presumably the new chatbot and state
+    # values following the two current ones; confirm against fastrtc).
+    additional_outputs_handler=lambda *a: (a[2], a[3]),
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+)
+app = FastAPI()
+stream.mount(app)
+
+
+class Message(BaseModel):
+    """One chat message: a role and its text content."""
+
+    role: str
+    content: str
+
+
+class InputData(BaseModel):
+    """Request body of ``/input_hook``: a connection id plus chatbot and state messages."""
+
+    webrtc_id: str
+    chatbot: list[Message]
+    state: list[Message]
+
+
+@app.get("/")
+async def _():
+    """Serve index.html with the ``__RTC_CONFIGURATION__`` placeholder filled in."""
+    if get_space():
+        rtc_config = get_twilio_turn_credentials()
+    else:
+        rtc_config = None
+    template = (curr_dir / "index.html").read_text()
+    page = template.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=page)
+
+
+@app.post("/input_hook")
+async def _(data: InputData):
+    """Set the posted chatbot and state messages as inputs for ``data.webrtc_id``."""
+    payload = data.model_dump()
+    chatbot_messages, state_messages = payload["chatbot"], payload["state"]
+    stream.set_input(data.webrtc_id, chatbot_messages, state_messages)
+
+
+def audio_to_base64(file_path):
+    """Return the file's bytes as a base64 ``data:audio/wav`` URI string."""
+    with open(file_path, "rb") as audio_file:
+        raw_bytes = audio_file.read()
+    payload = base64.b64encode(raw_bytes).decode("utf-8")
+    return "data:audio/wav;base64," + payload
+
+
+@app.get("/outputs")
+async def _(webrtc_id: str):
+    """Stream the outputs for ``webrtc_id`` as ``output`` server-sent events."""
+
+    async def output_stream():
+        async for output in stream.output_stream(webrtc_id):
+            chat_history, chat_state = output.args[0], output.args[1]
+            latest_message = chat_state[-1]
+            # Audio is attached only when the newest chatbot entry is the user's.
+            if chat_history[-1]["role"] == "user":
+                audio = audio_to_base64(chat_history[-1]["content"].value["path"])
+            else:
+                audio = None
+            event = {"message": latest_message, "audio": audio}
+            yield f"event: output\ndata: {json.dumps(event)}\n\n"
+
+    return StreamingResponse(output_stream(), media_type="text/event-stream")
+
+
+if __name__ == "__main__":
+    import os
+
+    # The MODE environment variable selects how the demo is served.
+    if (mode := os.getenv("MODE")) == "UI":
+        # Built-in UI only.
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        raise ValueError("Phone mode not supported")
+    else:
+        import uvicorn
+
+        # Default: serve the FastAPI app (index page, hooks, mounted stream) via uvicorn.
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/hello_computer/index.html
+++ b/demo/hello_computer/index.html
@@ -0,0 +1,486 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Hello Computer 💻</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            background-color: #f8f9fa;
+            color: #1a1a1a;
+            margin: 0;
+            padding: 20px;
+            height: 100vh;
+            box-sizing: border-box;
+        }
+
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            height: calc(100% - 100px);
+        }
+
+        .logo {
+            text-align: center;
+            margin-bottom: 40px;
+        }
+
+        .chat-container {
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+            padding: 20px;
+            height: 90%;
+            box-sizing: border-box;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .chat-messages {
+            flex-grow: 1;
+            overflow-y: auto;
+            margin-bottom: 20px;
+            padding: 10px;
+        }
+
+        .message {
+            margin-bottom: 20px;
+            padding: 12px;
+            border-radius: 8px;
+            font-size: 14px;
+            line-height: 1.5;
+        }
+
+        .message.user {
+            background-color: #e9ecef;
+            margin-left: 20%;
+        }
+
+        .message.assistant {
+            background-color: #f1f3f5;
+            margin-right: 20%;
+        }
+
+        .controls {
+            text-align: center;
+            margin-top: 20px;
+        }
+
+        button {
+            background-color: #0066cc;
+            color: white;
+            border: none;
+            padding: 12px 24px;
+            font-family: inherit;
+            font-size: 14px;
+            cursor: pointer;
+            transition: all 0.3s;
+            border-radius: 4px;
+            font-weight: 500;
+        }
+
+        button:hover {
+            background-color: #0052a3;
+        }
+
+        #audio-output {
+            display: none;
+        }
+
+        .icon-with-spinner {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .spinner {
+            width: 20px;
+            height: 20px;
+            border: 2px solid #ffffff;
+            border-top-color: transparent;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            flex-shrink: 0;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        .pulse-container {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .pulse-circle {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            background-color: #ffffff;
+            opacity: 0.2;
+            flex-shrink: 0;
+            transform: translateX(-0%) scale(var(--audio-level, 1));
+            transition: transform 0.1s ease;
+        }
+
+        /* Typing indicator */
+        .typing-indicator {
+            padding: 8px;
+            background-color: #f1f3f5;
+            border-radius: 8px;
+            margin-bottom: 10px;
+            display: none;
+        }
+
+        .dots {
+            display: inline-flex;
+            gap: 4px;
+        }
+
+        .dot {
+            width: 8px;
+            height: 8px;
+            background-color: #0066cc;
+            border-radius: 50%;
+            animation: pulse 1.5s infinite;
+            opacity: 0.5;
+        }
+
+        .dot:nth-child(2) {
+            animation-delay: 0.5s;
+        }
+
+        .dot:nth-child(3) {
+            animation-delay: 1s;
+        }
+
+        @keyframes pulse {
+
+            0%,
+            100% {
+                opacity: 0.5;
+                transform: scale(1);
+            }
+
+            50% {
+                opacity: 1;
+                transform: scale(1.2);
+            }
+        }
+
+        /* Toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <!-- Toast element shared by error and warning notifications -->
+    <div id="error-toast" class="toast"></div>
+    <div class="container">
+        <div class="logo">
+            <h1>Hello Computer 💻</h1>
+            <h2 style="font-size: 1.2em; color: #666; margin-top: 10px;">Say 'Computer' before asking your question</h2>
+        </div>
+        <div class="chat-container">
+            <div class="chat-messages" id="chat-messages"></div>
+            <div class="typing-indicator" id="typing-indicator">
+                <div class="dots">
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                </div>
+            </div>
+        </div>
+        <div class="controls">
+            <button id="start-button">Start Conversation</button>
+        </div>
+    </div>
+    <audio id="audio-output"></audio>
+
+    <script>
+        let peerConnection;
+        let webrtc_id;
+        const startButton = document.getElementById('start-button');
+        const chatMessages = document.getElementById('chat-messages');
+
+        let audioLevel = 0;
+        let animationFrame;
+        let audioContext, analyser, audioSource;
+        let messages = [];
+        let eventSource;
+
+        function updateButtonState() { // redraw the start button from peerConnection.connectionState
+            const button = document.getElementById('start-button');
+            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+                button.innerHTML = `
+                    <div class="icon-with-spinner">
+                        <div class="spinner"></div>
+                        <span>Connecting...</span>
+                    </div>
+                `;
+            } else if (peerConnection && peerConnection.connectionState === 'connected') { // live connection
+                button.innerHTML = `
+                    <div class="pulse-container">
+                        <div class="pulse-circle"></div>
+                        <span>Stop Conversation</span>
+                    </div>
+                `;
+            } else { // no connection yet, or any other state
+                button.innerHTML = 'Start Conversation';
+            }
+        }
+
+        function setupAudioVisualization(stream) { // drive the button's pulse circle from the stream's level
+            audioContext = new (window.AudioContext || window.webkitAudioContext)();
+            analyser = audioContext.createAnalyser();
+            audioSource = audioContext.createMediaStreamSource(stream);
+            audioSource.connect(analyser);
+            analyser.fftSize = 64;
+            const dataArray = new Uint8Array(analyser.frequencyBinCount);
+
+            function updateAudioLevel() {
+                analyser.getByteFrequencyData(dataArray);
+                const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
+                audioLevel = average / 255; // byte bins are 0-255, so this normalizes to 0-1
+
+                const pulseCircle = document.querySelector('.pulse-circle');
+                if (pulseCircle) {
+                    pulseCircle.style.setProperty('--audio-level', 1 + audioLevel); // consumed by the CSS scale()
+                }
+
+                animationFrame = requestAnimationFrame(updateAudioLevel); // id kept so stop() can cancel the loop
+            }
+            updateAudioLevel();
+        }
+
+        function showError(message) {
+            // Show `message` in the shared toast with error styling.
+            const errorToast = document.getElementById('error-toast');
+            errorToast.textContent = message;
+            errorToast.className = 'toast error';
+            errorToast.style.display = 'block';
+
+            // Auto-dismiss after 5 seconds.
+            const hideToast = () => { errorToast.style.display = 'none'; };
+            setTimeout(hideToast, 5000);
+        }
+
+        function handleMessage(event) { // handle one JSON message received on the data channel
+            const eventJson = JSON.parse(event.data);
+            const typingIndicator = document.getElementById('typing-indicator');
+
+            if (eventJson.type === "error") {
+                showError(eventJson.message);
+            } else if (eventJson.type === "send_input") { // answer by POSTing the chat history to /input_hook
+                fetch('/input_hook', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        webrtc_id: webrtc_id,
+                        chatbot: messages, // the same history array fills both fields
+                        state: messages
+                    })
+                });
+            } else if (eventJson.type === "log") {
+                if (eventJson.data === "pause_detected") { // show the typing dots
+                    typingIndicator.style.display = 'block';
+                    chatMessages.scrollTop = chatMessages.scrollHeight;
+                } else if (eventJson.data === "response_starting") { // hide them again
+                    typingIndicator.style.display = 'none';
+                }
+            }
+        }
+
+        async function setupWebRTC() {
+            // Capture the mic, negotiate a session through /webrtc/offer, then subscribe to /outputs.
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+
+                setupAudioVisualization(stream);
+
+                stream.getTracks().forEach(track => {
+                    peerConnection.addTrack(track, stream);
+                });
+
+                const dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = handleMessage;
+
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => { // wait for ICE gathering to finish before sending the offer
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    console.log('connectionstatechange', peerConnection.connectionState);
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                    updateButtonState();
+                });
+
+                webrtc_id = Math.random().toString(36).substring(7);
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    clearTimeout(timeoutId); // otherwise the slow-connection warning would replace this error
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+
+                eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
+                eventSource.addEventListener("output", (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    console.log(eventJson);
+                    messages.push(eventJson.message);
+                    addMessage(eventJson.message.role, eventJson.audio ?? eventJson.message.content);
+                });
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+            }
+        }
+
+        function addMessage(role, content) {
+            // Append one chat bubble: an audio player for user turns, plain text otherwise.
+            const bubble = document.createElement('div');
+            bubble.classList.add('message', role);
+
+            if (role !== 'user') {
+                bubble.textContent = content;
+            } else {
+                // For user turns `content` is used as the audio source URL.
+                const player = document.createElement('audio');
+                player.controls = true;
+                player.src = content;
+                bubble.appendChild(player);
+            }
+
+            chatMessages.appendChild(bubble);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+
+        function stop() { // tear down the event stream, audio analysis and peer connection
+            if (eventSource) {
+                eventSource.close();
+                eventSource = null;
+            }
+
+            if (animationFrame) {
+                cancelAnimationFrame(animationFrame); // ends the updateAudioLevel loop
+            }
+            if (audioContext) {
+                audioContext.close();
+                audioContext = null;
+                analyser = null;
+                audioSource = null;
+            }
+            if (peerConnection) {
+                if (peerConnection.getTransceivers) {
+                    peerConnection.getTransceivers().forEach(transceiver => {
+                        if (transceiver.stop) {
+                            transceiver.stop();
+                        }
+                    });
+                }
+
+                if (peerConnection.getSenders) {
+                    peerConnection.getSenders().forEach(sender => {
+                        if (sender.track && sender.track.stop) sender.track.stop(); // stop the outgoing media track
+                    });
+                }
+                peerConnection.close();
+            }
+            updateButtonState(); // redraw the button for the current connection state
+            audioLevel = 0;
+        }
+
+        startButton.addEventListener('click', () => {
+            const state = peerConnection ? peerConnection.connectionState : 'closed';
+            // A setup is already in flight; starting another would orphan it and its mic capture.
+            if (state === 'new' || state === 'connecting') return;
+            if (state === 'connected') stop();
+            else setupWebRTC();
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/hello_computer/requirements.txt
+++ b/demo/hello_computer/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc[stopword]
+python-dotenv
+huggingface_hub>=0.29.0
+twilio
--- a/demo/llama_code_editor/README.md
+++ b/demo/llama_code_editor/README.md
@@ -0,0 +1,16 @@
+---
+title: Llama Code Editor
+emoji: 🦙
+colorFrom: indigo
+colorTo: pink
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Create interactive HTML web pages with your voice
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN,
+secret|SAMBANOVA_API_KEY, secret|GROQ_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/llama_code_editor/app.py
+++ b/demo/llama_code_editor/app.py
@@ -0,0 +1,45 @@
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+from fastrtc import Stream
+from gradio.utils import get_space
+
+try:
+    from demo.llama_code_editor.handler import (
+        CodeHandler,
+    )
+    from demo.llama_code_editor.ui import demo as ui
+except (ImportError, ModuleNotFoundError):
+    from handler import CodeHandler
+    from ui import demo as ui
+
+
+stream = Stream(
+    handler=CodeHandler,
+    modality="audio",
+    mode="send-receive",
+    concurrency_limit=10 if get_space() else None,  # limits apply only when get_space() is truthy
+    time_limit=90 if get_space() else None,
+)
+
+stream.ui = ui  # use the Blocks app from ui.py as the stream's UI
+
+app = FastAPI()  # NOTE(review): nothing in this file mounts stream or ui on app; confirm "/ui" is served
+
+
+@app.get("/")
+async def _():  # redirect the root path to the UI: local "/ui", or the hosted URL when get_space() is truthy
+    url = "/ui" if not get_space() else "https://fastrtc-llama-code-editor.hf.space/ui/"
+    return RedirectResponse(url)
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":  # MODE=UI: launch stream.ui directly
+        stream.ui.launch(server_port=7860, server_name="0.0.0.0")
+    elif mode == "PHONE":  # MODE=PHONE: hand off to stream.fastphone
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:  # any other value (or unset): serve the FastAPI app with uvicorn
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/llama_code_editor/assets/sandbox.html
+++ b/demo/llama_code_editor/assets/sandbox.html
@@ -0,0 +1,37 @@
+<div style="
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  min-height: 400px;
+  background: linear-gradient(135deg, #f5f7fa 0%, #e4e8ec 100%);
+  border-radius: 8px;
+  border: 2px dashed #cbd5e1;
+  padding: 2rem;
+  text-align: center;
+  color: #64748b;
+  font-family: system-ui, -apple-system, sans-serif;
+">
+  <div style="
+    width: 80px;
+    height: 80px;
+    margin-bottom: 1.5rem;
+    border: 3px solid #cbd5e1;
+    border-radius: 12px;
+    position: relative;
+  ">
+    <div style="
+      position: absolute;
+      top: 50%;
+      left: 50%;
+      transform: translate(-50%, -50%);
+      font-size: 2rem;
+    ">📦</div>
+  </div>
+  <h2 style="
+    margin: 0 0 0.5rem 0;
+    font-size: 1.5rem;
+    font-weight: 600;
+    color: #475569;
+  ">No Application Created</h2>
+</div>
--- a/demo/llama_code_editor/assets/spinner.html
+++ b/demo/llama_code_editor/assets/spinner.html
@@ -0,0 +1,60 @@
+<div style="
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  min-height: 400px;
+  background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
+  border-radius: 8px;
+  padding: 2rem;
+  text-align: center;
+  font-family: system-ui, -apple-system, sans-serif;
+">
+  <!-- Spinner container -->
+  <div style="
+    position: relative;
+    width: 64px;
+    height: 64px;
+    margin-bottom: 1.5rem;
+  ">
+    <!-- Static ring -->
+    <div style="
+      position: absolute;
+      width: 100%;
+      height: 100%;
+      border: 4px solid #e2e8f0;
+      border-radius: 50%;
+    "></div>
+    <!-- Animated spinner -->
+    <div style="
+      position: absolute;
+      width: 100%;
+      height: 100%;
+      border: 4px solid transparent;
+      border-top-color: #3b82f6;
+      border-radius: 50%;
+      animation: spin 1s linear infinite;
+    "></div>
+  </div>
+
+  <!-- Text content -->
+  <h2 style="
+    margin: 0 0 0.5rem 0;
+    font-size: 1.25rem;
+    font-weight: 600;
+    color: #475569;
+  ">Generating your application...</h2>
+  
+  <p style="
+    margin: 0;
+    font-size: 0.875rem;
+    color: #64748b;
+  ">This may take a few moments</p>
+
+  <style>
+    @keyframes spin {
+      0% { transform: rotate(0deg); }
+      100% { transform: rotate(360deg); }
+    }
+  </style>
+</div>
--- a/demo/llama_code_editor/handler.py
+++ b/demo/llama_code_editor/handler.py
@@ -0,0 +1,73 @@
+import base64
+import os
+import re
+from pathlib import Path
+
+import numpy as np
+import openai
+from dotenv import load_dotenv
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    audio_to_bytes,
+)
+from groq import Groq
+
+load_dotenv()
+
+groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))  # used by generate() for transcription
+
+client = openai.OpenAI(  # OpenAI client pointed at the SambaNova base URL
+    api_key=os.environ.get("SAMBANOVA_API_KEY"),
+    base_url="https://api.sambanova.ai/v1",
+)
+
+path = Path(__file__).parent / "assets"
+
+spinner_html = (path / "spinner.html").read_text(encoding="utf-8")  # read_text closes the file itself
+
+
+system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
+user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
+
+
+def extract_html_content(text):
+    """Return the first ``<!DOCTYPE html>``-to-``</html>`` span of *text*, or None if absent."""
+    found = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
+    if found is None:
+        return None
+    return found.group(0)
+
+
+def display_in_sandbox(code):
+    # Wrap *code* in an <iframe> via a base64 HTML data URI; None/empty code yields an empty document.
+    encoded_html = base64.b64encode((code or "").encode("utf-8")).decode("utf-8")
+    return f'<iframe src="data:text/html;charset=utf-8;base64,{encoded_html}" width="100%" height="600px"></iframe>'
+
+
+def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
+    yield AdditionalOutputs(history, spinner_html)  # first emit the spinner markup in place of the code
+
+    text = groq_client.audio.transcriptions.create(  # transcribe the recorded audio to text
+        file=("audio-file.mp3", audio_to_bytes(user_message)),
+        model="whisper-large-v3-turbo",
+        response_format="verbose_json",
+    ).text
+
+    user_msg_formatted = user_prompt.format(user_message=text, code=code)
+    history.append({"role": "user", "content": user_msg_formatted})  # appends to the list passed in
+
+    response = client.chat.completions.create(
+        model="Meta-Llama-3.1-70B-Instruct",
+        messages=history,  # type: ignore
+        temperature=0.1,
+        top_p=0.1,
+    )
+
+    output = response.choices[0].message.content
+    html_code = extract_html_content(output)  # None when the reply holds no full HTML document
+    history.append({"role": "assistant", "content": output})
+    yield AdditionalOutputs(history, html_code)
+
+
+CodeHandler = ReplyOnPause(generate)  # type: ignore
--- a/demo/llama_code_editor/requirements.in
+++ b/demo/llama_code_editor/requirements.in
@@ -0,0 +1,5 @@
+fastrtc[vad]
+groq
+openai
+python-dotenv
+twilio
--- a/demo/llama_code_editor/requirements.txt
+++ b/demo/llama_code_editor/requirements.txt
@@ -0,0 +1,295 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile demo/llama_code_editor/requirements.in -o demo/llama_code_editor/requirements.txt
+aiofiles==23.2.1
+    # via gradio
+aiohappyeyeballs==2.4.6
+    # via aiohttp
+aiohttp==3.11.12
+    # via
+    #   aiohttp-retry
+    #   twilio
+aiohttp-retry==2.9.1
+    # via twilio
+aioice==0.9.0
+    # via aiortc
+aiortc==1.10.1
+    # via fastrtc
+aiosignal==1.3.2
+    # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.6.2.post1
+    # via
+    #   gradio
+    #   groq
+    #   httpx
+    #   openai
+    #   starlette
+attrs==25.1.0
+    # via aiohttp
+audioread==3.0.1
+    # via librosa
+av==12.3.0
+    # via aiortc
+certifi==2024.8.30
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==1.17.1
+    # via
+    #   aiortc
+    #   cryptography
+    #   pylibsrtp
+    #   soundfile
+charset-normalizer==3.4.0
+    # via requests
+click==8.1.7
+    # via
+    #   typer
+    #   uvicorn
+coloredlogs==15.0.1
+    # via onnxruntime
+cryptography==43.0.3
+    # via
+    #   aiortc
+    #   pyopenssl
+decorator==5.1.1
+    # via librosa
+distro==1.9.0
+    # via
+    #   groq
+    #   openai
+dnspython==2.7.0
+    # via aioice
+fastapi==0.115.5
+    # via gradio
+fastrtc==0.0.2.post4
+    # via -r demo/llama_code_editor/requirements.in
+ffmpy==0.4.0
+    # via gradio
+filelock==3.16.1
+    # via huggingface-hub
+flatbuffers==24.3.25
+    # via onnxruntime
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2024.10.0
+    # via
+    #   gradio-client
+    #   huggingface-hub
+google-crc32c==1.6.0
+    # via aiortc
+gradio==5.16.0
+    # via fastrtc
+gradio-client==1.7.0
+    # via gradio
+groq==0.18.0
+    # via -r demo/llama_code_editor/requirements.in
+h11==0.14.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.7
+    # via httpx
+httpx==0.27.2
+    # via
+    #   gradio
+    #   gradio-client
+    #   groq
+    #   openai
+    #   safehttpx
+huggingface-hub==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+humanfriendly==10.0
+    # via coloredlogs
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+ifaddr==0.2.0
+    # via aioice
+jinja2==3.1.4
+    # via gradio
+jiter==0.7.1
+    # via openai
+joblib==1.4.2
+    # via
+    #   librosa
+    #   scikit-learn
+lazy-loader==0.4
+    # via librosa
+librosa==0.10.2.post1
+    # via fastrtc
+llvmlite==0.43.0
+    # via numba
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==2.1.5
+    # via
+    #   gradio
+    #   jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+msgpack==1.1.0
+    # via librosa
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+numba==0.60.0
+    # via librosa
+numpy==2.0.2
+    # via
+    #   gradio
+    #   librosa
+    #   numba
+    #   onnxruntime
+    #   pandas
+    #   scikit-learn
+    #   scipy
+    #   soxr
+onnxruntime==1.20.1
+    # via fastrtc
+openai==1.54.4
+    # via -r demo/llama_code_editor/requirements.in
+orjson==3.10.11
+    # via gradio
+packaging==24.2
+    # via
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   lazy-loader
+    #   onnxruntime
+    #   pooch
+pandas==2.2.3
+    # via gradio
+pillow==11.0.0
+    # via gradio
+platformdirs==4.3.6
+    # via pooch
+pooch==1.8.2
+    # via librosa
+propcache==0.2.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==5.28.3
+    # via onnxruntime
+pycparser==2.22
+    # via cffi
+pydantic==2.9.2
+    # via
+    #   fastapi
+    #   gradio
+    #   groq
+    #   openai
+pydantic-core==2.23.4
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pyee==12.1.1
+    # via aiortc
+pygments==2.18.0
+    # via rich
+pyjwt==2.10.1
+    # via twilio
+pylibsrtp==0.10.0
+    # via aiortc
+pyopenssl==24.2.1
+    # via aiortc
+python-dateutil==2.9.0.post0
+    # via pandas
+python-dotenv==1.0.1
+    # via -r demo/llama_code_editor/requirements.in
+python-multipart==0.0.20
+    # via gradio
+pytz==2024.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   gradio
+    #   huggingface-hub
+requests==2.32.3
+    # via
+    #   huggingface-hub
+    #   pooch
+    #   twilio
+rich==13.9.4
+    # via typer
+ruff==0.9.6
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+scikit-learn==1.5.2
+    # via librosa
+scipy==1.14.1
+    # via
+    #   librosa
+    #   scikit-learn
+semantic-version==2.10.0
+    # via gradio
+shellingham==1.5.4
+    # via typer
+six==1.16.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via
+    #   anyio
+    #   groq
+    #   httpx
+    #   openai
+soundfile==0.12.1
+    # via librosa
+soxr==0.5.0.post1
+    # via librosa
+starlette==0.42.0
+    # via
+    #   fastapi
+    #   gradio
+sympy==1.13.3
+    # via onnxruntime
+threadpoolctl==3.5.0
+    # via scikit-learn
+tomlkit==0.12.0
+    # via gradio
+tqdm==4.67.0
+    # via
+    #   huggingface-hub
+    #   openai
+twilio==9.4.5
+    # via -r demo/llama_code_editor/requirements.in
+typer==0.13.1
+    # via gradio
+typing-extensions==4.12.2
+    # via
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   groq
+    #   huggingface-hub
+    #   librosa
+    #   openai
+    #   pydantic
+    #   pydantic-core
+    #   pyee
+    #   typer
+tzdata==2024.2
+    # via pandas
+urllib3==2.2.3
+    # via requests
+uvicorn==0.32.0
+    # via gradio
+websockets==12.0
+    # via gradio-client
+yarl==1.18.3
+    # via aiohttp
--- a/demo/llama_code_editor/ui.py
+++ b/demo/llama_code_editor/ui.py
@@ -0,0 +1,75 @@
+from pathlib import Path
+
+import gradio as gr
+from dotenv import load_dotenv
+from fastrtc import WebRTC, get_twilio_turn_credentials
+from gradio.utils import get_space
+
+try:
+    from demo.llama_code_editor.handler import (
+        CodeHandler,
+        display_in_sandbox,
+        system_prompt,
+    )
+except (ImportError, ModuleNotFoundError):
+    from handler import CodeHandler, display_in_sandbox, system_prompt
+
+load_dotenv()
+
+path = Path(__file__).parent / "assets"
+
+with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
+    history = gr.State([{"role": "system", "content": system_prompt}])  # chat history seeded with the system prompt
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.HTML(
+                """
+                <h1 style='text-align: center'>
+                Llama Code Editor
+                </h1>
+                <h2 style='text-align: center'>
+                Powered by SambaNova and Gradio-WebRTC ⚡️
+                </h2>
+                <p style='text-align: center'>
+                Create and edit single-file HTML applications with just your voice!
+                </p>
+                <p style='text-align: center'>
+                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
+                </p>
+                """
+            )
+            webrtc = WebRTC(
+                rtc_configuration=get_twilio_turn_credentials()
+                if get_space()
+                else None,
+                mode="send",
+                modality="audio",
+            )
+        with gr.Column(scale=10):
+            with gr.Tabs():
+                with gr.Tab("Sandbox"):
+                    sandbox = gr.HTML(value=(path / "sandbox.html").read_text(encoding="utf-8"))
+                with gr.Tab("Code"):
+                    code = gr.Code(
+                        language="html",
+                        max_lines=50,
+                        interactive=False,
+                        elem_classes="code-component",
+                    )
+                with gr.Tab("Chat"):
+                    cb = gr.Chatbot(type="messages")
+
+    webrtc.stream(
+        CodeHandler,
+        inputs=[webrtc, history, code],
+        outputs=[webrtc],
+        time_limit=90 if get_space() else None,
+        concurrency_limit=10 if get_space() else None,
+    )
+    webrtc.on_additional_outputs(  # fan the handler's (history, code) pair out to state, code view and chat
+        lambda history, code: (history, code, history), outputs=[history, code, cb]
+    )
+    code.change(display_in_sandbox, code, sandbox, queue=False)  # re-render the sandbox when the code changes
+
+if __name__ == "__main__":
+    demo.launch()  # run the Blocks app on its own when this module is executed directly
--- a/demo/llm_voice_chat/README.md
+++ b/demo/llm_voice_chat/README.md
@@ -0,0 +1,15 @@
+---
+title: LLM Voice Chat
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to an LLM with ElevenLabs
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/llm_voice_chat/README_gradio.md
+++ b/demo/llm_voice_chat/README_gradio.md
@@ -0,0 +1,15 @@
+---
+title: LLM Voice Chat (Gradio)
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: LLM Voice by ElevenLabs (Gradio)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/llm_voice_chat/app.py
+++ b/demo/llm_voice_chat/app.py
@@ -0,0 +1,97 @@
+import os
+import time
+
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from elevenlabs import ElevenLabs
+from fastapi import FastAPI
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    Stream,
+    get_stt_model,
+    get_twilio_turn_credentials,
+)
+from gradio.utils import get_space
+from groq import Groq
+from numpy.typing import NDArray
+
+# Load variables from a .env file into the environment before the API
+# clients below are constructed.
+load_dotenv()
+# No key is passed explicitly; presumably the Groq client picks up
+# GROQ_API_KEY from the environment — confirm against the groq SDK.
+groq_client = Groq()
+# The ElevenLabs key is read explicitly; os.getenv yields None when it is unset.
+tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+# Speech-to-text model from fastrtc, created once at import and reused by
+# every call to `response`.
+stt_model = get_stt_model()
+
+
+# See "Talk to Claude" in Cookbook for an example of how to keep
+# track of the chat history.
+def response(
+    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    chatbot: list[dict] | None = None,
+):
+    chatbot = chatbot or []
+    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
+    start = time.time()
+    text = stt_model.stt(audio)
+    print("transcription", time.time() - start)
+    print("prompt", text)
+    chatbot.append({"role": "user", "content": text})
+    yield AdditionalOutputs(chatbot)
+    messages.append({"role": "user", "content": text})
+    response_text = (
+        groq_client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            max_tokens=200,
+            messages=messages,  # type: ignore
+        )
+        .choices[0]
+        .message.content
+    )
+
+    chatbot.append({"role": "assistant", "content": response_text})
+
+    for i, chunk in enumerate(
+        tts_client.text_to_speech.convert_as_stream(
+            text=response_text,  # type: ignore
+            voice_id="JBFqnCBsd6RMkjVDRZzb",
+            model_id="eleven_multilingual_v2",
+            output_format="pcm_24000",
+        )
+    ):
+        if i == 0:
+            yield AdditionalOutputs(chatbot)
+        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
+        yield (24000, audio_array)
+
+
+# The same Chatbot component is registered both as an additional input
+# (its value is passed to `response` as the history) and as the additional
+# output that displays the updated history.
+chatbot = gr.Chatbot(type="messages")
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=ReplyOnPause(response, input_sample_rate=16000),
+    # Keep only the second argument, discarding the first.
+    additional_outputs_handler=lambda a, b: b,
+    additional_inputs=[chatbot],
+    additional_outputs=[chatbot],
+    # TURN credentials and the usage limits below are applied only when
+    # get_space() is truthy; otherwise they are left as None.
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+    ui_args={"title": "LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)"},
+)
+
+# Mount the UI generated by the Stream onto a FastAPI app at "/", so the
+# interface does not have to be built by hand.
+app = FastAPI()
+app = gr.mount_gradio_app(app, stream.ui, path="/")
+
+
+if __name__ == "__main__":
+    import os
+
+    os.environ["GRADIO_SSR_MODE"] = "false"
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        stream.ui.launch(server_port=7860)
--- a/demo/llm_voice_chat/requirements.txt
+++ b/demo/llm_voice_chat/requirements.txt
@@ -0,0 +1,6 @@
+fastrtc[stopword]
+python-dotenv
+openai
+twilio
+groq
+elevenlabs
--- a/demo/moonshine_live/README.md
+++ b/demo/moonshine_live/README.md
@@ -0,0 +1,16 @@
+---
+title: Moonshine Live Transcription
+emoji: 🌕
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.17.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Real-time captions with Moonshine ONNX
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/moonshine_live/app.py
+++ b/demo/moonshine_live/app.py
@@ -0,0 +1,73 @@
+from functools import lru_cache
+from typing import Generator, Literal
+
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    Stream,
+    audio_to_float32,
+    get_twilio_turn_credentials,
+)
+from moonshine_onnx import MoonshineOnnxModel, load_tokenizer
+from numpy.typing import NDArray
+
+load_dotenv()
+
+
+@lru_cache(maxsize=None)
+def load_moonshine(
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+) -> MoonshineOnnxModel:
+    return MoonshineOnnxModel(model_name=model_name)
+
+
+# Tokenizer created once at import time; `stt` uses it to decode the model's
+# output tokens into text.
+tokenizer = load_tokenizer()
+
+
+def stt(
+    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+    captions: str,
+) -> Generator[AdditionalOutputs, None, None]:
+    moonshine = load_moonshine(model_name)
+    sr, audio_np = audio  # type: ignore
+    if audio_np.dtype == np.int16:
+        audio_np = audio_to_float32(audio)
+    if audio_np.ndim == 1:
+        audio_np = audio_np.reshape(1, -1)
+    tokens = moonshine.generate(audio_np)
+    yield AdditionalOutputs(
+        (captions + "\n" + tokenizer.decode_batch(tokens)[0]).strip()
+    )
+
+
+# Textbox holding the running transcript; it is both an additional input to
+# `stt` and the additional output that `stt` updates.
+captions = gr.Textbox(label="Captions")
+stream = Stream(
+    ReplyOnPause(stt, input_sample_rate=16000),
+    modality="audio",
+    mode="send",
+    ui_args={
+        "title": "Live Captions by Moonshine",
+        "icon": "default-favicon.ico",
+        "icon_button_color": "#5c5c5c",
+        "pulse_color": "#a7c6fc",
+        "icon_radius": 0,
+    },
+    # TURN credentials are requested unconditionally here.
+    rtc_configuration=get_twilio_turn_credentials(),
+    # Order matters: these map to the `model_name` and `captions` parameters
+    # of `stt`, after the audio argument.
+    additional_inputs=[
+        gr.Radio(
+            choices=["moonshine/base", "moonshine/tiny"],
+            value="moonshine/base",
+            label="Model",
+        ),
+        captions,
+    ],
+    additional_outputs=[captions],
+    # NOTE(review): `stt` already yields the previous captions plus the new
+    # text, and this handler joins `prev` in front of that again — verify
+    # that earlier captions are not duplicated in the textbox.
+    additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(),
+)
+
+# Launch the generated UI only when run as a script.
+if __name__ == "__main__":
+    stream.ui.launch()
--- a/demo/moonshine_live/default-favicon.ico
+++ b/demo/moonshine_live/default-favicon.ico
--- a/demo/moonshine_live/requirements.txt
+++ b/demo/moonshine_live/requirements.txt
@@ -0,0 +1,3 @@
+fastrtc[vad]
+useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx
+twilio
--- a/demo/nextjs_voice_chat/README.md
+++ b/demo/nextjs_voice_chat/README.md
@@ -0,0 +1,74 @@
+# FastRTC POC
+A simple POC for a fast real-time voice chat application using FastAPI and FastRTC by [rohanprichard](https://github.com/rohanprichard). I wanted to make one as an example with more production-ready languages, rather than just Gradio.
+
+## Setup
+1. Set your API keys in an `.env` file based on the `.env.example` file
+2. Create a virtual environment and install the dependencies
+    ```bash
+    python3 -m venv env
+    source env/bin/activate
+    pip install -r requirements.txt
+    ```
+
+3. Run the server
+    ```bash
+    ./run.sh
+    ```
+4. Navigate into the frontend directory in another terminal
+    ```bash
+    cd frontend/fastrtc-demo
+    ```
+5. Run the frontend
+    ```bash
+    npm install
+    npm run dev
+    ```
+6. Go to the URL and click the microphone icon to start chatting!
+
+7. Reset chats by clicking the trash button on the bottom right
+
+## Notes
+You can choose to not install the requirements for TTS and STT by removing the `[tts, stt]` from the specifier in the `requirements.txt` file.
+
+- The STT is currently using the ElevenLabs API.
+- The LLM is currently using the OpenAI API.
+- The TTS is currently using the ElevenLabs API.
+- The VAD is currently using the Silero VAD model.
+- You may need to install ffmpeg if you get errors in STT
+
+The prompt can be changed in the `backend/server.py` file and modified as you like.
+
+### Audio Parameters 
+
+#### AlgoOptions
+
+- **audio_chunk_duration**: Length of audio chunks in seconds. Smaller values allow for faster processing but may be less accurate.
+- **started_talking_threshold**: If a chunk has more than this many seconds of speech, the system considers that the user has started talking.
+- **speech_threshold**: After the user has started speaking, if a chunk has less than this many seconds of speech, the system considers that the user has paused.
+
+#### SileroVadOptions
+
+- **threshold**: Speech probability threshold (0.0-1.0). Values above this are considered speech. Higher values are more strict.
+- **min_speech_duration_ms**: Speech segments shorter than this (in milliseconds) are filtered out.
+- **min_silence_duration_ms**: The system waits for this duration of silence (in milliseconds) before considering speech to be finished.
+- **speech_pad_ms**: Padding added to both ends of detected speech segments to prevent cutting off words.
+- **max_speech_duration_s**: Maximum allowed duration for a speech segment in seconds. Prevents indefinite listening.
+
+### Tuning Recommendations
+
+- If the AI interrupts you too early:
+  - Increase `min_silence_duration_ms`
+  - Increase `speech_threshold`
+  - Increase `speech_pad_ms`
+
+- If the AI is slow to respond after you finish speaking:
+  - Decrease `min_silence_duration_ms`
+  - Decrease `speech_threshold`
+
+- If the system fails to detect some speech:
+  - Lower the `threshold` value
+  - Decrease `started_talking_threshold`
+
+
+## Credits:
+Credit for the UI components goes to Shadcn, Aceternity UI and Kokonut UI.
--- a/demo/nextjs_voice_chat/backend/env.py
+++ b/demo/nextjs_voice_chat/backend/env.py
@@ -0,0 +1,7 @@
+from dotenv import load_dotenv
+import os
+
+# Load variables from a .env file before the keys below are read.
+load_dotenv()
+
+# API keys re-exported for the backend; each is None when its environment
+# variable is not set.
+LLM_API_KEY = os.getenv("LLM_API_KEY")
+ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
--- a/demo/nextjs_voice_chat/backend/server.py
+++ b/demo/nextjs_voice_chat/backend/server.py
@@ -0,0 +1,129 @@
+import fastapi
+from fastrtc import ReplyOnPause, Stream, AlgoOptions, SileroVadOptions
+from fastrtc.utils import audio_to_bytes
+from openai import OpenAI
+import logging
+import time
+from fastapi.middleware.cors import CORSMiddleware
+from elevenlabs import VoiceSettings, stream
+from elevenlabs.client import ElevenLabs
+import numpy as np
+
+from .env import LLM_API_KEY, ELEVENLABS_API_KEY
+
+
+sys_prompt = """
+You are a helpful assistant. You are witty, engaging and fun. You love being interactive with the user. 
+You also can add minimalistic utterances like 'uh-huh' or 'mm-hmm' to the conversation to make it more natural. However, only vocalization are allowed, no actions or other non-vocal sounds.
+Begin a conversation with a self-deprecating joke like 'I'm not sure if I'm ready for this...' or 'I bet you already regret clicking that button...'
+"""
+
+# Conversation history sent to the LLM. It is a single module-level list, so
+# every call to `echo` appends to the same history until `/reset` replaces it.
+messages = [{"role": "system", "content": sys_prompt}]
+
+# Client for the chat-completion requests made in `echo`.
+openai_client = OpenAI(api_key=LLM_API_KEY)
+
+# Client used for both speech-to-text and text-to-speech in `echo`.
+elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+
+logging.basicConfig(level=logging.INFO)
+
+
+def echo(audio):
+    stt_time = time.time()
+
+    logging.info("Performing STT")
+
+    transcription = elevenlabs_client.speech_to_text.convert(
+        file=audio_to_bytes(audio),
+        model_id="scribe_v1",
+        tag_audio_events=False,
+        language_code="eng",
+        diarize=False,
+    )
+    prompt = transcription.text
+    if prompt == "":
+        logging.info("STT returned empty string")
+        return
+    logging.info(f"STT response: {prompt}")
+
+    messages.append({"role": "user", "content": prompt})
+
+    logging.info(f"STT took {time.time() - stt_time} seconds")
+
+    llm_time = time.time()
+
+    def text_stream():
+        global full_response
+        full_response = ""
+
+        response = openai_client.chat.completions.create(
+            model="gpt-3.5-turbo", messages=messages, max_tokens=200, stream=True
+        )
+
+        for chunk in response:
+            if chunk.choices[0].finish_reason == "stop":
+                break
+            if chunk.choices[0].delta.content:
+                full_response += chunk.choices[0].delta.content
+                yield chunk.choices[0].delta.content
+
+    audio_stream = elevenlabs_client.generate(
+        text=text_stream(),
+        voice="Rachel",  # Cassidy is also really good
+        voice_settings=VoiceSettings(
+            similarity_boost=0.9, stability=0.6, style=0.4, speed=1
+        ),
+        model="eleven_multilingual_v2",
+        output_format="pcm_24000",
+        stream=True,
+    )
+
+    for audio_chunk in audio_stream:
+        audio_array = (
+            np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) / 32768.0
+        )
+        yield (24000, audio_array)
+
+    messages.append({"role": "assistant", "content": full_response + " "})
+    logging.info(f"LLM response: {full_response}")
+    logging.info(f"LLM took {time.time() - llm_time} seconds")
+
+
+# Audio stream in both directions; `echo` is wrapped in ReplyOnPause with the
+# pause-detection and VAD settings below.
+stream = Stream(
+    ReplyOnPause(
+        echo,
+        algo_options=AlgoOptions(
+            audio_chunk_duration=0.5,
+            started_talking_threshold=0.1,
+            speech_threshold=0.03,
+        ),
+        model_options=SileroVadOptions(
+            threshold=0.75,
+            min_speech_duration_ms=250,
+            min_silence_duration_ms=1500,
+            speech_pad_ms=400,
+            max_speech_duration_s=15,
+        ),
+    ),
+    modality="audio",
+    mode="send-receive",
+)
+
+app = fastapi.FastAPI()
+
+# NOTE(review): every origin, method and header is allowed and credentials
+# are enabled. Presumably acceptable for a local demo; restrict
+# `allow_origins` before exposing this server publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Register the stream on the FastAPI app.
+stream.mount(app)
+
+
+@app.get("/reset")
+async def reset():
+    global messages
+    logging.info("Resetting chat")
+    messages = [{"role": "system", "content": sys_prompt}]
+    return {"status": "success"}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/.gitignore
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/.gitignore
@@ -0,0 +1,41 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/versions
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+.pnpm-debug.log*
+
+# env files (can opt-in for committing if needed)
+.env*
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/README.md
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/README.md
@@ -0,0 +1,36 @@
+This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
+
+## Getting Started
+
+First, run the development server:
+
+```bash
+npm run dev
+# or
+yarn dev
+# or
+pnpm dev
+# or
+bun dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
+
+This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
+
+## Learn More
+
+To learn more about Next.js, take a look at the following resources:
+
+- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
+- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+
+You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
+
+## Deploy on Vercel
+
+The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+
+Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/globals.css
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/globals.css
@@ -0,0 +1,130 @@
+@import "tailwindcss";
+
+@plugin "tailwindcss-animate";
+
+@custom-variant dark (&:is(.dark *));
+
+@theme inline {
+  --color-background: var(--background);
+  --color-foreground: var(--foreground);
+  --font-sans: var(--font-geist-sans);
+  --font-mono: var(--font-geist-mono);
+  --color-sidebar-ring: var(--sidebar-ring);
+  --color-sidebar-border: var(--sidebar-border);
+  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
+  --color-sidebar-accent: var(--sidebar-accent);
+  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
+  --color-sidebar-primary: var(--sidebar-primary);
+  --color-sidebar-foreground: var(--sidebar-foreground);
+  --color-sidebar: var(--sidebar);
+  --color-chart-5: var(--chart-5);
+  --color-chart-4: var(--chart-4);
+  --color-chart-3: var(--chart-3);
+  --color-chart-2: var(--chart-2);
+  --color-chart-1: var(--chart-1);
+  --color-ring: var(--ring);
+  --color-input: var(--input);
+  --color-border: var(--border);
+  --color-destructive-foreground: var(--destructive-foreground);
+  --color-destructive: var(--destructive);
+  --color-accent-foreground: var(--accent-foreground);
+  --color-accent: var(--accent);
+  --color-muted-foreground: var(--muted-foreground);
+  --color-muted: var(--muted);
+  --color-secondary-foreground: var(--secondary-foreground);
+  --color-secondary: var(--secondary);
+  --color-primary-foreground: var(--primary-foreground);
+  --color-primary: var(--primary);
+  --color-popover-foreground: var(--popover-foreground);
+  --color-popover: var(--popover);
+  --color-card-foreground: var(--card-foreground);
+  --color-card: var(--card);
+  --radius-sm: calc(var(--radius) - 4px);
+  --radius-md: calc(var(--radius) - 2px);
+  --radius-lg: var(--radius);
+  --radius-xl: calc(var(--radius) + 4px);
+}
+
+:root {
+  --background: oklch(1 0 0);
+  --foreground: oklch(0.129 0.042 264.695);
+  --card: oklch(1 0 0);
+  --card-foreground: oklch(0.129 0.042 264.695);
+  --popover: oklch(1 0 0);
+  --popover-foreground: oklch(0.129 0.042 264.695);
+  --primary: oklch(0.208 0.042 265.755);
+  --primary-foreground: oklch(0.984 0.003 247.858);
+  --secondary: oklch(0.968 0.007 247.896);
+  --secondary-foreground: oklch(0.208 0.042 265.755);
+  --muted: oklch(0.968 0.007 247.896);
+  --muted-foreground: oklch(0.554 0.046 257.417);
+  --accent: oklch(0.968 0.007 247.896);
+  --accent-foreground: oklch(0.208 0.042 265.755);
+  --destructive: oklch(0.577 0.245 27.325);
+  --destructive-foreground: oklch(0.577 0.245 27.325);
+  --border: oklch(0.929 0.013 255.508);
+  --input: oklch(0.929 0.013 255.508);
+  --ring: oklch(0.704 0.04 256.788);
+  --chart-1: oklch(0.646 0.222 41.116);
+  --chart-2: oklch(0.6 0.118 184.704);
+  --chart-3: oklch(0.398 0.07 227.392);
+  --chart-4: oklch(0.828 0.189 84.429);
+  --chart-5: oklch(0.769 0.188 70.08);
+  --radius: 0.625rem;
+  --sidebar: oklch(0.984 0.003 247.858);
+  --sidebar-foreground: oklch(0.129 0.042 264.695);
+  --sidebar-primary: oklch(0.208 0.042 265.755);
+  --sidebar-primary-foreground: oklch(0.984 0.003 247.858);
+  --sidebar-accent: oklch(0.968 0.007 247.896);
+  --sidebar-accent-foreground: oklch(0.208 0.042 265.755);
+  --sidebar-border: oklch(0.929 0.013 255.508);
+  --sidebar-ring: oklch(0.704 0.04 256.788);
+}
+
+.dark {
+  --background: oklch(0.129 0.042 264.695);
+  --foreground: oklch(0.984 0.003 247.858);
+  --card: oklch(0.129 0.042 264.695);
+  --card-foreground: oklch(0.984 0.003 247.858);
+  --popover: oklch(0.129 0.042 264.695);
+  --popover-foreground: oklch(0.984 0.003 247.858);
+  --primary: oklch(0.984 0.003 247.858);
+  --primary-foreground: oklch(0.208 0.042 265.755);
+  --secondary: oklch(0.279 0.041 260.031);
+  --secondary-foreground: oklch(0.984 0.003 247.858);
+  --muted: oklch(0.279 0.041 260.031);
+  --muted-foreground: oklch(0.704 0.04 256.788);
+  --accent: oklch(0.279 0.041 260.031);
+  --accent-foreground: oklch(0.984 0.003 247.858);
+  --destructive: oklch(0.396 0.141 25.723);
+  --destructive-foreground: oklch(0.637 0.237 25.331);
+  --border: oklch(0.279 0.041 260.031);
+  --input: oklch(0.279 0.041 260.031);
+  --ring: oklch(0.446 0.043 257.281);
+  --chart-1: oklch(0.488 0.243 264.376);
+  --chart-2: oklch(0.696 0.17 162.48);
+  --chart-3: oklch(0.769 0.188 70.08);
+  --chart-4: oklch(0.627 0.265 303.9);
+  --chart-5: oklch(0.645 0.246 16.439);
+  --sidebar: oklch(0.208 0.042 265.755);
+  --sidebar-foreground: oklch(0.984 0.003 247.858);
+  --sidebar-primary: oklch(0.488 0.243 264.376);
+  --sidebar-primary-foreground: oklch(0.984 0.003 247.858);
+  --sidebar-accent: oklch(0.279 0.041 260.031);
+  --sidebar-accent-foreground: oklch(0.984 0.003 247.858);
+  --sidebar-border: oklch(0.279 0.041 260.031);
+  --sidebar-ring: oklch(0.446 0.043 257.281);
+}
+
+@layer base {
+  * {
+    @apply border-border outline-ring/50;
+  }
+  body {
+    @apply bg-background text-foreground;
+  }
+}
+
+.no-transitions * {
+  transition: none !important;
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/layout.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/layout.tsx
@@ -0,0 +1,44 @@
+import type { Metadata } from "next";
+import { Geist, Geist_Mono } from "next/font/google";
+import "./globals.css";
+import { ThemeProvider } from "@/components/theme-provider";
+import { ThemeTransition } from "@/components/ui/theme-transition";
+
+// Geist Sans, exposed to CSS through the --font-geist-sans variable.
+const geistSans = Geist({
+  variable: "--font-geist-sans",
+  subsets: ["latin"],
+});
+
+// Geist Mono, exposed to CSS through the --font-geist-mono variable.
+const geistMono = Geist_Mono({
+  variable: "--font-geist-mono",
+  subsets: ["latin"],
+});
+
+// Title and description exported as this layout's metadata.
+export const metadata: Metadata = {
+  title: "FastRTC Demo",
+  description: "Interactive WebRTC demo with audio visualization",
+};
+
+/**
+ * Root layout: applies the Geist font variables to <body> and wraps the page
+ * in the theme provider (dark by default), followed by the theme transition
+ * component.
+ */
+export default function RootLayout(
+  props: Readonly<{
+    children: React.ReactNode;
+  }>
+) {
+  const { children } = props;
+  // Both font CSS variables plus antialiasing, applied to <body>.
+  const bodyClassName = `${geistSans.variable} ${geistMono.variable} antialiased`;
+
+  return (
+    <html lang="en" suppressHydrationWarning>
+      <body className={bodyClassName}>
+        <ThemeProvider
+          attribute="class"
+          defaultTheme="dark"
+          enableSystem
+          disableTransitionOnChange
+        >
+          {children}
+          <ThemeTransition />
+        </ThemeProvider>
+      </body>
+    </html>
+  );
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/page.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/page.tsx
@@ -0,0 +1,16 @@
+import { BackgroundCircleProvider } from "@/components/background-circle-provider";
+import { ThemeToggle } from "@/components/ui/theme-toggle";
+import { ResetChat } from "@/components/ui/reset-chat";
+/**
+ * Landing page: the full-screen background/voice component, with the theme
+ * toggle pinned to the top-right corner and the reset-chat button pinned to
+ * the bottom-right corner.
+ */
+export default function Home() {
+  return (
+    <div className="flex flex-col items-center justify-center h-screen">
+      <BackgroundCircleProvider />
+      <div className="absolute top-4 right-4 z-10">
+        <ThemeToggle />
+      </div>
+      <div className="absolute bottom-4 right-4 z-10">
+        <ResetChat />
+      </div>
+    </div>
+  );
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components.json
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components.json
@@ -0,0 +1,21 @@
+{
+  "$schema": "https://ui.shadcn.com/schema.json",
+  "style": "new-york",
+  "rsc": true,
+  "tsx": true,
+  "tailwind": {
+    "config": "",
+    "css": "app/globals.css",
+    "baseColor": "slate",
+    "cssVariables": true,
+    "prefix": ""
+  },
+  "aliases": {
+    "components": "@/components",
+    "utils": "@/lib/utils",
+    "ui": "@/components/ui",
+    "lib": "@/lib",
+    "hooks": "@/hooks"
+  },
+  "iconLibrary": "lucide"
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/background-circle-provider.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/background-circle-provider.tsx
@@ -0,0 +1,123 @@
+"use client"
+
+import { useState, useEffect, useRef, useCallback } from "react";
+import { BackgroundCircles } from "@/components/ui/background-circles";
+import { AIVoiceInput } from "@/components/ui/ai-voice-input";
+import { WebRTCClient } from "@/lib/webrtc-client";
+
+/**
+ * Owns the WebRTC client and wires it to the UI: the animated background
+ * receives the smoothed audio level and connection state, the voice input
+ * starts/stops the connection, and a hidden <audio> element plays the
+ * incoming stream. Clicking anywhere in the wrapper cycles the colour variant.
+ */
+export function BackgroundCircleProvider() {
+    const [currentVariant, setCurrentVariant] = 
+        useState<keyof typeof COLOR_VARIANTS>("octonary");
+    const [isConnected, setIsConnected] = useState(false);
+    const [webrtcClient, setWebrtcClient] = useState<WebRTCClient | null>(null);
+    const [audioLevel, setAudioLevel] = useState(0);
+    const audioRef = useRef<HTMLAudioElement>(null);
+
+    // Memoize callbacks to prevent recreation on each render
+    // (they are dependencies of the effect that creates the client).
+    const handleConnected = useCallback(() => setIsConnected(true), []);
+    const handleDisconnected = useCallback(() => setIsConnected(false), []);
+    
+    // Route the incoming media stream to the hidden <audio> element.
+    const handleAudioStream = useCallback((stream: MediaStream) => {
+        if (audioRef.current) {
+            audioRef.current.srcObject = stream;
+        }
+    }, []);
+    
+    const handleAudioLevel = useCallback((level: number) => {
+        // Apply some smoothing to the audio level:
+        // 70% of the previous value plus 30% of the new sample.
+        setAudioLevel(prev => prev * 0.7 + level * 0.3);
+    }, []);
+
+    // Get all available variants (recomputed on every render)
+    const variants = Object.keys(
+        COLOR_VARIANTS
+    ) as (keyof typeof COLOR_VARIANTS)[];
+
+    // Function to change to the next color variant, wrapping around at the end
+    const changeVariant = () => {
+        const currentIndex = variants.indexOf(currentVariant);
+        const nextVariant = variants[(currentIndex + 1) % variants.length];
+        setCurrentVariant(nextVariant);
+    };
+
+    useEffect(() => {
+        // Initialize WebRTC client with memoized callbacks
+        const client = new WebRTCClient({
+            onConnected: handleConnected,
+            onDisconnected: handleDisconnected,
+            onAudioStream: handleAudioStream,
+            onAudioLevel: handleAudioLevel
+        });
+        setWebrtcClient(client);
+
+        // Disconnect this client when the effect is cleaned up.
+        return () => {
+            client.disconnect();
+        };
+    }, [handleConnected, handleDisconnected, handleAudioStream, handleAudioLevel]);
+
+    // No-op while the client has not been created yet (optional chaining).
+    const handleStart = () => {
+        webrtcClient?.connect();
+    };
+
+    const handleStop = () => {
+        webrtcClient?.disconnect();
+    };
+
+    return (
+        <div 
+            className="relative w-full h-full"
+            onClick={changeVariant} // Add click handler to change color
+        >
+            <BackgroundCircles 
+                variant={currentVariant} 
+                audioLevel={audioLevel}
+                isActive={isConnected}
+            />
+            <div className="absolute inset-0 flex items-center justify-center">
+                <AIVoiceInput 
+                    onStart={handleStart}
+                    onStop={handleStop}
+                    isConnected={isConnected}
+                />
+            </div>
+            <audio ref={audioRef} autoPlay hidden />
+        </div>
+    );
+}
+
+// NOTE(review): the default export is an object that holds the component,
+// not the component itself — confirm this is what importers expect.
+export default { BackgroundCircleProvider }
+
+// Colour themes for the background: each variant lists three border classes
+// and one gradient start class. Key order is the order in which
+// BackgroundCircleProvider cycles through the variants.
+const COLOR_VARIANTS = {
+    primary: {
+        border: [
+            "border-emerald-500/60",
+            "border-cyan-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-emerald-500/30",
+    },
+    secondary: {
+        border: [
+            "border-violet-500/60",
+            "border-fuchsia-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-violet-500/30",
+    },
+    senary: {
+        border: [
+            "border-blue-500/60",
+            "border-sky-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-blue-500/30",
+    }, // blue
+    octonary: {
+        border: [
+            "border-red-500/60",
+            "border-rose-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-red-500/30",
+    },
+} as const;
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/theme-provider.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/theme-provider.tsx
@@ -0,0 +1,101 @@
+"use client";
+
+import { createContext, useContext, useEffect, useState } from "react";
+
+// The three selectable theme values.
+type Theme = "light" | "dark" | "system";
+
+// Props accepted by ThemeProvider; everything except `children` is optional.
+type ThemeProviderProps = {
+  children: React.ReactNode;
+  defaultTheme?: Theme;
+  storageKey?: string;
+  attribute?: string;
+  enableSystem?: boolean;
+  disableTransitionOnChange?: boolean;
+};
+
+// Value exposed through the context: the current theme and its setter.
+type ThemeProviderState = {
+  theme: Theme;
+  setTheme: (theme: Theme) => void;
+};
+
+// Default context value, used when no ThemeProvider is mounted above:
+// theme "system" and a setter that does nothing.
+const initialState: ThemeProviderState = {
+  theme: "system",
+  setTheme: () => null,
+};
+
+const ThemeProviderContext = createContext<ThemeProviderState>(initialState);
+
+/**
+ * Keeps the active theme in React state, mirrors it onto the <html> element
+ * as a "light"/"dark" class and persists it in localStorage under
+ * `storageKey`.
+ *
+ * NOTE(review): `attribute` is accepted but never read in this function; the
+ * theme is always applied through classList.
+ */
+export function ThemeProvider({
+  children,
+  defaultTheme = "system",
+  storageKey = "theme",
+  attribute = "class",
+  enableSystem = true,
+  disableTransitionOnChange = false,
+  ...props
+}: ThemeProviderProps) {
+  const [theme, setTheme] = useState<Theme>(defaultTheme);
+
+  // Restore a previously saved theme; with no saved value and a "system"
+  // default, resolve the OS preference to "dark" or "light".
+  useEffect(() => {
+    const savedTheme = localStorage.getItem(storageKey) as Theme | null;
+    
+    if (savedTheme) {
+      setTheme(savedTheme);
+    } else if (defaultTheme === "system" && enableSystem) {
+      const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches
+        ? "dark"
+        : "light";
+      setTheme(systemTheme);
+    }
+  }, [defaultTheme, storageKey, enableSystem]);
+
+  // Apply the current theme to <html> and persist it.
+  useEffect(() => {
+    const root = window.document.documentElement;
+    
+    if (disableTransitionOnChange) {
+      // Suppress CSS transitions while the theme class is swapped.
+      root.classList.add("no-transitions");
+      
+      // Force a reflow
+      window.getComputedStyle(root).getPropertyValue("opacity");
+      
+      // NOTE(review): this timer is not cleared when the effect is cleaned up.
+      setTimeout(() => {
+        root.classList.remove("no-transitions");
+      }, 0);
+    }
+    
+    root.classList.remove("light", "dark");
+    
+    if (theme === "system" && enableSystem) {
+      const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches
+        ? "dark"
+        : "light";
+      root.classList.add(systemTheme);
+    } else {
+      root.classList.add(theme);
+    }
+
+    // NOTE(review): this also stores the value produced by the effect above,
+    // so a resolved "dark"/"light" replaces "system" in storage — confirm
+    // that is intended.
+    localStorage.setItem(storageKey, theme);
+  }, [theme, storageKey, enableSystem, disableTransitionOnChange]);
+
+  const value = {
+    theme,
+    setTheme: (theme: Theme) => {
+      setTheme(theme);
+    },
+  };
+
+  // `props` holds whatever was not destructured above; per
+  // ThemeProviderProps that should be nothing.
+  return (
+    <ThemeProviderContext.Provider {...props} value={value}>
+      {children}
+    </ThemeProviderContext.Provider>
+  );
+}
+
+/**
+ * Hook returning the current theme and its setter from the nearest
+ * ThemeProvider.
+ *
+ * NOTE(review): the context is created with a non-undefined default
+ * (`initialState`), so the `undefined` guard below does not fire when the
+ * provider is missing; callers would get the default value instead.
+ */
+export const useTheme = () => {
+  const context = useContext(ThemeProviderContext);
+
+  if (context === undefined)
+    throw new Error("useTheme must be used within a ThemeProvider");
+
+  return context;
+};
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/ai-voice-input.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/ai-voice-input.tsx
@@ -0,0 +1,114 @@
+"use client";
+
+import { Mic, Square } from "lucide-react";
+import { useState, useEffect } from "react";
+import { cn } from "@/lib/utils";
+
+/** Props accepted by `AIVoiceInput`. */
+interface AIVoiceInputProps {
+  /** Called when the button is clicked while the input is not active. */
+  onStart?: () => void;
+  /** Called when the button is clicked while active; receives the timer value in seconds. */
+  onStop?: (duration: number) => void;
+  /** Connection flag; the component mirrors it into its status label and active state. */
+  isConnected?: boolean;
+  /** Extra class names merged into the outer wrapper element. */
+  className?: string;
+}
+
+/**
+ * Microphone toggle button with a status label and an elapsed-time readout.
+ *
+ * `isConnected` drives both the status label and the active state: when it is
+ * true the timer counts up once per second, when it is false the timer resets
+ * to zero. Clicking the button calls `onStart` while inactive and
+ * `onStop(elapsedSeconds)` while active.
+ *
+ * NOTE(review): after `onStart` the status stays "connecting" (button disabled)
+ * until `isConnected` changes; if the caller's connection attempt fails while
+ * `isConnected` was already false, nothing here resets it - confirm the caller
+ * handles that case.
+ */
+export function AIVoiceInput({
+  onStart,
+  onStop,
+  isConnected = false,
+  className
+}: AIVoiceInputProps) {
+  const [active, setActive] = useState(false);
+  const [time, setTime] = useState(0);
+  const [status, setStatus] = useState<'disconnected' | 'connecting' | 'connected'>('disconnected');
+
+  // Count seconds only while active; going inactive resets the readout.
+  useEffect(() => {
+    if (!active) {
+      setTime(0);
+      return undefined;
+    }
+
+    const intervalId = setInterval(() => {
+      setTime((t) => t + 1);
+    }, 1000);
+
+    return () => clearInterval(intervalId);
+  }, [active]);
+
+  // Mirror the externally owned connection flag into local UI state.
+  useEffect(() => {
+    if (isConnected) {
+      setStatus('connected');
+      setActive(true);
+    } else {
+      setStatus('disconnected');
+      setActive(false);
+    }
+  }, [isConnected]);
+
+  // Formats a second count as zero-padded "MM:SS".
+  const formatTime = (seconds: number) => {
+    const mins = Math.floor(seconds / 60);
+    const secs = seconds % 60;
+    return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`;
+  };
+
+  const handleStart = () => {
+    setStatus('connecting');
+    onStart?.();
+  };
+
+  const handleStop = () => {
+    onStop?.(time);
+    setStatus('disconnected');
+  };
+
+  return (
+    <div className={cn("w-full py-4", className)}>
+      <div className="relative max-w-xl w-full mx-auto flex items-center flex-col gap-4">
+        <div className={cn(
+          "px-2 py-1 rounded-md text-xs font-medium bg-black/10 dark:bg-white/10 text-gray-700 dark:text-white"
+        )}>
+          {status === 'connected' ? 'Connected' : status === 'connecting' ? 'Connecting...' : 'Disconnected'}
+        </div>
+
+        <button
+          className={cn(
+            "group w-16 h-16 rounded-xl flex items-center justify-center transition-colors",
+            active
+              ? "bg-red-500/20 hover:bg-red-500/30"
+              : "bg-black/10 hover:bg-black/20 dark:bg-white/10 dark:hover:bg-white/20"
+          )}
+          type="button"
+          onClick={active ? handleStop : handleStart}
+          disabled={status === 'connecting'}
+        >
+          {status === 'connecting' ? (
+            <div
+              className="w-6 h-6 rounded-sm animate-spin bg-black dark:bg-white cursor-pointer pointer-events-auto"
+              style={{ animationDuration: "3s" }}
+            />
+          ) : active ? (
+            <Square className="w-6 h-6 text-red-500" />
+          ) : (
+            <Mic className="w-6 h-6 text-black/70 dark:text-white/70" />
+          )}
+        </button>
+
+        <span
+          className={cn(
+            "font-mono text-sm transition-opacity duration-300",
+            active
+              ? "text-black/70 dark:text-white/70"
+              : "text-black/30 dark:text-white/30"
+          )}
+        >
+          {formatTime(time)}
+        </span>
+      </div>
+    </div>
+  );
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/background-circles.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/background-circles.tsx
@@ -0,0 +1,309 @@
+"use client";
+
+import { motion } from "framer-motion";
+import clsx from "clsx";
+import { useState, useEffect } from "react";
+
+/** Props accepted by `BackgroundCircles`. */
+interface BackgroundCirclesProps {
+    /** Accepted by the component but not used in its rendered output in this file. */
+    title?: string;
+    /** Accepted by the component but not used in its rendered output in this file. */
+    description?: string;
+    /** Extra class names appended to the main container. */
+    className?: string;
+    /** Key into `COLOR_VARIANTS` selecting the ring border and gradient classes. */
+    variant?: keyof typeof COLOR_VARIANTS;
+    /** Audio level; multiplied by 1.5 and capped at 1 before it drives the animation. */
+    audioLevel?: number;
+    /** When false, `audioLevel` is ignored and the idle animation values are used. */
+    isActive?: boolean;
+}
+
+// Tailwind class presets for each color variant: `border` holds one border
+// class per ring (indexed 0-2 by BackgroundCircles) and `gradient` holds the
+// `from-*` gradient start class shared by all rings.
+// Note: `quinary` and `octonary` carry identical values.
+const COLOR_VARIANTS = {
+    primary: {
+        border: [
+            "border-emerald-500/60",
+            "border-cyan-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-emerald-500/30",
+    },
+    secondary: {
+        border: [
+            "border-violet-500/60",
+            "border-fuchsia-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-violet-500/30",
+    },
+    tertiary: {
+        border: [
+            "border-orange-500/60",
+            "border-yellow-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-orange-500/30",
+    },
+    quaternary: {
+        border: [
+            "border-purple-500/60",
+            "border-pink-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-purple-500/30",
+    },
+    quinary: {
+        border: [
+            "border-red-500/60",
+            "border-rose-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-red-500/30",
+    }, // red
+    senary: {
+        border: [
+            "border-blue-500/60",
+            "border-sky-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-blue-500/30",
+    }, // blue
+    septenary: {
+        border: [
+            "border-gray-500/60",
+            "border-gray-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-gray-500/30",
+    },
+    octonary: {
+        border: [
+            "border-red-500/60",
+            "border-rose-400/50",
+            "border-slate-600/30",
+        ],
+        gradient: "from-red-500/30",
+    },
+} as const;
+
+/**
+ * Masked, striped backdrop layer whose background position is animated from
+ * the top-left to the bottom-right corner in an endless 40-second linear loop.
+ */
+const AnimatedGrid = () => {
+    return (
+        <motion.div
+            className="absolute inset-0 [mask-image:radial-gradient(ellipse_at_center,transparent_30%,black)]"
+            animate={{ backgroundPosition: ["0% 0%", "100% 100%"] }}
+            transition={{ duration: 40, repeat: Infinity, ease: "linear" }}
+        >
+            <div className="h-full w-full [background-image:repeating-linear-gradient(100deg,#64748B_0%,#64748B_1px,transparent_1px,transparent_4%)] opacity-20" />
+        </motion.div>
+    );
+};
+
+/**
+ * Full-screen animated backdrop: a black overlay that fades out shortly after
+ * mount, an animated grid, three rotating rings and several blurred glows.
+ *
+ * While `isActive` is true and `audioLevel` is above zero, the level is
+ * boosted (x1.5, capped at 1) and stored as `animationParams`, which scales
+ * the ring pulse, shortens its duration and, above an intensity of 0.4, adds
+ * an extra pulsing glow. Otherwise the parameters are reset after 300 ms.
+ *
+ * `title` and `description` are destructured but not used in the output.
+ *
+ * NOTE(review): two class names below are assembled at runtime from
+ * `variantStyles.gradient` (e.g. "red-500/30" followed by "/10%"). Tailwind
+ * generates CSS from class names found statically in source, so these
+ * interpolated arbitrary-value classes are presumably never emitted - confirm
+ * against the built stylesheet.
+ */
+export function BackgroundCircles({
+    title = "",
+    description = "",
+    className,
+    variant = "octonary",
+    audioLevel = 0,
+    isActive = false,
+}: BackgroundCirclesProps) {
+    const variantStyles = COLOR_VARIANTS[variant];
+    // scale / duration / intensity derived from the most recent audio level.
+    const [animationParams, setAnimationParams] = useState({
+        scale: 1,
+        duration: 5,
+        intensity: 0
+    });
+    // Flips to true 300 ms after mount and triggers the fade-in animations.
+    const [isLoaded, setIsLoaded] = useState(false);
+    
+    // Initial page load animation
+    useEffect(() => {
+        // Small delay to ensure the black screen is visible first
+        const timer = setTimeout(() => {
+            setIsLoaded(true);
+        }, 300);
+        
+        return () => clearTimeout(timer);
+    }, []);
+    
+    // Update animation based on audio level
+    useEffect(() => {
+        if (isActive && audioLevel > 0) {
+            // Simple enhancement of audio level for more dramatic effect
+            const enhancedLevel = Math.min(1, audioLevel * 1.5);
+            
+            setAnimationParams({
+                scale: 1 + enhancedLevel * 0.3,
+                duration: Math.max(2, 5 - enhancedLevel * 3),
+                intensity: enhancedLevel
+            });
+        } else if (animationParams.intensity > 0) {
+            // Only reset if we need to (prevents unnecessary updates)
+            const timer = setTimeout(() => {
+                setAnimationParams({
+                    scale: 1,
+                    duration: 5,
+                    intensity: 0
+                });
+            }, 300);
+            
+            return () => clearTimeout(timer);
+        }
+    }, [audioLevel, isActive, animationParams.intensity]);
+    
+    return (
+        <>
+            {/* Initial black overlay that fades out */}
+            <motion.div 
+                className="fixed inset-0 bg-black z-50"
+                initial={{ opacity: 1 }}
+                animate={{ opacity: isLoaded ? 0 : 1 }}
+                transition={{ duration: 1.2, ease: "easeInOut" }}
+                style={{ pointerEvents: isLoaded ? "none" : "auto" }}
+            />
+            
+            <div
+                className={clsx(
+                    "relative flex h-screen w-full items-center justify-center overflow-hidden",
+                    "bg-white dark:bg-black/5",
+                    className
+                )}
+            >
+                <AnimatedGrid />
+                <motion.div 
+                    className="absolute h-[480px] w-[480px]"
+                    initial={{ opacity: 0, scale: 0.9 }}
+                    animate={{ 
+                        opacity: isLoaded ? 1 : 0,
+                        scale: isLoaded ? 1 : 0.9
+                    }}
+                    transition={{ 
+                        duration: 1.5, 
+                        delay: 0.3,
+                        ease: "easeOut" 
+                    }}
+                >
+                    {[0, 1, 2].map((i) => (
+                        <motion.div
+                            key={i}
+                            className={clsx(
+                                "absolute inset-0 rounded-full",
+                                "border-2 bg-gradient-to-br to-transparent",
+                                variantStyles.border[i],
+                                variantStyles.gradient
+                            )}
+                            animate={{
+                                rotate: 360,
+                                scale: [
+                                    1 + (i * 0.05), 
+                                    (1 + (i * 0.05)) * (1 + (isActive ? animationParams.intensity * 0.2 : 0.02)),
+                                    1 + (i * 0.05)
+                                ],
+                                opacity: [
+                                    0.7 + (i * 0.1),
+                                    0.8 + (i * 0.1) + (isActive ? animationParams.intensity * 0.2 : 0),
+                                    0.7 + (i * 0.1)
+                                ]
+                            }}
+                            transition={{
+                                duration: isActive ? animationParams.duration : 8 + (i * 2),
+                                repeat: Number.POSITIVE_INFINITY,
+                                ease: "easeInOut",
+                            }}
+                        >
+                            <div
+                                className={clsx(
+                                    "absolute inset-0 rounded-full mix-blend-screen",
+                                    `bg-[radial-gradient(ellipse_at_center,${variantStyles.gradient.replace(
+                                        "from-",
+                                        ""
+                                    )}/10%,transparent_70%)]`
+                                )}
+                            />
+                        </motion.div>
+                    ))}
+                </motion.div>
+
+                <div className="absolute inset-0 [mask-image:radial-gradient(90%_60%_at_50%_50%,#000_40%,transparent)]">
+                    <motion.div 
+                        className="absolute inset-0 bg-[radial-gradient(ellipse_at_center,#0F766E/30%,transparent_70%)] blur-[120px]"
+                        initial={{ opacity: 0 }}
+                        animate={{
+                            opacity: isLoaded ? 0.7 : 0,
+                            scale: [1, 1 + (isActive ? animationParams.intensity * 0.3 : 0.02), 1],
+                        }}
+                        transition={{
+                            opacity: { duration: 1.8, delay: 0.5 },
+                            scale: { 
+                                duration: isActive ? 2 : 12,
+                                repeat: Number.POSITIVE_INFINITY,
+                                ease: "easeInOut",
+                            }
+                        }}
+                    />
+                    <motion.div 
+                        className="absolute inset-0 bg-[radial-gradient(ellipse_at_center,#2DD4BF/15%,transparent)] blur-[80px]"
+                        initial={{ opacity: 0 }}
+                        animate={{
+                            opacity: isLoaded ? 1 : 0,
+                            scale: [1, 1 + (isActive ? animationParams.intensity * 0.4 : 0.03), 1]
+                        }}
+                        transition={{
+                            opacity: { duration: 2, delay: 0.7 },
+                            scale: { 
+                                duration: isActive ? 1.5 : 15,
+                                repeat: Number.POSITIVE_INFINITY,
+                                ease: "easeInOut",
+                            }
+                        }}
+                    />
+                    
+                    {/* Additional glow that appears only during high audio levels */}
+                    {isActive && animationParams.intensity > 0.4 && (
+                        <motion.div 
+                            className={`absolute inset-0 bg-[radial-gradient(ellipse_at_center,${variantStyles.gradient.replace("from-", "")}/20%,transparent_70%)] blur-[60px]`}
+                            initial={{ opacity: 0, scale: 0.8 }}
+                            animate={{
+                                opacity: [0, animationParams.intensity * 0.6, 0],
+                                scale: [0.8, 1.1, 0.8],
+                            }}
+                            transition={{
+                                duration: 0.8,
+                                repeat: Number.POSITIVE_INFINITY,
+                                ease: "easeInOut",
+                            }}
+                        />
+                    )}
+                </div>
+            </div>
+        </>
+    );
+}
+
+/**
+ * Demo wrapper: renders `BackgroundCircles` with the current variant and a
+ * button that advances to the next key of `COLOR_VARIANTS`, wrapping around
+ * after the last one.
+ */
+export function DemoCircles() {
+    type VariantName = keyof typeof COLOR_VARIANTS;
+
+    const variantNames = Object.keys(COLOR_VARIANTS) as VariantName[];
+    const [currentVariant, setCurrentVariant] = useState<VariantName>("octonary");
+
+    const handleCycle = () => {
+        const position = variantNames.indexOf(currentVariant);
+        const following = variantNames[(position + 1) % variantNames.length];
+        setCurrentVariant(following);
+    };
+
+    return (
+        <>
+            <BackgroundCircles variant={currentVariant} />
+            <div className="absolute top-12 right-12">
+                <button
+                    type="button"
+                    className="bg-slate-950 dark:bg-white text-white dark:text-slate-950 px-4 py-1 rounded-md z-10 text-sm font-medium"
+                    onClick={handleCycle}
+                >
+                    Change Variant
+                </button>
+            </div>
+        </>
+    );
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/reset-chat.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/reset-chat.tsx
@@ -0,0 +1,18 @@
+"use client"
+
+import { Trash } from "lucide-react"
+
+/**
+ * Icon button that requests `http://localhost:8000/reset` when clicked.
+ *
+ * The request is fire-and-forget: the response is ignored and a network
+ * failure is logged instead of surfacing as an unhandled promise rejection.
+ */
+export function ResetChat() {
+    const handleReset = () => {
+        fetch("http://localhost:8000/reset").catch((error) => {
+            console.error("Failed to reset chat:", error);
+        });
+    };
+
+    return (
+        <button
+            // Explicit type so the button never submits an enclosing form.
+            type="button"
+            className="w-10 h-10 rounded-md flex items-center justify-center transition-colors relative overflow-hidden bg-black/10 hover:bg-black/20 dark:bg-white/10 dark:hover:bg-white/20"
+            aria-label="Reset chat"
+            onClick={handleReset}
+        >
+            <div className="relative z-10">
+                <Trash className="h-5 w-5 text-black/70 dark:text-white/70" />
+            </div>
+        </button>
+    )
+}
+
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-toggle.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-toggle.tsx
@@ -0,0 +1,61 @@
+"use client";
+
+import { useTheme } from "@/components/theme-provider";
+import { cn } from "@/lib/utils";
+import { Moon, Sun } from "lucide-react";
+import { useRef } from "react";
+
+/** Props accepted by `ThemeToggle`. */
+interface ThemeToggleProps {
+  /** Extra class names merged into the button element. */
+  className?: string;
+}
+
+/**
+ * Square icon button that requests a theme switch.
+ *
+ * It does not change the theme itself: a click dispatches a
+ * `themeToggleRequest` CustomEvent on `window` whose `detail.theme` is "dark"
+ * when the current theme is "light" and "light" otherwise.
+ */
+export function ThemeToggle({ className }: ThemeToggleProps) {
+  const { theme } = useTheme();
+  const buttonRef = useRef<HTMLButtonElement>(null);
+  const isLight = theme === "light";
+
+  const requestToggle = () => {
+    const detail = { theme: isLight ? "dark" : "light" };
+    window.dispatchEvent(new CustomEvent('themeToggleRequest', { detail }));
+  };
+
+  const icon = isLight
+    ? <Moon className="h-5 w-5 text-black/70" />
+    : <Sun className="h-5 w-5 text-white/70" />;
+
+  const sheenClasses = isLight
+    ? "bg-gradient-to-br from-blue-500/20 to-purple-500/20 translate-y-full"
+    : "bg-gradient-to-br from-amber-500/20 to-orange-500/20 -translate-y-full";
+
+  return (
+    <button
+      ref={buttonRef}
+      onClick={requestToggle}
+      className={cn(
+        "w-10 h-10 rounded-md flex items-center justify-center transition-colors relative overflow-hidden",
+        "bg-black/10 hover:bg-black/20 dark:bg-white/10 dark:hover:bg-white/20",
+        className
+      )}
+      aria-label="Toggle theme"
+    >
+      <div className="relative z-10">{icon}</div>
+
+      {/* Decorative gradient layer inside the button */}
+      <div
+        className={cn("absolute inset-0 transition-transform duration-500", sheenClasses)}
+        style={{ transitionTimingFunction: "cubic-bezier(0.22, 1, 0.36, 1)" }}
+      />
+    </button>
+  );
+} 
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-transition.tsx
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-transition.tsx
@@ -0,0 +1,120 @@
+"use client";
+
+import { useTheme } from "@/components/theme-provider";
+import { useEffect, useRef, useState } from "react";
+import { motion, AnimatePresence } from "framer-motion";
+
+/** Props accepted by `ThemeTransition`. */
+interface ThemeTransitionProps {
+  /** Optional class names supplied by the caller. */
+  className?: string;
+}
+
+/**
+ * Full-screen circular "reveal" overlay that plays when the theme changes.
+ *
+ * Listens on `window` for `themeToggleRequest` CustomEvents carrying
+ * `detail.theme`. When a request for a different theme arrives and no
+ * animation is running, a circle in the new theme's color grows from the last
+ * known pointer position; the real theme is applied through `setTheme` after
+ * 400 ms and the overlay is removed after 1000 ms.
+ *
+ * @param className Extra class names appended to the overlay element.
+ */
+export function ThemeTransition({ className }: ThemeTransitionProps) {
+  const { theme, setTheme } = useTheme();
+  const [isAnimating, setIsAnimating] = useState(false);
+  const [pendingTheme, setPendingTheme] = useState<string | null>(null);
+  const [visualTheme, setVisualTheme] = useState<string | null>(theme);
+  // Origin and size of the current reveal, captured once when a toggle is
+  // accepted so pointer movement during the animation cannot re-target it.
+  const [burst, setBurst] = useState({ x: 0, y: 0, diameter: 0 });
+
+  // The pointer position lives in a ref: it changes on every mouse move and
+  // must not trigger a re-render each time.
+  const pointerRef = useRef({ x: 0, y: 0 });
+
+  // The context may hand out a new `setTheme` function on every provider
+  // render. Reading it through a ref keeps it out of the timer effect's
+  // dependencies, so applying the theme does not restart the timers.
+  const setThemeRef = useRef(setTheme);
+  useEffect(() => {
+    setThemeRef.current = setTheme;
+  }, [setTheme]);
+
+  // Track mouse/touch position for click events
+  useEffect(() => {
+    const handleMouseMove = (e: MouseEvent) => {
+      pointerRef.current = { x: e.clientX, y: e.clientY };
+    };
+
+    const handleTouchMove = (e: TouchEvent) => {
+      const touch = e.touches[0];
+      if (touch) {
+        pointerRef.current = { x: touch.clientX, y: touch.clientY };
+      }
+    };
+
+    window.addEventListener("mousemove", handleMouseMove);
+    window.addEventListener("touchmove", handleTouchMove);
+
+    return () => {
+      window.removeEventListener("mousemove", handleMouseMove);
+      window.removeEventListener("touchmove", handleTouchMove);
+    };
+  }, []);
+
+  // Listen for theme toggle requests
+  useEffect(() => {
+    const handleThemeToggle = (e: CustomEvent) => {
+      if (isAnimating) return; // Prevent multiple animations
+
+      const newTheme = e.detail.theme;
+      if (newTheme === theme) return;
+
+      // Snapshot where and how large the reveal circle should be.
+      setBurst({
+        x: pointerRef.current.x,
+        y: pointerRef.current.y,
+        diameter: Math.max(window.innerWidth, window.innerHeight) * 3,
+      });
+
+      // Store the pending theme but don't apply it yet; it is applied
+      // mid-animation by the effect below.
+      setPendingTheme(newTheme);
+      setIsAnimating(true);
+    };
+
+    window.addEventListener('themeToggleRequest' as any, handleThemeToggle as EventListener);
+
+    return () => {
+      window.removeEventListener('themeToggleRequest' as any, handleThemeToggle as EventListener);
+    };
+  }, [theme, isAnimating]);
+
+  // Apply the theme change mid-animation
+  useEffect(() => {
+    if (!isAnimating || !pendingTheme) {
+      return undefined;
+    }
+
+    // Set visual theme immediately for the animation
+    setVisualTheme(pendingTheme);
+
+    // Apply the actual theme change after a delay (mid-animation)
+    const applyTimer = setTimeout(() => {
+      setThemeRef.current(pendingTheme as any);
+    }, 400);
+
+    // End the animation after it completes
+    const endTimer = setTimeout(() => {
+      setIsAnimating(false);
+      setPendingTheme(null);
+    }, 1000);
+
+    return () => {
+      clearTimeout(applyTimer);
+      clearTimeout(endTimer);
+    };
+  }, [isAnimating, pendingTheme]);
+
+  const overlayClassName = ["fixed inset-0 z-[9999] pointer-events-none", className]
+    .filter(Boolean)
+    .join(" ");
+
+  return (
+    <AnimatePresence>
+      {isAnimating && (
+        <motion.div
+          className={overlayClassName}
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          transition={{ duration: 0.3 }}
+        >
+          <motion.div
+            className={`absolute rounded-full ${visualTheme === 'dark' ? 'bg-slate-950' : 'bg-white'}`}
+            initial={{
+              width: 0,
+              height: 0,
+              x: burst.x,
+              y: burst.y,
+              borderRadius: '100%'
+            }}
+            animate={{
+              width: burst.diameter,
+              height: burst.diameter,
+              // Shift by the radius so the circle stays centered on the origin.
+              x: burst.x - burst.diameter / 2,
+              y: burst.y - burst.diameter / 2,
+            }}
+            transition={{
+              duration: 0.8,
+              ease: [0.22, 1, 0.36, 1]
+            }}
+          />
+        </motion.div>
+      )}
+    </AnimatePresence>
+  );
+} 
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/eslint.config.mjs
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/eslint.config.mjs
@@ -0,0 +1,28 @@
+import { dirname } from "path";
+import { fileURLToPath } from "url";
+import { FlatCompat } from "@eslint/eslintrc";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+const compat = new FlatCompat({
+  baseDirectory: __dirname,
+});
+
+const eslintConfig = [
+  ...compat.extends("next/core-web-vitals", "next/typescript"),
+  {
+    rules: {
+      "no-unused-vars": "off",
+      "no-explicit-any": "off",
+      "no-console": "off",
+      "no-debugger": "off",
+      "eqeqeq": "off",
+      "curly": "off",
+      "quotes": "off",
+      "semi": "off",
+    },
+  },
+];
+
+export default eslintConfig;
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/utils.ts
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/utils.ts
@@ -0,0 +1,6 @@
+import { clsx, type ClassValue } from "clsx"
+import { twMerge } from "tailwind-merge"
+
+/** Joins the given class values with `clsx`, then passes the result through `twMerge`. */
+export function cn(...inputs: ClassValue[]) {
+  const joined = clsx(inputs)
+  return twMerge(joined)
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/webrtc-client.ts
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/webrtc-client.ts
@@ -0,0 +1,189 @@
+/** Optional callbacks a `WebRTCClient` invokes during its lifetime. */
+interface WebRTCClientOptions {
+    /** Called at the end of `connect()`, after the remote description has been set. */
+    onConnected?: () => void;
+    /** Called at the end of every `disconnect()`, including the one run when `connect()` fails. */
+    onDisconnected?: () => void;
+    /** Called with each data-channel message after it has been parsed as JSON. */
+    onMessage?: (message: any) => void;
+    /** Called with the first stream of each incoming `track` event. */
+    onAudioStream?: (stream: MediaStream) => void;
+    /** Called with the averaged microphone level, normalized to the 0-1 range. */
+    onAudioLevel?: (level: number) => void;
+}
+
+/**
+ * Browser-side WebRTC client for the voice-chat demo.
+ *
+ * `connect()` captures the microphone, starts a level meter on it, opens a
+ * peer connection with a "text" data channel, and exchanges SDP with the
+ * backend at `http://localhost:8000/webrtc/offer`. `disconnect()` releases
+ * everything again. Progress is reported through the callbacks supplied in
+ * `WebRTCClientOptions`.
+ */
+export class WebRTCClient {
+    private peerConnection: RTCPeerConnection | null = null;
+    private mediaStream: MediaStream | null = null;
+    private dataChannel: RTCDataChannel | null = null;
+    private options: WebRTCClientOptions;
+    private audioContext: AudioContext | null = null;
+    private analyser: AnalyserNode | null = null;
+    private dataArray: Uint8Array | null = null;
+    private animationFrameId: number | null = null;
+
+    constructor(options: WebRTCClientOptions = {}) {
+        this.options = options;
+    }
+
+    /**
+     * Builds the identifier sent as `webrtc_id`.
+     *
+     * Uses `crypto.randomUUID()` when available. The fallback drops the "0."
+     * prefix of a base-36 random number and appends a timestamp, so the id is
+     * never empty (a fixed `substring(7)` can leave very few or no characters).
+     */
+    private static createWebRTCId(): string {
+        if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {
+            return crypto.randomUUID();
+        }
+        return Math.random().toString(36).slice(2) + Date.now().toString(36);
+    }
+
+    /**
+     * Establishes the session.
+     *
+     * On any failure the client is torn down via `disconnect()` (which also
+     * fires `onDisconnected`) and the error is re-thrown to the caller.
+     *
+     * @throws Error when microphone access is denied or no microphone exists,
+     *         when the signalling request is not successful, or when any WebRTC
+     *         step fails.
+     */
+    async connect() {
+        try {
+            this.peerConnection = new RTCPeerConnection();
+            
+            // Get user media
+            try {
+                this.mediaStream = await navigator.mediaDevices.getUserMedia({
+                    audio: true
+                });
+            } catch (mediaError: any) {
+                console.error('Media error:', mediaError);
+                if (mediaError.name === 'NotAllowedError') {
+                    throw new Error('Microphone access denied. Please allow microphone access and try again.');
+                } else if (mediaError.name === 'NotFoundError') {
+                    throw new Error('No microphone detected. Please connect a microphone and try again.');
+                } else {
+                    throw mediaError;
+                }
+            }
+            
+            this.setupAudioAnalysis();
+            
+            this.mediaStream.getTracks().forEach(track => {
+                if (this.peerConnection) {
+                    this.peerConnection.addTrack(track, this.mediaStream!);
+                }
+            });
+            
+            this.peerConnection.addEventListener('track', (event) => {
+                if (this.options.onAudioStream) {
+                    this.options.onAudioStream(event.streams[0]);
+                }
+            });
+            
+            this.dataChannel = this.peerConnection.createDataChannel('text');
+            
+            this.dataChannel.addEventListener('message', (event) => {
+                try {
+                    const message = JSON.parse(event.data);
+                    console.log('Received message:', message);
+                    
+                    if (this.options.onMessage) {
+                        this.options.onMessage(message);
+                    }
+                } catch (error) {
+                    console.error('Error parsing message:', error);
+                }
+            });
+            
+            // Create and send offer
+            const offer = await this.peerConnection.createOffer();
+            await this.peerConnection.setLocalDescription(offer);
+            
+            // The backend is addressed by absolute URL, so from any other origin
+            // this is a cross-origin request; with a JSON content type the
+            // browser will send a CORS preflight and the server must allow it.
+            const response = await fetch('http://localhost:8000/webrtc/offer', {
+                method: 'POST',
+                headers: { 
+                    'Content-Type': 'application/json',
+                    'Accept': 'application/json'
+                },
+                mode: 'cors', // Explicitly set CORS mode
+                credentials: 'same-origin',
+                body: JSON.stringify({
+                    sdp: offer.sdp,
+                    type: offer.type,
+                    webrtc_id: WebRTCClient.createWebRTCId()
+                })
+            });
+            
+            // Fail with a clear message instead of feeding an error body to
+            // setRemoteDescription.
+            if (!response.ok) {
+                throw new Error(`WebRTC offer request failed: ${response.status} ${response.statusText}`);
+            }
+            
+            const serverResponse = await response.json();
+            
+            // NOTE(review): assumes the server reports a rejected offer as
+            // { status: "failed", meta: { error } } - confirm against the backend.
+            if (serverResponse?.status === 'failed') {
+                throw new Error(`WebRTC offer was rejected: ${serverResponse.meta?.error ?? 'unknown error'}`);
+            }
+            
+            await this.peerConnection.setRemoteDescription(serverResponse);
+            
+            if (this.options.onConnected) {
+                this.options.onConnected();
+            }
+        } catch (error) {
+            console.error('Error connecting:', error);
+            this.disconnect();
+            throw error;
+        }
+    }
+
+    /**
+     * Wires the microphone stream into an AnalyserNode and starts metering.
+     * Failures are logged and otherwise ignored so the call itself can proceed.
+     */
+    private setupAudioAnalysis() {
+        if (!this.mediaStream) return;
+        
+        try {
+            this.audioContext = new AudioContext();
+            this.analyser = this.audioContext.createAnalyser();
+            this.analyser.fftSize = 256;
+            
+            const source = this.audioContext.createMediaStreamSource(this.mediaStream);
+            source.connect(this.analyser);
+            
+            const bufferLength = this.analyser.frequencyBinCount;
+            this.dataArray = new Uint8Array(bufferLength);
+            
+            this.startAnalysis();
+        } catch (error) {
+            console.error('Error setting up audio analysis:', error);
+        }
+    }
+
+    /**
+     * Runs a requestAnimationFrame loop that reports the average frequency-bin
+     * magnitude (0-1) through `onAudioLevel`, at most once per 100 ms.
+     * Does nothing when no `onAudioLevel` callback was supplied.
+     */
+    private startAnalysis() {
+        if (!this.analyser || !this.dataArray || !this.options.onAudioLevel) return;
+        
+        // Add throttling to prevent too many updates
+        let lastUpdateTime = 0;
+        const throttleInterval = 100; // Only update every 100ms
+        
+        const analyze = () => {
+            this.analyser!.getByteFrequencyData(this.dataArray!);
+            
+            const currentTime = Date.now();
+            // Only update if enough time has passed since last update
+            if (currentTime - lastUpdateTime > throttleInterval) {
+                // Calculate average volume level (0-1)
+                let sum = 0;
+                for (let i = 0; i < this.dataArray!.length; i++) {
+                    sum += this.dataArray![i];
+                }
+                const average = sum / this.dataArray!.length / 255;
+                
+                this.options.onAudioLevel!(average);
+                lastUpdateTime = currentTime;
+            }
+            
+            this.animationFrameId = requestAnimationFrame(analyze);
+        };
+        
+        this.animationFrameId = requestAnimationFrame(analyze);
+    }
+
+    /** Cancels the metering loop and releases the audio context and buffers. */
+    private stopAnalysis() {
+        if (this.animationFrameId !== null) {
+            cancelAnimationFrame(this.animationFrameId);
+            this.animationFrameId = null;
+        }
+        
+        if (this.audioContext) {
+            // close() returns a promise; swallow a rejection so teardown never
+            // produces an unhandled rejection.
+            this.audioContext.close().catch((error) => {
+                console.error('Error closing audio context:', error);
+            });
+            this.audioContext = null;
+        }
+        
+        this.analyser = null;
+        this.dataArray = null;
+    }
+
+    /**
+     * Stops metering, stops the microphone tracks, closes the data channel and
+     * the peer connection, then fires `onDisconnected`. Safe to call when
+     * nothing is connected.
+     */
+    disconnect() {
+        this.stopAnalysis();
+        
+        if (this.mediaStream) {
+            this.mediaStream.getTracks().forEach(track => track.stop());
+            this.mediaStream = null;
+        }
+        
+        if (this.dataChannel) {
+            this.dataChannel.close();
+            this.dataChannel = null;
+        }
+        
+        if (this.peerConnection) {
+            this.peerConnection.close();
+            this.peerConnection = null;
+        }
+        
+        if (this.options.onDisconnected) {
+            this.options.onDisconnected();
+        }
+    }
+} 
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/next.config.ts
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/next.config.ts
@@ -0,0 +1,7 @@
+import type { NextConfig } from "next";
+
+// Next.js configuration object; no options are set here.
+const nextConfig: NextConfig = {
+  /* config options here */
+};
+
+export default nextConfig;
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/package.json
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/package.json
@@ -0,0 +1,33 @@
+{
+  "name": "fastrtc-demo",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "dev": "next dev --turbopack",
+    "build": "next build --no-lint",
+    "start": "next start",
+    "lint": "next lint"
+  },
+  "dependencies": {
+    "class-variance-authority": "^0.7.1",
+    "clsx": "^2.1.1",
+    "framer-motion": "^12.4.10",
+    "lucide-react": "^0.477.0",
+    "next": "15.2.2-canary.1",
+    "react": "^19.0.0",
+    "react-dom": "^19.0.0",
+    "tailwind-merge": "^3.0.2",
+    "tailwindcss-animate": "^1.0.7"
+  },
+  "devDependencies": {
+    "@eslint/eslintrc": "^3",
+    "@tailwindcss/postcss": "^4",
+    "@types/node": "^20",
+    "@types/react": "^19",
+    "@types/react-dom": "^19",
+    "eslint": "^9",
+    "eslint-config-next": "15.2.2-canary.1",
+    "tailwindcss": "^4",
+    "typescript": "^5"
+  }
+}
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/postcss.config.mjs
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/postcss.config.mjs
@@ -0,0 +1,5 @@
+// PostCSS configuration: the only plugin listed is "@tailwindcss/postcss".
+const config = {
+  plugins: ["@tailwindcss/postcss"],
+};
+
+export default config;
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/file.svg
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/file.svg
@@ -0,0 +1 @@
+<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/globe.svg
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/globe.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/next.svg
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/next.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/vercel.svg
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/vercel.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/window.svg
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/public/window.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
--- a/demo/nextjs_voice_chat/frontend/fastrtc-demo/tsconfig.json
+++ b/demo/nextjs_voice_chat/frontend/fastrtc-demo/tsconfig.json
@@ -0,0 +1,27 @@
+{
+  "compilerOptions": {
+    "target": "ES2017",
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "preserve",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./*"]
+    }
+  },
+  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+  "exclude": ["node_modules"]
+}
--- a/demo/nextjs_voice_chat/requirements.txt
+++ b/demo/nextjs_voice_chat/requirements.txt
@@ -0,0 +1,5 @@
+openai
+fastapi
+python-dotenv
+elevenlabs
+fastrtc[vad, stt, tts]
--- a/demo/nextjs_voice_chat/run.sh
+++ b/demo/nextjs_voice_chat/run.sh
@@ -0,0 +1 @@
+uvicorn backend.server:app --host 0.0.0.0 --port 8000
--- a/demo/object_detection/README.md
+++ b/demo/object_detection/README.md
@@ -0,0 +1,15 @@
+---
+title: Object Detection
+emoji: 📸
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Use YOLOv10 to detect objects in real-time
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/object_detection/app.py
+++ b/demo/object_detection/app.py
@@ -0,0 +1,77 @@
+import json
+from pathlib import Path
+
+import cv2
+import gradio as gr
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from fastrtc import Stream, get_twilio_turn_credentials
+from gradio.utils import get_space
+from huggingface_hub import hf_hub_download
+from pydantic import BaseModel, Field
+
+try:
+    from demo.object_detection.inference import YOLOv10
+except (ImportError, ModuleNotFoundError):
+    from inference import YOLOv10
+
+
+# Directory containing this file; index.html is read relative to it.
+cur_dir = Path(__file__).parent
+
+# Fetch the ONNX model file from the Hugging Face Hub at import time. The
+# returned value is handed to YOLOv10 as the model path.
+model_file = hf_hub_download(
+    repo_id="onnx-community/yolov10n", filename="onnx/model.onnx"
+)
+
+# Single detector instance used by `detection` for every frame.
+model = YOLOv10(model_file)
+
+
+def detection(image, conf_threshold=0.3):
+    image = cv2.resize(image, (model.input_width, model.input_height))
+    print("conf_threshold", conf_threshold)
+    new_image = model.detect_objects(image, conf_threshold)
+    return cv2.resize(new_image, (500, 500))
+
+
+# Send-receive video stream: `detection` is the frame handler and the slider
+# is declared as its additional input (default 0.3, matching the handler).
+stream = Stream(
+    handler=detection,
+    modality="video",
+    mode="send-receive",
+    additional_inputs=[gr.Slider(minimum=0, maximum=1, step=0.01, value=0.3)],
+    # Twilio TURN credentials and the concurrency cap of 2 are only applied
+    # when get_space() is truthy (presumably when hosted on Hugging Face
+    # Spaces -- confirm against gradio.utils.get_space).
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=2 if get_space() else None,
+)
+
+app = FastAPI()
+
+# Attach the stream to the FastAPI app; the page posts its offer to
+# /webrtc/offer and its slider value to /input_hook.
+stream.mount(app)
+
+
+@app.get("/")
+async def _():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
+    html_content = open(cur_dir / "index.html").read()
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=html_content)
+
+
+# Request body accepted by the POST /input_hook endpoint.
+class InputData(BaseModel):
+    # Identifier of the WebRTC connection whose input is being set.
+    webrtc_id: str
+    # Confidence threshold; the Field constraints restrict it to [0, 1].
+    conf_threshold: float = Field(ge=0, le=1)
+
+
+# Forward the posted confidence threshold to the stream as the input for the
+# connection identified by data.webrtc_id.
+@app.post("/input_hook")
+async def _(data: InputData):
+    stream.set_input(data.webrtc_id, data.conf_threshold)
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/object_detection/index.html
+++ b/demo/object_detection/index.html
@@ -0,0 +1,340 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Object Detection</title>
+    <style>
+        body {
+            font-family: system-ui, -apple-system, sans-serif;
+            background: linear-gradient(135deg, #2d2b52 0%, #191731 100%);
+            color: white;
+            margin: 0;
+            padding: 20px;
+            height: 100vh;
+            box-sizing: border-box;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+        }
+
+        .container {
+            width: 100%;
+            max-width: 800px;
+            text-align: center;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+        }
+
+        .video-container {
+            width: 100%;
+            max-width: 500px;
+            aspect-ratio: 1/1;
+            background: rgba(255, 255, 255, 0.1);
+            border-radius: 12px;
+            overflow: hidden;
+            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
+            margin: 10px 0;
+        }
+
+        #video-output {
+            width: 100%;
+            height: 100%;
+            object-fit: cover;
+        }
+
+        button {
+            background: white;
+            color: #2d2b52;
+            border: none;
+            padding: 12px 32px;
+            border-radius: 24px;
+            font-size: 16px;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.3s ease;
+            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+        }
+
+        button:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 6px 16px rgba(0, 0, 0, 0.2);
+        }
+
+        h1 {
+            font-size: 2.5em;
+            margin-bottom: 0.3em;
+        }
+
+        p {
+            color: rgba(255, 255, 255, 0.8);
+            margin-bottom: 1em;
+        }
+
+        .controls {
+            display: flex;
+            flex-direction: column;
+            gap: 12px;
+            align-items: center;
+            margin-top: 10px;
+        }
+
+        .slider-container {
+            width: 100%;
+            max-width: 300px;
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+
+        .slider-container label {
+            color: rgba(255, 255, 255, 0.8);
+            font-size: 14px;
+        }
+
+        input[type="range"] {
+            width: 100%;
+            height: 6px;
+            -webkit-appearance: none;
+            background: rgba(255, 255, 255, 0.1);
+            border-radius: 3px;
+            outline: none;
+        }
+
+        input[type="range"]::-webkit-slider-thumb {
+            -webkit-appearance: none;
+            width: 18px;
+            height: 18px;
+            background: white;
+            border-radius: 50%;
+            cursor: pointer;
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
+    <div class="container">
+        <h1>Real-time Object Detection</h1>
+        <p>Using YOLOv10 to detect objects in your webcam feed</p>
+        <div class="video-container">
+            <video id="video-output" autoplay playsinline></video>
+        </div>
+        <div class="controls">
+            <div class="slider-container">
+                <label>Confidence Threshold: <span id="conf-value">0.3</span></label>
+                <input type="range" id="conf-threshold" min="0" max="1" step="0.01" value="0.3">
+            </div>
+            <button id="start-button">Start</button>
+        </div>
+    </div>
+
+    <script>
+        // Connection state shared by the functions below. Both are assigned
+        // inside setupWebRTC().
+        let peerConnection;
+        let webrtc_id;
+        // Page elements used throughout the script.
+        const startButton = document.getElementById('start-button');
+        const videoOutput = document.getElementById('video-output');
+        const confThreshold = document.getElementById('conf-threshold');
+        const confValue = document.getElementById('conf-value');
+
+        // Mirror the slider value in the label and, once a peer connection
+        // object exists, push the new threshold to the server.
+        confThreshold.addEventListener('input', (e) => {
+            confValue.textContent = e.target.value;
+            if (peerConnection) {
+                updateConfThreshold(e.target.value);
+            }
+        });
+
+        function updateConfThreshold(value) {
+            fetch('/input_hook', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({
+                    webrtc_id: webrtc_id,
+                    conf_threshold: parseFloat(value)
+                })
+            });
+        }
+
+        // Show `message` in the toast with error styling and hide it again
+        // after 5 seconds.
+        function showError(message) {
+            const toast = document.getElementById('error-toast');
+            toast.textContent = message;
+            toast.className = 'toast error';
+            toast.style.display = 'block';
+
+            // Cancel the hide timer left by an earlier error so it cannot hide
+            // this newer message before its own 5 seconds are up.
+            clearTimeout(showError.hideTimerId);
+            showError.hideTimerId = setTimeout(() => {
+                toast.style.display = 'none';
+            }, 5000);
+        }
+
+        async function setupWebRTC() {
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({
+                    video: true
+                });
+
+                stream.getTracks().forEach(track => {
+                    peerConnection.addTrack(track, stream);
+                });
+
+                peerConnection.addEventListener('track', (evt) => {
+                    if (videoOutput && videoOutput.srcObject !== evt.streams[0]) {
+                        videoOutput.srcObject = evt.streams[0];
+                    }
+                });
+
+                const dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    if (eventJson.type === "error") {
+                        showError(eventJson.message);
+                    } else if (eventJson.type === "send_input") {
+                        updateConfThreshold(confThreshold.value);
+                    }
+                };
+
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                webrtc_id = Math.random().toString(36).substring(7);
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    startButton.textContent = 'Start';
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+
+                // Send initial confidence threshold
+                updateConfThreshold(confThreshold.value);
+
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                });
+
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+                startButton.textContent = 'Start';
+            }
+        }
+
+        function stop() {
+            if (peerConnection) {
+                if (peerConnection.getTransceivers) {
+                    peerConnection.getTransceivers().forEach(transceiver => {
+                        if (transceiver.stop) {
+                            transceiver.stop();
+                        }
+                    });
+                }
+
+                if (peerConnection.getSenders) {
+                    peerConnection.getSenders().forEach(sender => {
+                        if (sender.track && sender.track.stop) sender.track.stop();
+                    });
+                }
+
+                setTimeout(() => {
+                    peerConnection.close();
+                }, 500);
+            }
+
+            videoOutput.srcObject = null;
+        }
+
+        startButton.addEventListener('click', () => {
+            if (startButton.textContent === 'Start') {
+                setupWebRTC();
+                startButton.textContent = 'Stop';
+            } else {
+                stop();
+                startButton.textContent = 'Start';
+            }
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/object_detection/inference.py
+++ b/demo/object_detection/inference.py
@@ -0,0 +1,153 @@
+import time
+
+import cv2
+import numpy as np
+import onnxruntime
+
+try:
+    from demo.object_detection.utils import draw_detections
+except (ImportError, ModuleNotFoundError):
+    from utils import draw_detections
+
+
+class YOLOv10:
+    def __init__(self, path):
+        # Initialize model
+        self.initialize_model(path)
+
+    def __call__(self, image):
+        return self.detect_objects(image)
+
+    def initialize_model(self, path):
+        self.session = onnxruntime.InferenceSession(
+            path, providers=onnxruntime.get_available_providers()
+        )
+        # Get model info
+        self.get_input_details()
+        self.get_output_details()
+
+    def detect_objects(self, image, conf_threshold=0.3):
+        input_tensor = self.prepare_input(image)
+
+        # Perform inference on the image
+        new_image = self.inference(image, input_tensor, conf_threshold)
+
+        return new_image
+
+    def prepare_input(self, image):
+        self.img_height, self.img_width = image.shape[:2]
+
+        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+        # Resize input image
+        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
+
+        # Scale input pixel values to 0 to 1
+        input_img = input_img / 255.0
+        input_img = input_img.transpose(2, 0, 1)
+        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
+
+        return input_tensor
+
+    def inference(self, image, input_tensor, conf_threshold=0.3):
+        start = time.perf_counter()
+        outputs = self.session.run(
+            self.output_names, {self.input_names[0]: input_tensor}
+        )
+
+        print(f"Inference time: {(time.perf_counter() - start) * 1000:.2f} ms")
+        (
+            boxes,
+            scores,
+            class_ids,
+        ) = self.process_output(outputs, conf_threshold)
+        return self.draw_detections(image, boxes, scores, class_ids)
+
+    def process_output(self, output, conf_threshold=0.3):
+        predictions = np.squeeze(output[0])
+
+        # Filter out object confidence scores below threshold
+        scores = predictions[:, 4]
+        predictions = predictions[scores > conf_threshold, :]
+        scores = scores[scores > conf_threshold]
+
+        if len(scores) == 0:
+            return [], [], []
+
+        # Get the class with the highest confidence
+        class_ids = predictions[:, 5].astype(int)
+
+        # Get bounding boxes for each object
+        boxes = self.extract_boxes(predictions)
+
+        return boxes, scores, class_ids
+
+    def extract_boxes(self, predictions):
+        # Extract boxes from predictions
+        boxes = predictions[:, :4]
+
+        # Scale boxes to original image dimensions
+        boxes = self.rescale_boxes(boxes)
+
+        # Convert boxes to xyxy format
+        # boxes = xywh2xyxy(boxes)
+
+        return boxes
+
+    def rescale_boxes(self, boxes):
+        # Rescale boxes to original image dimensions
+        input_shape = np.array(
+            [self.input_width, self.input_height, self.input_width, self.input_height]
+        )
+        boxes = np.divide(boxes, input_shape, dtype=np.float32)
+        boxes *= np.array(
+            [self.img_width, self.img_height, self.img_width, self.img_height]
+        )
+        return boxes
+
+    def draw_detections(
+        self, image, boxes, scores, class_ids, draw_scores=True, mask_alpha=0.4
+    ):
+        return draw_detections(image, boxes, scores, class_ids, mask_alpha)
+
+    def get_input_details(self):
+        model_inputs = self.session.get_inputs()
+        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
+
+        self.input_shape = model_inputs[0].shape
+        self.input_height = self.input_shape[2]
+        self.input_width = self.input_shape[3]
+
+    def get_output_details(self):
+        model_outputs = self.session.get_outputs()
+        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
+
+
+if __name__ == "__main__":
+    import tempfile
+
+    import requests
+    from huggingface_hub import hf_hub_download
+
+    model_file = hf_hub_download(
+        repo_id="onnx-community/yolov10s", filename="onnx/model.onnx"
+    )
+
+    yolov8_detector = YOLOv10(model_file)
+
+    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
+        f.write(
+            requests.get(
+                "https://live.staticflickr.com/13/19041780_d6fd803de0_3k.jpg"
+            ).content
+        )
+        f.seek(0)
+        img = cv2.imread(f.name)
+
+    # # Detect Objects
+    combined_image = yolov8_detector.detect_objects(img)
+
+    # Draw detections
+    cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
+    cv2.imshow("Output", combined_image)
+    cv2.waitKey(0)
--- a/demo/object_detection/requirements.txt
+++ b/demo/object_detection/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc
+opencv-python
+twilio
+onnxruntime-gpu
--- a/demo/object_detection/utils.py
+++ b/demo/object_detection/utils.py
@@ -0,0 +1,237 @@
+import cv2
+import numpy as np
+
+class_names = [
+    "person",
+    "bicycle",
+    "car",
+    "motorcycle",
+    "airplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "couch",
+    "potted plant",
+    "bed",
+    "dining table",
+    "toilet",
+    "tv",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush",
+]
+
+# One random color per class: a float array of shape (len(class_names), 3) with values in [0, 255)
+rng = np.random.default_rng(3)
+colors = rng.uniform(0, 255, size=(len(class_names), 3))
+
+
+def nms(boxes, scores, iou_threshold):
+    # Sort by score
+    sorted_indices = np.argsort(scores)[::-1]
+
+    keep_boxes = []
+    while sorted_indices.size > 0:
+        # Pick the last box
+        box_id = sorted_indices[0]
+        keep_boxes.append(box_id)
+
+        # Compute IoU of the picked box with the rest
+        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
+
+        # Remove boxes with IoU over the threshold
+        keep_indices = np.where(ious < iou_threshold)[0]
+
+        # print(keep_indices.shape, sorted_indices.shape)
+        sorted_indices = sorted_indices[keep_indices + 1]
+
+    return keep_boxes
+
+
+def multiclass_nms(boxes, scores, class_ids, iou_threshold):
+    unique_class_ids = np.unique(class_ids)
+
+    keep_boxes = []
+    for class_id in unique_class_ids:
+        class_indices = np.where(class_ids == class_id)[0]
+        class_boxes = boxes[class_indices, :]
+        class_scores = scores[class_indices]
+
+        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
+        keep_boxes.extend(class_indices[class_keep_boxes])
+
+    return keep_boxes
+
+
+def compute_iou(box, boxes):
+    # Compute xmin, ymin, xmax, ymax for both boxes
+    xmin = np.maximum(box[0], boxes[:, 0])
+    ymin = np.maximum(box[1], boxes[:, 1])
+    xmax = np.minimum(box[2], boxes[:, 2])
+    ymax = np.minimum(box[3], boxes[:, 3])
+
+    # Compute intersection area
+    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
+
+    # Compute union area
+    box_area = (box[2] - box[0]) * (box[3] - box[1])
+    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+    union_area = box_area + boxes_area - intersection_area
+
+    # Compute IoU
+    iou = intersection_area / union_area
+
+    return iou
+
+
+def xywh2xyxy(x):
+    # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
+    y = np.copy(x)
+    y[..., 0] = x[..., 0] - x[..., 2] / 2
+    y[..., 1] = x[..., 1] - x[..., 3] / 2
+    y[..., 2] = x[..., 0] + x[..., 2] / 2
+    y[..., 3] = x[..., 1] + x[..., 3] / 2
+    return y
+
+
+def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
+    det_img = image.copy()
+
+    img_height, img_width = image.shape[:2]
+    font_size = min([img_height, img_width]) * 0.0006
+    text_thickness = int(min([img_height, img_width]) * 0.001)
+
+    # det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)
+
+    # Draw bounding boxes and labels of detections
+    for class_id, box, score in zip(class_ids, boxes, scores):
+        color = colors[class_id]
+
+        draw_box(det_img, box, color)  # type: ignore
+
+        label = class_names[class_id]
+        caption = f"{label} {int(score * 100)}%"
+        draw_text(det_img, caption, box, color, font_size, text_thickness)  # type: ignore
+
+    return det_img
+
+
+def draw_box(
+    image: np.ndarray,
+    box: np.ndarray,
+    color: tuple[int, int, int] = (0, 0, 255),
+    thickness: int = 2,
+) -> np.ndarray:
+    x1, y1, x2, y2 = box.astype(int)
+    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
+
+
+def draw_text(
+    image: np.ndarray,
+    text: str,
+    box: np.ndarray,
+    color: tuple[int, int, int] = (0, 0, 255),
+    font_size: float = 0.001,
+    text_thickness: int = 2,
+) -> np.ndarray:
+    x1, y1, x2, y2 = box.astype(int)
+    (tw, th), _ = cv2.getTextSize(
+        text=text,
+        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
+        fontScale=font_size,
+        thickness=text_thickness,
+    )
+    th = int(th * 1.2)
+
+    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)
+
+    return cv2.putText(
+        image,
+        text,
+        (x1, y1),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        font_size,
+        (255, 255, 255),
+        text_thickness,
+        cv2.LINE_AA,
+    )
+
+
+def draw_masks(
+    image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3
+) -> np.ndarray:
+    mask_img = image.copy()
+
+    # Draw bounding boxes and labels of detections
+    for box, class_id in zip(boxes, classes):
+        color = colors[class_id]
+
+        x1, y1, x2, y2 = box.astype(int)
+
+        # Draw fill rectangle in mask image
+        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)  # type: ignore
+
+    return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)
--- a/demo/phonic_chat/README.md
+++ b/demo/phonic_chat/README.md
@@ -0,0 +1,16 @@
+---
+title: Phonic AI Chat
+emoji: 🎙️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to Phonic AI's speech-to-speech model
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|PHONIC_API_KEY]
+python_version: 3.11
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/phonic_chat/app.py
+++ b/demo/phonic_chat/app.py
@@ -0,0 +1,116 @@
+import asyncio
+import base64
+import os
+
+import gradio as gr
+from gradio.utils import get_space
+import numpy as np
+from dotenv import load_dotenv
+from fastrtc import (
+    AdditionalOutputs,
+    AsyncStreamHandler,
+    Stream,
+    get_twilio_turn_credentials,
+    audio_to_float32,
+    wait_for_item,
+)
+from phonic.client import PhonicSTSClient, get_voices
+
+# Load variables from a local .env file before the environment is read below.
+load_dotenv()
+
+# WebSocket endpoint handed to PhonicSTSClient.
+STS_URI = "wss://api.phonic.co/v1/sts/ws"
+# Indexing os.environ raises KeyError at import time when the key is not set.
+API_KEY = os.environ["PHONIC_API_KEY"]
+# Used for both the input and output sample rate of PhonicHandler.
+SAMPLE_RATE = 44_100
+# Fetched once at import time; the ids populate the voice dropdown.
+voices = get_voices(API_KEY)
+voice_ids = [voice["id"] for voice in voices]
+
+
+class PhonicHandler(AsyncStreamHandler):
+    def __init__(self):
+        super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE)
+        self.output_queue = asyncio.Queue()
+        self.client = None
+
+    def copy(self) -> AsyncStreamHandler:
+        return PhonicHandler()
+
+    async def start_up(self):
+        await self.wait_for_args()
+        voice_id = self.latest_args[1]
+        async with PhonicSTSClient(STS_URI, API_KEY) as client:
+            self.client = client
+            sts_stream = client.sts(  # type: ignore
+                input_format="pcm_44100",
+                output_format="pcm_44100",
+                system_prompt="You are a helpful voice assistant. Respond conversationally.",
+                # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
+                voice_id=voice_id,
+            )
+            async for message in sts_stream:
+                message_type = message.get("type")
+                if message_type == "audio_chunk":
+                    audio_b64 = message["audio"]
+                    audio_bytes = base64.b64decode(audio_b64)
+                    await self.output_queue.put(
+                        (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
+                    )
+                    if text := message.get("text"):
+                        msg = {"role": "assistant", "content": text}
+                        await self.output_queue.put(AdditionalOutputs(msg))
+                elif message_type == "input_text":
+                    msg = {"role": "user", "content": message["text"]}
+                    await self.output_queue.put(AdditionalOutputs(msg))
+
+    async def emit(self):
+        return await wait_for_item(self.output_queue)
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        if not self.client:
+            return
+        audio_float32 = audio_to_float32(frame)
+        await self.client.send_audio(audio_float32)  # type: ignore
+
+    async def shutdown(self):
+        if self.client:
+            await self.client._websocket.close()
+        return super().shutdown()
+
+
+def add_to_chatbot(chatbot, message):
+    """Append ``message`` to ``chatbot`` in place and return the same object."""
+    chatbot.append(message)
+    return chatbot
+
+
+# Chat transcript component; starts empty and is extended via add_to_chatbot.
+chatbot = gr.Chatbot(type="messages", value=[])
+stream = Stream(
+    handler=PhonicHandler(),
+    mode="send-receive",
+    modality="audio",
+    additional_inputs=[
+        # Voice selector; presumably the value PhonicHandler.start_up() reads
+        # as latest_args[1] — confirm against the fastrtc argument order.
+        gr.Dropdown(
+            choices=voice_ids,
+            # NOTE(review): assumes "victoria" is among the fetched voice ids — confirm.
+            value="victoria",
+            label="Voice",
+            info="Select a voice from the dropdown",
+        )
+    ],
+    additional_outputs=[chatbot],
+    additional_outputs_handler=add_to_chatbot,
+    ui_args={
+        "title": "Phonic Chat (Powered by FastRTC ⚡️)",
+    },
+    # TURN credentials and the usage limits below are only set when
+    # get_space() returns a truthy value; otherwise each is None.
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+)
+
+# with stream.ui:
+#     state.change(lambda s: s, inputs=state, outputs=chatbot)
+
+if __name__ == "__main__":
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        stream.ui.launch(server_port=7860)
--- a/demo/phonic_chat/requirements.txt
+++ b/demo/phonic_chat/requirements.txt
@@ -0,0 +1,74 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements.in -o requirements.txt
+aiohappyeyeballs==2.4.6
+    # via aiohttp
+aiohttp==3.11.12
+    # via
+    #   aiohttp-retry
+    #   twilio
+aiohttp-retry==2.9.1
+    # via twilio
+aiosignal==1.3.2
+    # via aiohttp
+attrs==25.1.0
+    # via aiohttp
+certifi==2025.1.31
+    # via requests
+cffi==1.17.1
+    # via sounddevice
+charset-normalizer==3.4.1
+    # via requests
+fastrtc==0.0.1
+    # via -r requirements.in
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+idna==3.10
+    # via
+    #   requests
+    #   yarl
+isort==6.0.0
+    # via phonic-python
+loguru==0.7.3
+    # via phonic-python
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+numpy==2.2.3
+    # via
+    #   phonic-python
+    #   scipy
+phonic-python==0.1.3
+    # via -r requirements.in
+propcache==0.3.0
+    # via
+    #   aiohttp
+    #   yarl
+pycparser==2.22
+    # via cffi
+pyjwt==2.10.1
+    # via twilio
+python-dotenv==1.0.1
+    # via
+    #   -r requirements.in
+    #   phonic-python
+requests==2.32.3
+    # via
+    #   phonic-python
+    #   twilio
+scipy==1.15.2
+    # via phonic-python
+sounddevice==0.5.1
+    # via phonic-python
+twilio==9.4.6
+    # via -r requirements.in
+typing-extensions==4.12.2
+    # via phonic-python
+urllib3==2.3.0
+    # via requests
+websockets==15.0
+    # via phonic-python
+yarl==1.18.3
+    # via aiohttp
--- a/demo/talk_to_claude/README.md
+++ b/demo/talk_to_claude/README.md
@@ -0,0 +1,15 @@
+---
+title: Talk to Claude
+emoji: 👨‍🦰
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to Anthropic's Claude
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY, secret|ELEVENLABS_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_claude/app.py
+++ b/demo/talk_to_claude/app.py
@@ -0,0 +1,134 @@
+import json
+import os
+import time
+from pathlib import Path
+
+import anthropic
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from elevenlabs import ElevenLabs
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    Stream,
+    get_tts_model,
+    get_twilio_turn_credentials,
+)
+from fastrtc.utils import audio_to_bytes
+from gradio.utils import get_space
+from groq import Groq
+from pydantic import BaseModel
+
+# Load variables from a local .env file before the clients below are created.
+load_dotenv()
+
+# Groq client, used by response() for speech transcription.
+groq_client = Groq()
+# Anthropic client, used by response() to generate the reply text.
+claude_client = anthropic.Anthropic()
+# NOTE(review): not referenced again in this module; speech is produced by
+# tts_model below. Indexing os.environ raises KeyError when the key is unset.
+tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])
+
+# Directory containing this file; index.html is read from here.
+curr_dir = Path(__file__).parent
+
+# Text-to-speech model used by response() to stream the reply audio.
+tts_model = get_tts_model()
+
+
+def response(
+    audio: tuple[int, np.ndarray],
+    chatbot: list[dict] | None = None,
+):
+    chatbot = chatbot or []
+    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
+    prompt = groq_client.audio.transcriptions.create(
+        file=("audio-file.mp3", audio_to_bytes(audio)),
+        model="whisper-large-v3-turbo",
+        response_format="verbose_json",
+    ).text
+    chatbot.append({"role": "user", "content": prompt})
+    yield AdditionalOutputs(chatbot)
+    messages.append({"role": "user", "content": prompt})
+    response = claude_client.messages.create(
+        model="claude-3-5-haiku-20241022",
+        max_tokens=512,
+        messages=messages,  # type: ignore
+    )
+    response_text = " ".join(
+        block.text  # type: ignore
+        for block in response.content
+        if getattr(block, "type", None) == "text"
+    )
+    chatbot.append({"role": "assistant", "content": response_text})
+
+    start = time.time()
+
+    print("starting tts", start)
+    for i, chunk in enumerate(tts_model.stream_tts_sync(response_text)):
+        print("chunk", i, time.time() - start)
+        yield chunk
+        print("finished tts", time.time() - start)
+        yield AdditionalOutputs(chatbot)
+
+
+# Chat transcript component; it is both an input to and an output of response().
+chatbot = gr.Chatbot(type="messages")
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=ReplyOnPause(response),
+    # The handler returns its second argument unchanged; presumably that is
+    # the newly emitted chat history replacing the old one — confirm against
+    # the fastrtc additional_outputs_handler contract.
+    additional_outputs_handler=lambda a, b: b,
+    additional_inputs=[chatbot],
+    additional_outputs=[chatbot],
+    # TURN credentials and the usage limits below are only set when
+    # get_space() returns a truthy value; otherwise each is None.
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+)
+
+
+# One chat message: who said it (role) and what was said (content).
+class Message(BaseModel):
+    role: str
+    content: str
+
+
+# Request body of POST /input_hook: the connection id plus the chat history
+# to store as that connection's input.
+class InputData(BaseModel):
+    webrtc_id: str
+    chatbot: list[Message]
+
+
+# FastAPI application that hosts both the stream's endpoints (added by
+# stream.mount) and the custom routes defined below.
+app = FastAPI()
+stream.mount(app)
+
+
+@app.get("/")
+async def _():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
+    html_content = (curr_dir / "index.html").read_text()
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=html_content, status_code=200)
+
+
+@app.post("/input_hook")
+async def _(body: InputData):
+    stream.set_input(body.webrtc_id, body.model_dump()["chatbot"])
+    return {"status": "ok"}
+
+
+@app.get("/outputs")
+def _(webrtc_id: str):
+    async def output_stream():
+        async for output in stream.output_stream(webrtc_id):
+            chatbot = output.args[0]
+            yield f"event: output\ndata: {json.dumps(chatbot[-1])}\n\n"
+
+    return StreamingResponse(output_stream(), media_type="text/event-stream")
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/talk_to_claude/index.html
+++ b/demo/talk_to_claude/index.html
@@ -0,0 +1,546 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>RetroChat Audio</title>
+    <style>
+        body {
+            font-family: monospace;
+            background-color: #1a1a1a;
+            color: #00ff00;
+            margin: 0;
+            padding: 20px;
+            height: 100vh;
+            box-sizing: border-box;
+        }
+
+        .container {
+            display: flex;
+            flex-direction: column;
+            gap: 20px;
+            height: calc(100% - 100px);
+            margin-bottom: 20px;
+        }
+
+        .chat-container {
+            border: 2px solid #00ff00;
+            padding: 20px;
+            display: flex;
+            flex-direction: column;
+            flex-grow: 1;
+            box-sizing: border-box;
+        }
+
+        .controls-container {
+            border: 2px solid #00ff00;
+            padding: 20px;
+            display: flex;
+            align-items: center;
+            gap: 20px;
+            height: 128px;
+            box-sizing: border-box;
+        }
+
+        .visualization-container {
+            flex-grow: 1;
+            display: flex;
+            align-items: center;
+        }
+
+        .box-container {
+            display: flex;
+            justify-content: space-between;
+            height: 64px;
+            width: 100%;
+        }
+
+        .box {
+            height: 100%;
+            width: 8px;
+            background: #00ff00;
+            border-radius: 8px;
+            transition: transform 0.05s ease;
+        }
+
+        .chat-messages {
+            flex-grow: 1;
+            overflow-y: auto;
+            margin-bottom: 20px;
+            padding: 10px;
+            border: 1px solid #00ff00;
+        }
+
+        .message {
+            margin-bottom: 10px;
+            padding: 8px;
+            border-radius: 4px;
+        }
+
+        .message.user {
+            background-color: #003300;
+        }
+
+        .message.assistant {
+            background-color: #002200;
+        }
+
+        button {
+            height: 64px;
+            min-width: 120px;
+            background-color: #000;
+            color: #00ff00;
+            border: 2px solid #00ff00;
+            padding: 10px 20px;
+            font-family: monospace;
+            font-size: 16px;
+            cursor: pointer;
+            transition: all 0.3s;
+        }
+
+        button:hover {
+            border-width: 3px;
+        }
+
+        #audio-output {
+            display: none;
+        }
+
+        /* Retro CRT effect */
+        .crt-overlay {
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            background: repeating-linear-gradient(0deg,
+                    rgba(0, 255, 0, 0.03),
+                    rgba(0, 255, 0, 0.03) 1px,
+                    transparent 1px,
+                    transparent 2px);
+            pointer-events: none;
+        }
+
+        /* Add these new styles */
+        .icon-with-spinner {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .spinner {
+            width: 20px;
+            height: 20px;
+            border: 2px solid #00ff00;
+            border-top-color: transparent;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            flex-shrink: 0;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        .pulse-container {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .pulse-circle {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            background-color: #00ff00;
+            opacity: 0.2;
+            flex-shrink: 0;
+            transform: translateX(-0%) scale(var(--audio-level, 1));
+            transition: transform 0.1s ease;
+        }
+
+        /* Add styles for typing indicator */
+        .typing-indicator {
+            padding: 8px;
+            background-color: #002200;
+            border-radius: 4px;
+            margin-bottom: 10px;
+            display: none;
+        }
+
+        .dots {
+            display: inline-flex;
+            gap: 4px;
+        }
+
+        .dot {
+            width: 8px;
+            height: 8px;
+            background-color: #00ff00;
+            border-radius: 50%;
+            animation: pulse 1.5s infinite;
+            opacity: 0.5;
+        }
+
+        .dot:nth-child(2) {
+            animation-delay: 0.5s;
+        }
+
+        .dot:nth-child(3) {
+            animation-delay: 1s;
+        }
+
+        @keyframes pulse {
+
+            0%,
+            100% {
+                opacity: 0.5;
+                transform: scale(1);
+            }
+
+            50% {
+                opacity: 1;
+                transform: scale(1.2);
+            }
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
+    <div class="container">
+        <div class="chat-container">
+            <div class="chat-messages" id="chat-messages"></div>
+            <!-- Move typing indicator outside the chat messages -->
+            <div class="typing-indicator" id="typing-indicator">
+                <div class="dots">
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                </div>
+            </div>
+        </div>
+        <div class="controls-container">
+            <div class="visualization-container">
+                <div class="box-container">
+                    <!-- Boxes will be dynamically added here -->
+                </div>
+            </div>
+            <button id="start-button">Start</button>
+        </div>
+    </div>
+    <audio id="audio-output"></audio>
+
+    <script>
+        let audioContext;
+        let analyser_input, analyser_output;
+        let dataArray_input, dataArray_output;
+        let animationId_input, animationId_output;
+        let chatHistory = [];
+        let peerConnection;
+        let webrtc_id;
+
+        const audioOutput = document.getElementById('audio-output');
+        const startButton = document.getElementById('start-button');
+        const chatMessages = document.getElementById('chat-messages');
+
+        function updateButtonState() {
+            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+                startButton.innerHTML = `
+                    <div class="icon-with-spinner">
+                        <div class="spinner"></div>
+                        <span>Connecting...</span>
+                    </div>
+                `;
+            } else if (peerConnection && peerConnection.connectionState === 'connected') {
+                startButton.innerHTML = `
+                    <div class="pulse-container">
+                        <div class="pulse-circle"></div>
+                        <span>Stop</span>
+                    </div>
+                `;
+            } else {
+                startButton.innerHTML = 'Start';
+            }
+        }
+
+        function showError(message) {
+            const toast = document.getElementById('error-toast');
+            toast.textContent = message;
+            toast.className = 'toast error';
+            toast.style.display = 'block';
+
+            // Hide toast after 5 seconds
+            setTimeout(() => {
+                toast.style.display = 'none';
+            }, 5000);
+        }
+
+        async function setupWebRTC() {
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({
+                    audio: true
+                });
+
+                // Set up input visualization
+                audioContext = new AudioContext();
+                analyser_input = audioContext.createAnalyser();
+                const inputSource = audioContext.createMediaStreamSource(stream);
+                inputSource.connect(analyser_input);
+                analyser_input.fftSize = 64;
+                dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
+
+                function updateAudioLevel() {
+                    analyser_input.getByteFrequencyData(dataArray_input);
+                    const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
+                    const audioLevel = average / 255;
+
+                    const pulseCircle = document.querySelector('.pulse-circle');
+                    if (pulseCircle) {
+                        pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
+                    }
+
+                    animationId_input = requestAnimationFrame(updateAudioLevel);
+                }
+                updateAudioLevel();
+
+                stream.getTracks().forEach(track => {
+                    peerConnection.addTrack(track, stream);
+                });
+
+                // Add connection state change listener
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    console.log('Connection state:', peerConnection.connectionState);
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                    updateButtonState();
+                });
+
+                // Handle incoming audio
+                peerConnection.addEventListener('track', (evt) => {
+                    if (audioOutput.srcObject !== evt.streams[0]) {
+                        audioOutput.srcObject = evt.streams[0];
+                        audioOutput.play();
+
+                        // Set up output visualization
+                        analyser_output = audioContext.createAnalyser();
+                        const outputSource = audioContext.createMediaStreamSource(evt.streams[0]);
+                        outputSource.connect(analyser_output);
+                        analyser_output.fftSize = 2048;
+                        dataArray_output = new Uint8Array(analyser_output.frequencyBinCount);
+                        updateVisualization();
+                    }
+                });
+
+                // Create data channel for messages
+                const dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    const typingIndicator = document.getElementById('typing-indicator');
+
+                    if (eventJson.type === "error") {
+                        showError(eventJson.message);
+                    } else if (eventJson.type === "send_input") {
+                        fetch('/input_hook', {
+                            method: 'POST',
+                            headers: {
+                                'Content-Type': 'application/json',
+                            },
+                            body: JSON.stringify({
+                                webrtc_id: webrtc_id,
+                                chatbot: chatHistory
+                            })
+                        });
+                    } else if (eventJson.type === "log") {
+                        if (eventJson.data === "pause_detected") {
+                            typingIndicator.style.display = 'block';
+                            chatMessages.scrollTop = chatMessages.scrollHeight;
+                        } else if (eventJson.data === "response_starting") {
+                            typingIndicator.style.display = 'none';
+                        }
+                    }
+                };
+
+                // Create and send offer
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                webrtc_id = Math.random().toString(36).substring(7);
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+
+                // Start visualization
+                updateVisualization();
+
+                // create event stream to receive messages from /output
+                const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
+                eventSource.addEventListener("output", (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    addMessage(eventJson.role, eventJson.content);
+                });
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+            }
+        }
+
+        function addMessage(role, content) {
+            const messageDiv = document.createElement('div');
+            messageDiv.classList.add('message', role);
+            messageDiv.textContent = content;
+            chatMessages.appendChild(messageDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+            chatHistory.push({ role, content });
+        }
+
+        // Build the bars of the output visualizer: numBars <div class="box">
+        // elements appended to the .box-container element.
+        const boxContainer = document.querySelector('.box-container');
+        const numBars = 32;
+        for (let i = 0; i < numBars; i++) {
+            const box = document.createElement('div');
+            box.className = 'box';
+            boxContainer.appendChild(box);
+        }
+
+        // Replace the draw function with updateVisualization
+        function updateVisualization() {
+            animationId_output = requestAnimationFrame(updateVisualization);
+
+            analyser_output.getByteFrequencyData(dataArray_output);
+            const bars = document.querySelectorAll('.box');
+
+            for (let i = 0; i < bars.length; i++) {
+                const barHeight = (dataArray_output[i] / 255) * 2;
+                bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
+            }
+        }
+
+        function stop() {
+            if (peerConnection) {
+                if (peerConnection.getTransceivers) {
+                    peerConnection.getTransceivers().forEach(transceiver => {
+                        if (transceiver.stop) {
+                            transceiver.stop();
+                        }
+                    });
+                }
+
+                if (peerConnection.getSenders) {
+                    peerConnection.getSenders().forEach(sender => {
+                        if (sender.track && sender.track.stop) sender.track.stop();
+                    });
+                }
+
+                peerConnection.close();
+            }
+
+            if (animationId_input) {
+                cancelAnimationFrame(animationId_input);
+            }
+            if (animationId_output) {
+                cancelAnimationFrame(animationId_output);
+            }
+            if (audioContext) {
+                audioContext.close();
+            }
+
+            updateButtonState();
+        }
+
+        startButton.addEventListener('click', () => {
+            if (startButton.textContent === 'Start') {
+                setupWebRTC();
+            } else {
+                stop();
+            }
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/talk_to_claude/requirements.txt
+++ b/demo/talk_to_claude/requirements.txt
@@ -0,0 +1,6 @@
+fastrtc[vad, tts]
+elevenlabs
+groq
+anthropic
+twilio
+python-dotenv
--- a/demo/talk_to_gemini/README.md
+++ b/demo/talk_to_gemini/README.md
@@ -0,0 +1,15 @@
+---
+title: Talk to Gemini
+emoji: ♊️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to Gemini using Google's multimodal API
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_gemini/README_gradio.md
+++ b/demo/talk_to_gemini/README_gradio.md
@@ -0,0 +1,15 @@
+---
+title: Talk to Gemini (Gradio UI)
+emoji: ♊️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to Gemini (Gradio UI)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_gemini/app.py
+++ b/demo/talk_to_gemini/app.py
@@ -0,0 +1,181 @@
+import asyncio
+import base64
+import json
+import os
+import pathlib
+from typing import AsyncGenerator, Literal
+
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from fastrtc import (
+    AsyncStreamHandler,
+    Stream,
+    get_twilio_turn_credentials,
+    wait_for_item,
+)
+from google import genai
+from google.genai.types import (
+    LiveConnectConfig,
+    PrebuiltVoiceConfig,
+    SpeechConfig,
+    VoiceConfig,
+)
+from gradio.utils import get_space
+from pydantic import BaseModel
+
+current_dir = pathlib.Path(__file__).parent
+
+load_dotenv()
+
+
+def encode_audio(data: np.ndarray) -> str:
+    """Encode Audio data to send to the server"""
+    return base64.b64encode(data.tobytes()).decode("UTF-8")
+
+
+class GeminiHandler(AsyncStreamHandler):
+    """Relay incoming audio frames to a Gemini live session and queue the audio it sends back."""
+
+    def __init__(
+        self,
+        expected_layout: Literal["mono"] = "mono",
+        output_sample_rate: int = 24000,
+        output_frame_size: int = 480,
+    ) -> None:
+        super().__init__(
+            expected_layout,
+            output_sample_rate,
+            output_frame_size,
+            input_sample_rate=16000,
+        )
+        self.input_queue: asyncio.Queue = asyncio.Queue()  # base64 audio strings queued by receive()
+        self.output_queue: asyncio.Queue = asyncio.Queue()  # (sample_rate, int16 array) tuples read by emit()
+        self.quit: asyncio.Event = asyncio.Event()  # set by shutdown() to end the stream() generator
+
+    def copy(self) -> "GeminiHandler":
+        return GeminiHandler(  # new instance with the same output settings and its own fresh queues
+            expected_layout="mono",
+            output_sample_rate=self.output_sample_rate,
+            output_frame_size=self.output_frame_size,
+        )
+
+    async def start_up(self):
+        if not self.phone_mode:
+            await self.wait_for_args()
+            api_key, voice_name = self.latest_args[1:]  # skips element 0; expects exactly (api_key, voice_name) after it
+        else:
+            api_key, voice_name = None, "Puck"  # phone mode: no inputs are awaited, so use the env key and a fixed voice
+
+        client = genai.Client(
+            api_key=api_key or os.getenv("GEMINI_API_KEY"),  # empty or None key falls back to the environment
+            http_options={"api_version": "v1alpha"},
+        )
+
+        config = LiveConnectConfig(
+            response_modalities=["AUDIO"],  # type: ignore
+            speech_config=SpeechConfig(
+                voice_config=VoiceConfig(
+                    prebuilt_voice_config=PrebuiltVoiceConfig(
+                        voice_name=voice_name,
+                    )
+                )
+            ),
+        )
+        async with client.aio.live.connect(
+            model="gemini-2.0-flash-exp", config=config
+        ) as session:
+            async for audio in session.start_stream(
+                stream=self.stream(), mime_type="audio/pcm"
+            ):
+                if audio.data:
+                    array = np.frombuffer(audio.data, dtype=np.int16)  # reply bytes read as 16-bit samples
+                    self.output_queue.put_nowait((self.output_sample_rate, array))
+
+    async def stream(self) -> AsyncGenerator[str, None]:  # yields the base64 strings queued by receive()
+        while not self.quit.is_set():
+            try:
+                audio = await asyncio.wait_for(self.input_queue.get(), 0.1)  # short timeout so quit is re-checked
+                yield audio
+            except (asyncio.TimeoutError, TimeoutError):
+                pass  # nothing queued within the timeout; loop and check quit again
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        _, array = frame  # the sample rate in the frame is ignored
+        array = array.squeeze()
+        audio_message = encode_audio(array)
+        self.input_queue.put_nowait(audio_message)
+
+    async def emit(self) -> tuple[int, np.ndarray] | None:
+        return await wait_for_item(self.output_queue)
+
+    def shutdown(self) -> None:
+        self.quit.set()  # lets stream() fall out of its loop on the next check
+
+
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=GeminiHandler(),
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+    additional_inputs=[
+        gr.Textbox(
+            label="API Key",
+            type="password",
+            value=os.getenv("GEMINI_API_KEY") if not get_space() else "",
+        ),
+        gr.Dropdown(
+            label="Voice",
+            choices=[
+                "Puck",
+                "Charon",
+                "Kore",
+                "Fenrir",
+                "Aoede",
+            ],
+            value="Puck",
+        ),
+    ],
+)
+
+
+class InputData(BaseModel):
+    webrtc_id: str
+    voice_name: str
+    api_key: str
+
+
+app = FastAPI()
+
+stream.mount(app)
+
+
+@app.post("/input_hook")
+async def _(body: InputData):
+    stream.set_input(body.webrtc_id, body.api_key, body.voice_name)
+    return {"status": "ok"}
+
+
+@app.get("/")
+async def index():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
+    html_content = (current_dir / "index.html").read_text()
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=html_content)
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/talk_to_gemini/index.html
+++ b/demo/talk_to_gemini/index.html
@@ -0,0 +1,452 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Gemini Voice Chat</title>
+    <style>
+        :root {
+            --color-accent: #6366f1;
+            --color-background: #0f172a;
+            --color-surface: #1e293b;
+            --color-text: #e2e8f0;
+            --boxSize: 8px;
+            --gutter: 4px;
+        }
+
+        body {
+            margin: 0;
+            padding: 0;
+            background-color: var(--color-background);
+            color: var(--color-text);
+            font-family: system-ui, -apple-system, sans-serif;
+            min-height: 100vh;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+        }
+
+        .container {
+            width: 90%;
+            max-width: 800px;
+            background-color: var(--color-surface);
+            padding: 2rem;
+            border-radius: 1rem;
+            box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
+        }
+
+        .wave-container {
+            position: relative;
+            display: flex;
+            min-height: 100px;
+            max-height: 128px;
+            justify-content: center;
+            align-items: center;
+            margin: 2rem 0;
+        }
+
+        .box-container {
+            display: flex;
+            justify-content: space-between;
+            height: 64px;
+            width: 100%;
+        }
+
+        .box {
+            height: 100%;
+            width: var(--boxSize);
+            background: var(--color-accent);
+            border-radius: 8px;
+            transition: transform 0.05s ease;
+        }
+
+        .controls {
+            display: grid;
+            gap: 1rem;
+            margin-bottom: 2rem;
+        }
+
+        .input-group {
+            display: flex;
+            flex-direction: column;
+            gap: 0.5rem;
+        }
+
+        label {
+            font-size: 0.875rem;
+            font-weight: 500;
+        }
+
+        input,
+        select {
+            padding: 0.75rem;
+            border-radius: 0.5rem;
+            border: 1px solid rgba(255, 255, 255, 0.1);
+            background-color: var(--color-background);
+            color: var(--color-text);
+            font-size: 1rem;
+        }
+
+        button {
+            padding: 1rem 2rem;
+            border-radius: 0.5rem;
+            border: none;
+            background-color: var(--color-accent);
+            color: white;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.2s ease;
+        }
+
+        button:hover {
+            opacity: 0.9;
+            transform: translateY(-1px);
+        }
+
+        .icon-with-spinner {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .spinner {
+            width: 20px;
+            height: 20px;
+            border: 2px solid white;
+            border-top-color: transparent;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            flex-shrink: 0;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        .pulse-container {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .pulse-circle {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            background-color: white;
+            opacity: 0.2;
+            flex-shrink: 0;
+            transform: translateX(-0%) scale(var(--audio-level, 1));
+            transition: transform 0.1s ease;
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+
+<body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
+    <div style="text-align: center">
+        <h1>Gemini Voice Chat</h1>
+        <p>Speak with Gemini using real-time audio streaming</p>
+        <p>
+            Get a Gemini API key
+            <a href="https://ai.google.dev/gemini-api/docs/api-key">here</a>
+        </p>
+    </div>
+    <div class="container">
+        <div class="controls">
+            <div class="input-group">
+                <label for="api-key">API Key</label>
+                <input type="password" id="api-key" placeholder="Enter your API key">
+            </div>
+            <div class="input-group">
+                <label for="voice">Voice</label>
+                <select id="voice">
+                    <option value="Puck">Puck</option>
+                    <option value="Charon">Charon</option>
+                    <option value="Kore">Kore</option>
+                    <option value="Fenrir">Fenrir</option>
+                    <option value="Aoede">Aoede</option>
+                </select>
+            </div>
+        </div>
+
+        <div class="wave-container">
+            <div class="box-container">
+                <!-- Boxes will be dynamically added here -->
+            </div>
+        </div>
+
+        <button id="start-button">Start Recording</button>
+    </div>
+
+    <audio id="audio-output"></audio>
+
+    <script>
+        let peerConnection;
+        let audioContext;
+        let dataChannel;
+        let isRecording = false;
+        let webrtc_id;
+
+        const startButton = document.getElementById('start-button');
+        const apiKeyInput = document.getElementById('api-key');
+        const voiceSelect = document.getElementById('voice');
+        const audioOutput = document.getElementById('audio-output');
+        const boxContainer = document.querySelector('.box-container');
+
+        const numBars = 32;
+        for (let i = 0; i < numBars; i++) {
+            const box = document.createElement('div');
+            box.className = 'box';
+            boxContainer.appendChild(box);
+        }
+
+        function updateButtonState() {
+            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+                startButton.innerHTML = `
+                    <div class="icon-with-spinner">
+                        <div class="spinner"></div>
+                        <span>Connecting...</span>
+                    </div>
+                `;
+            } else if (peerConnection && peerConnection.connectionState === 'connected') {
+                startButton.innerHTML = `
+                    <div class="pulse-container">
+                        <div class="pulse-circle"></div>
+                        <span>Stop Recording</span>
+                    </div>
+                `;
+            } else {
+                startButton.innerHTML = 'Start Recording';
+            }
+        }
+
+        function showError(message) {
+            const toast = document.getElementById('error-toast');
+            toast.textContent = message;
+            toast.className = 'toast error';
+            toast.style.display = 'block';
+
+            // Hide toast after 5 seconds
+            setTimeout(() => {
+                toast.style.display = 'none';
+            }, 5000);
+        }
+
+        async function setupWebRTC() {
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+            webrtc_id = Math.random().toString(36).substring(7);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                stream.getTracks().forEach(track => peerConnection.addTrack(track, stream));
+
+                // Update audio visualization setup
+                audioContext = new AudioContext();
+                analyser_input = audioContext.createAnalyser();
+                const source = audioContext.createMediaStreamSource(stream);
+                source.connect(analyser_input);
+                analyser_input.fftSize = 64;
+                dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
+
+                function updateAudioLevel() {
+                    analyser_input.getByteFrequencyData(dataArray_input);
+                    const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
+                    const audioLevel = average / 255;
+
+                    const pulseCircle = document.querySelector('.pulse-circle');
+                    if (pulseCircle) {
+                        console.log("audioLevel", audioLevel);
+                        pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
+                    }
+
+                    animationId = requestAnimationFrame(updateAudioLevel);
+                }
+                updateAudioLevel();
+
+                // Add connection state change listener
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    console.log('connectionstatechange', peerConnection.connectionState);
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                    updateButtonState();
+                });
+
+                // Handle incoming audio
+                peerConnection.addEventListener('track', (evt) => {
+                    if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
+                        audioOutput.srcObject = evt.streams[0];
+                        audioOutput.play();
+
+                        // Set up audio visualization on the output stream
+                        audioContext = new AudioContext();
+                        analyser = audioContext.createAnalyser();
+                        const source = audioContext.createMediaStreamSource(evt.streams[0]);
+                        source.connect(analyser);
+                        analyser.fftSize = 2048;
+                        dataArray = new Uint8Array(analyser.frequencyBinCount);
+                        updateVisualization();
+                    }
+                });
+
+                // Create data channel for messages
+                dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    if (eventJson.type === "error") {
+                        showError(eventJson.message);
+                    } else if (eventJson.type === "send_input") {
+                        fetch('/input_hook', {
+                            method: 'POST',
+                            headers: {
+                                'Content-Type': 'application/json',
+                            },
+                            body: JSON.stringify({
+                                webrtc_id: webrtc_id,
+                                api_key: apiKeyInput.value,
+                                voice_name: voiceSelect.value
+                            })
+                        });
+                    }
+                };
+
+                // Create and send offer
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id,
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    startButton.textContent = 'Start Recording';
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+                startButton.textContent = 'Start Recording';
+            }
+        }
+
+        function updateVisualization() {
+            if (!analyser) return;
+
+            analyser.getByteFrequencyData(dataArray);
+            const bars = document.querySelectorAll('.box');
+
+            for (let i = 0; i < bars.length; i++) {
+                const barHeight = (dataArray[i] / 255) * 2;
+                bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
+            }
+
+            animationId = requestAnimationFrame(updateVisualization);
+        }
+
+        function stopWebRTC() {
+            if (peerConnection) {
+                peerConnection.close();
+            }
+            if (animationId) {
+                cancelAnimationFrame(animationId);
+            }
+            if (audioContext) {
+                audioContext.close();
+            }
+            updateButtonState();
+        }
+
+        startButton.addEventListener('click', () => {
+            if (!isRecording) {
+                setupWebRTC();
+                startButton.classList.add('recording');
+            } else {
+                stopWebRTC();
+                startButton.classList.remove('recording');
+            }
+            isRecording = !isRecording;
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/talk_to_gemini/requirements.txt
+++ b/demo/talk_to_gemini/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc
+python-dotenv
+google-genai
+twilio
--- a/demo/talk_to_openai/README.md
+++ b/demo/talk_to_openai/README.md
@@ -0,0 +1,15 @@
+---
+title: Talk to OpenAI
+emoji: 🗣️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to OpenAI using their multimodal API
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_openai/README_gradio.md
+++ b/demo/talk_to_openai/README_gradio.md
@@ -0,0 +1,15 @@
+---
+title: Talk to OpenAI (Gradio UI)
+emoji: 🗣️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to OpenAI (Gradio UI)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_openai/app.py
+++ b/demo/talk_to_openai/app.py
@@ -0,0 +1,141 @@
+import asyncio
+import base64
+import json
+from pathlib import Path
+
+import gradio as gr
+import numpy as np
+import openai
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import (
+    AdditionalOutputs,
+    AsyncStreamHandler,
+    Stream,
+    get_twilio_turn_credentials,
+    wait_for_item,
+)
+from gradio.utils import get_space
+from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
+
+load_dotenv()
+
+cur_dir = Path(__file__).parent
+
+SAMPLE_RATE = 24000
+
+
+class OpenAIHandler(AsyncStreamHandler):
+    def __init__(
+        self,
+    ) -> None:
+        super().__init__(
+            expected_layout="mono",
+            output_sample_rate=SAMPLE_RATE,
+            output_frame_size=480,
+            input_sample_rate=SAMPLE_RATE,
+        )
+        self.connection = None
+        self.output_queue = asyncio.Queue()
+
+    def copy(self):
+        return OpenAIHandler()
+
+    async def start_up(
+        self,
+    ):
+        """Connect to realtime API. Run forever in separate thread to keep connection open."""
+        self.client = openai.AsyncOpenAI()
+        async with self.client.beta.realtime.connect(
+            model="gpt-4o-mini-realtime-preview-2024-12-17"
+        ) as conn:
+            await conn.session.update(
+                session={"turn_detection": {"type": "server_vad"}}
+            )
+            self.connection = conn
+            async for event in self.connection:
+                if event.type == "response.audio_transcript.done":
+                    await self.output_queue.put(AdditionalOutputs(event))
+                if event.type == "response.audio.delta":
+                    await self.output_queue.put(
+                        (
+                            self.output_sample_rate,
+                            np.frombuffer(
+                                base64.b64decode(event.delta), dtype=np.int16
+                            ).reshape(1, -1),
+                        ),
+                    )
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        if not self.connection:
+            return
+        _, array = frame
+        array = array.squeeze()
+        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
+        await self.connection.input_audio_buffer.append(audio=audio_message)  # type: ignore
+
+    async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
+        return await wait_for_item(self.output_queue)
+
+    async def shutdown(self) -> None:
+        if self.connection:
+            await self.connection.close()
+            self.connection = None
+
+
+def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
+    chatbot.append({"role": "assistant", "content": response.transcript})
+    return chatbot
+
+
+chatbot = gr.Chatbot(type="messages")
+latest_message = gr.Textbox(type="text", visible=False)
+stream = Stream(
+    OpenAIHandler(),
+    mode="send-receive",
+    modality="audio",
+    additional_inputs=[chatbot],
+    additional_outputs=[chatbot],
+    additional_outputs_handler=update_chatbot,
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+)
+
+app = FastAPI()
+
+stream.mount(app)
+
+
+@app.get("/")
+async def _():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
+    html_content = (cur_dir / "index.html").read_text()
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=html_content)
+
+
+@app.get("/outputs")
+def _(webrtc_id: str):
+    async def output_stream():
+        import json
+
+        async for output in stream.output_stream(webrtc_id):
+            s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
+            yield f"event: output\ndata: {s}\n\n"
+
+    return StreamingResponse(output_stream(), media_type="text/event-stream")
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/talk_to_openai/index.html
+++ b/demo/talk_to_openai/index.html
@@ -0,0 +1,404 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>OpenAI Real-Time Chat</title>
+    <style>
+        body {
+            font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, sans-serif;
+            background-color: #0a0a0a;
+            color: #ffffff;
+            margin: 0;
+            padding: 20px;
+            height: 100vh;
+            box-sizing: border-box;
+        }
+
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            height: calc(100% - 100px);
+        }
+
+        .logo {
+            text-align: center;
+            margin-bottom: 40px;
+        }
+
+        .chat-container {
+            border: 1px solid #333;
+            padding: 20px;
+            height: 90%;
+            box-sizing: border-box;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .chat-messages {
+            flex-grow: 1;
+            overflow-y: auto;
+            margin-bottom: 20px;
+            padding: 10px;
+        }
+
+        .message {
+            margin-bottom: 20px;
+            padding: 12px;
+            border-radius: 4px;
+            font-size: 16px;
+            line-height: 1.5;
+        }
+
+        .message.user {
+            background-color: #1a1a1a;
+            margin-left: 20%;
+        }
+
+        .message.assistant {
+            background-color: #262626;
+            margin-right: 20%;
+        }
+
+        .controls {
+            text-align: center;
+            margin-top: 20px;
+        }
+
+        button {
+            background-color: transparent;
+            color: #ffffff;
+            border: 1px solid #ffffff;
+            padding: 12px 24px;
+            font-family: inherit;
+            font-size: 16px;
+            cursor: pointer;
+            transition: all 0.3s;
+            text-transform: uppercase;
+            letter-spacing: 1px;
+        }
+
+        button:hover {
+            border-width: 2px;
+            transform: scale(1.02);
+            box-shadow: 0 0 10px rgba(255, 255, 255, 0.2);
+        }
+
+        #audio-output {
+            display: none;
+        }
+
+        .icon-with-spinner {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .spinner {
+            width: 20px;
+            height: 20px;
+            border: 2px solid #ffffff;
+            border-top-color: transparent;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            flex-shrink: 0;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        .pulse-container {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .pulse-circle {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            background-color: #ffffff;
+            opacity: 0.2;
+            flex-shrink: 0;
+            transform: translateX(-0%) scale(var(--audio-level, 1));
+            transition: transform 0.1s ease;
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
+    <div class="container">
+        <div class="logo">
+            <h1>OpenAI Real-Time Chat</h1>
+        </div>
+        <div class="chat-container">
+            <div class="chat-messages" id="chat-messages"></div>
+        </div>
+        <div class="controls">
+            <button id="start-button">Start Conversation</button>
+        </div>
+    </div>
+    <audio id="audio-output"></audio>
+
+    <script>
+        let peerConnection;
+        let webrtc_id;
+        const audioOutput = document.getElementById('audio-output');
+        const startButton = document.getElementById('start-button');
+        const chatMessages = document.getElementById('chat-messages');
+
+        let audioLevel = 0;
+        let animationFrame;
+        let audioContext, analyser, audioSource;
+
+        function updateButtonState() {
+            const button = document.getElementById('start-button');
+            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+                button.innerHTML = `
+                    <div class="icon-with-spinner">
+                        <div class="spinner"></div>
+                        <span>Connecting...</span>
+                    </div>
+                `;
+            } else if (peerConnection && peerConnection.connectionState === 'connected') {
+                button.innerHTML = `
+                    <div class="pulse-container">
+                        <div class="pulse-circle"></div>
+                        <span>Stop Conversation</span>
+                    </div>
+                `;
+            } else {
+                button.innerHTML = 'Start Conversation';
+            }
+        }
+
+        function setupAudioVisualization(stream) {
+            audioContext = new (window.AudioContext || window.webkitAudioContext)();
+            analyser = audioContext.createAnalyser();
+            audioSource = audioContext.createMediaStreamSource(stream);
+            audioSource.connect(analyser);
+            analyser.fftSize = 64;
+            const dataArray = new Uint8Array(analyser.frequencyBinCount);
+
+            function updateAudioLevel() {
+                analyser.getByteFrequencyData(dataArray);
+                const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
+                audioLevel = average / 255;
+
+                // Update CSS variable instead of rebuilding the button
+                const pulseCircle = document.querySelector('.pulse-circle');
+                if (pulseCircle) {
+                    pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
+                }
+
+                animationFrame = requestAnimationFrame(updateAudioLevel);
+            }
+            updateAudioLevel();
+        }
+
+        function showError(message) {
+            // Show `message` in the toast with error styling and hide it
+            // again after 5 seconds.
+            const toast = document.getElementById('error-toast');
+            toast.textContent = message;
+            // Reset the class explicitly: the element starts as a bare
+            // "toast" and may still carry "toast warning" from the slow
+            // connection notice, so the error colours would not apply.
+            toast.className = 'toast error';
+            toast.style.display = 'block';
+
+            // Hide toast after 5 seconds
+            setTimeout(() => {
+                toast.style.display = 'none';
+            }, 5000);
+        }
+
+        async function setupWebRTC() {
+            isConnecting = true;
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({
+                    audio: true
+                });
+
+                setupAudioVisualization(stream);
+
+                stream.getTracks().forEach(track => {
+                    peerConnection.addTrack(track, stream);
+                });
+
+                peerConnection.addEventListener('track', (evt) => {
+                    if (audioOutput.srcObject !== evt.streams[0]) {
+                        audioOutput.srcObject = evt.streams[0];
+                        audioOutput.play();
+                    }
+                });
+
+                const dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    if (eventJson.type === "error") {
+                        showError(eventJson.message);
+                    }
+                };
+
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    console.log('connectionstatechange', peerConnection.connectionState);
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                    updateButtonState();
+                });
+
+                webrtc_id = Math.random().toString(36).substring(7);
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+
+                const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
+                eventSource.addEventListener("output", (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    addMessage("assistant", eventJson.content);
+
+                });
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+            }
+        }
+
+        function addMessage(role, content) {
+            const messageDiv = document.createElement('div');
+            messageDiv.classList.add('message', role);
+            messageDiv.textContent = content;
+            chatMessages.appendChild(messageDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+
+        function stop() {
+            if (animationFrame) {
+                cancelAnimationFrame(animationFrame);
+            }
+            if (audioContext) {
+                audioContext.close();
+                audioContext = null;
+                analyser = null;
+                audioSource = null;
+            }
+            if (peerConnection) {
+                if (peerConnection.getTransceivers) {
+                    peerConnection.getTransceivers().forEach(transceiver => {
+                        if (transceiver.stop) {
+                            transceiver.stop();
+                        }
+                    });
+                }
+
+                if (peerConnection.getSenders) {
+                    peerConnection.getSenders().forEach(sender => {
+                        if (sender.track && sender.track.stop) sender.track.stop();
+                    });
+                }
+                console.log('closing');
+                peerConnection.close();
+            }
+            updateButtonState();
+            audioLevel = 0;
+        }
+
+        startButton.addEventListener('click', () => {
+            console.log('clicked');
+            console.log(peerConnection, peerConnection?.connectionState);
+            if (!peerConnection || peerConnection.connectionState !== 'connected') {
+                setupWebRTC();
+            } else {
+                console.log('stopping');
+                stop();
+            }
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/talk_to_openai/requirements.txt
+++ b/demo/talk_to_openai/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc[vad]
+openai
+twilio
+python-dotenv
--- a/demo/talk_to_sambanova/README.md
+++ b/demo/talk_to_sambanova/README.md
@@ -0,0 +1,15 @@
+---
+title: Talk to Sambanova
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Llama 3.2 - SambaNova API
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_sambanova/README_gradio.md
+++ b/demo/talk_to_sambanova/README_gradio.md
@@ -0,0 +1,15 @@
+---
+title: Talk to Sambanova (Gradio)
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Llama 3.2 - SambaNova API (Gradio)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/talk_to_sambanova/app.py
+++ b/demo/talk_to_sambanova/app.py
@@ -0,0 +1,144 @@
+import base64
+import json
+import os
+from pathlib import Path
+
+import gradio as gr
+import huggingface_hub
+import numpy as np
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    Stream,
+    get_stt_model,
+    get_twilio_turn_credentials,
+)
+from gradio.utils import get_space
+from pydantic import BaseModel
+
+# Load variables from a .env file into the environment before they are read
+# below.
+load_dotenv()
+
+# Directory containing this file; index.html is read from it.
+curr_dir = Path(__file__).parent
+
+
+# Chat-completion client routed through the "sambanova" provider. The API key
+# is taken from SAMBANOVA_API_KEY and is None when that variable is unset.
+client = huggingface_hub.InferenceClient(
+    api_key=os.environ.get("SAMBANOVA_API_KEY"),
+    provider="sambanova",
+)
+# Speech-to-text model used to transcribe the user's audio in `response`.
+stt_model = get_stt_model()
+
+
+def response(
+    audio: tuple[int, np.ndarray],
+    gradio_chatbot: list[dict] | None = None,
+    conversation_state: list[dict] | None = None,
+):
+    gradio_chatbot = gradio_chatbot or []
+    conversation_state = conversation_state or []
+    print("chatbot", gradio_chatbot)
+
+    text = stt_model.stt(audio)
+    sample_rate, array = audio
+    gradio_chatbot.append(
+        {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
+    )
+    yield AdditionalOutputs(gradio_chatbot, conversation_state)
+
+    conversation_state.append({"role": "user", "content": text})
+    request = client.chat.completions.create(
+        model="meta-llama/Llama-3.2-3B-Instruct",
+        messages=conversation_state,  # type: ignore
+        temperature=0.1,
+        top_p=0.1,
+    )
+    response = {"role": "assistant", "content": request.choices[0].message.content}
+
+    conversation_state.append(response)
+    gradio_chatbot.append(response)
+
+    yield AdditionalOutputs(gradio_chatbot, conversation_state)
+
+
+# Components carrying the displayed chat history and the text-only
+# conversation state; both are passed to `response` and updated from its
+# outputs.
+chatbot = gr.Chatbot(type="messages", value=[])
+state = gr.State(value=[])
+# Send-only audio stream: `response` is wrapped in ReplyOnPause with a 16 kHz
+# input sample rate. The concurrency limit and TURN credentials are only set
+# when get_space() is truthy.
+stream = Stream(
+    ReplyOnPause(
+        response,  # type: ignore
+        input_sample_rate=16000,
+    ),
+    mode="send",
+    modality="audio",
+    additional_inputs=[chatbot, state],
+    additional_outputs=[chatbot, state],
+    # Keeps only the 3rd and 4th positional arguments - presumably the new
+    # chatbot/state values that follow the two current ones; TODO confirm
+    # against the handler signature expected by Stream.
+    additional_outputs_handler=lambda *a: (a[2], a[3]),
+    concurrency_limit=20 if get_space() else None,
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+)
+
+app = FastAPI()
+# Attach the stream to the FastAPI application.
+stream.mount(app)
+
+
+# One chat message: the speaker's role and the text content.
+class Message(BaseModel):
+    role: str
+    content: str
+
+
+# Request body of POST /input_hook: the connection id together with the
+# chatbot messages and conversation state to register for that connection.
+class InputData(BaseModel):
+    webrtc_id: str
+    chatbot: list[Message]
+    state: list[Message]
+
+
+@app.get("/")
+async def _():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
+    html_content = (curr_dir / "index.html").read_text()
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+    return HTMLResponse(content=html_content)
+
+
+@app.post("/input_hook")
+async def _(data: InputData):
+    body = data.model_dump()
+    stream.set_input(data.webrtc_id, body["chatbot"], body["state"])
+
+
+def audio_to_base64(file_path):
+    audio_format = "wav"
+    with open(file_path, "rb") as audio_file:
+        encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
+    return f"data:audio/{audio_format};base64,{encoded_audio}"
+
+
+@app.get("/outputs")
+async def _(webrtc_id: str):
+    async def output_stream():
+        async for output in stream.output_stream(webrtc_id):
+            chatbot = output.args[0]
+            state = output.args[1]
+            data = {
+                "message": state[-1],
+                "audio": audio_to_base64(chatbot[-1]["content"].value["path"])
+                if chatbot[-1]["role"] == "user"
+                else None,
+            }
+            yield f"event: output\ndata: {json.dumps(data)}\n\n"
+
+    return StreamingResponse(output_stream(), media_type="text/event-stream")
+
+
+if __name__ == "__main__":
+    import os
+
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        raise ValueError("Phone mode not supported")
+    else:
+        import uvicorn
+
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/talk_to_sambanova/index.html
+++ b/demo/talk_to_sambanova/index.html
@@ -0,0 +1,487 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Talk to Sambanova</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            background-color: #f8f9fa;
+            color: #1a1a1a;
+            margin: 0;
+            padding: 20px;
+            height: 100vh;
+            box-sizing: border-box;
+        }
+
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            height: 80%;
+        }
+
+        .logo {
+            text-align: center;
+            margin-bottom: 40px;
+        }
+
+        .chat-container {
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+            padding: 20px;
+            height: 90%;
+            box-sizing: border-box;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .chat-messages {
+            flex-grow: 1;
+            overflow-y: auto;
+            margin-bottom: 20px;
+            padding: 10px;
+        }
+
+        .message {
+            margin-bottom: 20px;
+            padding: 12px;
+            border-radius: 8px;
+            font-size: 14px;
+            line-height: 1.5;
+        }
+
+        .message.user {
+            background-color: #e9ecef;
+            margin-left: 20%;
+        }
+
+        .message.assistant {
+            background-color: #f1f3f5;
+            margin-right: 20%;
+        }
+
+        .controls {
+            text-align: center;
+            margin-top: 20px;
+        }
+
+        button {
+            background-color: #0066cc;
+            color: white;
+            border: none;
+            padding: 12px 24px;
+            font-family: inherit;
+            font-size: 14px;
+            cursor: pointer;
+            transition: all 0.3s;
+            border-radius: 4px;
+            font-weight: 500;
+        }
+
+        button:hover {
+            background-color: #0052a3;
+        }
+
+        #audio-output {
+            display: none;
+        }
+
+        .icon-with-spinner {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .spinner {
+            width: 20px;
+            height: 20px;
+            border: 2px solid #ffffff;
+            border-top-color: transparent;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            flex-shrink: 0;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        .pulse-container {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 12px;
+            min-width: 180px;
+        }
+
+        .pulse-circle {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            background-color: #ffffff;
+            opacity: 0.2;
+            flex-shrink: 0;
+            transform: translateX(-0%) scale(var(--audio-level, 1));
+            transition: transform 0.1s ease;
+        }
+
+        /* Add styles for typing indicator */
+        .typing-indicator {
+            padding: 8px;
+            background-color: #f1f3f5;
+            border-radius: 8px;
+            margin-bottom: 10px;
+            display: none;
+        }
+
+        .dots {
+            display: inline-flex;
+            gap: 4px;
+        }
+
+        .dot {
+            width: 8px;
+            height: 8px;
+            background-color: #0066cc;
+            border-radius: 50%;
+            animation: pulse 1.5s infinite;
+            opacity: 0.5;
+        }
+
+        .dot:nth-child(2) {
+            animation-delay: 0.5s;
+        }
+
+        .dot:nth-child(3) {
+            animation-delay: 1s;
+        }
+
+        @keyframes pulse {
+
+            0%,
+            100% {
+                opacity: 0.5;
+                transform: scale(1);
+            }
+
+            50% {
+                opacity: 1;
+                transform: scale(1.2);
+            }
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
+    <div class="container">
+        <div class="logo">
+            <h1>Talk to Sambanova 🗣️</h1>
+            <h2 style="font-size: 1.2em; color: #666; margin-top: 10px;">Speak to Llama 3.2 powered by Sambanova API
+            </h2>
+        </div>
+        <div class="chat-container">
+            <div class="chat-messages" id="chat-messages"></div>
+            <div class="typing-indicator" id="typing-indicator">
+                <div class="dots">
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                    <div class="dot"></div>
+                </div>
+            </div>
+        </div>
+        <div class="controls">
+            <button id="start-button">Start Conversation</button>
+        </div>
+    </div>
+    <audio id="audio-output"></audio>
+
+    <script>
+        let peerConnection;
+        let webrtc_id;
+        const startButton = document.getElementById('start-button');
+        const chatMessages = document.getElementById('chat-messages');
+
+        let audioLevel = 0;
+        let animationFrame;
+        let audioContext, analyser, audioSource;
+        let messages = [];
+        let eventSource;
+
+        function updateButtonState() {
+            const button = document.getElementById('start-button');
+            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+                button.innerHTML = `
+                    <div class="icon-with-spinner">
+                        <div class="spinner"></div>
+                        <span>Connecting...</span>
+                    </div>
+                `;
+            } else if (peerConnection && peerConnection.connectionState === 'connected') {
+                button.innerHTML = `
+                    <div class="pulse-container">
+                        <div class="pulse-circle"></div>
+                        <span>Stop Conversation</span>
+                    </div>
+                `;
+            } else {
+                button.innerHTML = 'Start Conversation';
+            }
+        }
+
+        function setupAudioVisualization(stream) {
+            audioContext = new (window.AudioContext || window.webkitAudioContext)();
+            analyser = audioContext.createAnalyser();
+            audioSource = audioContext.createMediaStreamSource(stream);
+            audioSource.connect(analyser);
+            analyser.fftSize = 64;
+            const dataArray = new Uint8Array(analyser.frequencyBinCount);
+
+            function updateAudioLevel() {
+                analyser.getByteFrequencyData(dataArray);
+                const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
+                audioLevel = average / 255;
+
+                const pulseCircle = document.querySelector('.pulse-circle');
+                if (pulseCircle) {
+                    pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
+                }
+
+                animationFrame = requestAnimationFrame(updateAudioLevel);
+            }
+            updateAudioLevel();
+        }
+
+        function showError(message) {
+            // Show `message` in the toast with error styling and hide it
+            // again after 5 seconds.
+            const toast = document.getElementById('error-toast');
+            const hideToast = () => {
+                toast.style.display = 'none';
+            };
+
+            toast.className = 'toast error';
+            toast.textContent = message;
+            toast.style.display = 'block';
+
+            setTimeout(hideToast, 5000);
+        }
+
+        function handleMessage(event) {
+            const eventJson = JSON.parse(event.data);
+            const typingIndicator = document.getElementById('typing-indicator');
+
+            if (eventJson.type === "error") {
+                showError(eventJson.message);
+            } else if (eventJson.type === "send_input") {
+                fetch('/input_hook', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        webrtc_id: webrtc_id,
+                        chatbot: messages,
+                        state: messages
+                    })
+                });
+            } else if (eventJson.type === "log") {
+                if (eventJson.data === "pause_detected") {
+                    typingIndicator.style.display = 'block';
+                    chatMessages.scrollTop = chatMessages.scrollHeight;
+                } else if (eventJson.data === "response_starting") {
+                    typingIndicator.style.display = 'none';
+                }
+            }
+        }
+
+        async function setupWebRTC() {
+            const config = __RTC_CONFIGURATION__;
+            peerConnection = new RTCPeerConnection(config);
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({
+                    audio: true
+                });
+
+                setupAudioVisualization(stream);
+
+                stream.getTracks().forEach(track => {
+                    peerConnection.addTrack(track, stream);
+                });
+
+                const dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onmessage = handleMessage;
+
+                const offer = await peerConnection.createOffer();
+                await peerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (peerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (peerConnection.iceGatheringState === "complete") {
+                                peerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        peerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                peerConnection.addEventListener('connectionstatechange', () => {
+                    console.log('connectionstatechange', peerConnection.connectionState);
+                    if (peerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                    updateButtonState();
+                });
+
+                webrtc_id = Math.random().toString(36).substring(7);
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: peerConnection.localDescription.sdp,
+                        type: peerConnection.localDescription.type,
+                        webrtc_id: webrtc_id
+                    })
+                });
+
+                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    return;
+                }
+
+                await peerConnection.setRemoteDescription(serverResponse);
+
+                eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
+                eventSource.addEventListener("output", (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    console.log(eventJson);
+                    messages.push(eventJson.message);
+                    addMessage(eventJson.message.role, eventJson.audio ?? eventJson.message.content);
+                });
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('Error setting up WebRTC:', err);
+                showError('Failed to establish connection. Please try again.');
+                stop();
+            }
+        }
+
+        function addMessage(role, content) {
+            const messageDiv = document.createElement('div');
+            messageDiv.classList.add('message', role);
+
+            if (role === 'user') {
+                // Create audio element for user messages
+                const audio = document.createElement('audio');
+                audio.controls = true;
+                audio.src = content;
+                messageDiv.appendChild(audio);
+            } else {
+                // Text content for assistant messages
+                messageDiv.textContent = content;
+            }
+
+            chatMessages.appendChild(messageDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+
+        function stop() {
+            if (eventSource) {
+                eventSource.close();
+                eventSource = null;
+            }
+
+            if (animationFrame) {
+                cancelAnimationFrame(animationFrame);
+            }
+            if (audioContext) {
+                audioContext.close();
+                audioContext = null;
+                analyser = null;
+                audioSource = null;
+            }
+            if (peerConnection) {
+                if (peerConnection.getTransceivers) {
+                    peerConnection.getTransceivers().forEach(transceiver => {
+                        if (transceiver.stop) {
+                            transceiver.stop();
+                        }
+                    });
+                }
+
+                if (peerConnection.getSenders) {
+                    peerConnection.getSenders().forEach(sender => {
+                        if (sender.track && sender.track.stop) sender.track.stop();
+                    });
+                }
+                peerConnection.close();
+            }
+            updateButtonState();
+            audioLevel = 0;
+        }
+
+        startButton.addEventListener('click', () => {
+            if (!peerConnection || peerConnection.connectionState !== 'connected') {
+                setupWebRTC();
+            } else {
+                stop();
+            }
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/talk_to_sambanova/requirements.txt
+++ b/demo/talk_to_sambanova/requirements.txt
@@ -0,0 +1,4 @@
+fastrtc[vad, stt]
+python-dotenv
+huggingface_hub>=0.29.0
+twilio
--- a/demo/talk_to_smolagents/README.md
+++ b/demo/talk_to_smolagents/README.md
@@ -0,0 +1,98 @@
+---
+title: Talk to Smolagents
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: FastRTC Voice Agent with smolagents
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+---
+
+# Voice LLM Agent with Image Generation
+
+A voice-enabled AI assistant powered by FastRTC that can:
+1. Stream audio in real-time using WebRTC
+2. Listen and respond with natural pauses in conversation
+3. Generate images based on your requests
+4. Maintain conversation context across exchanges
+
+This app combines the real-time communication capabilities of FastRTC with the powerful agent framework of smolagents.
+
+## Key Features
+
+- **Real-time Streaming**: Uses FastRTC's WebRTC-based audio streaming
+- **Voice Activation**: Automatic detection of speech pauses to trigger responses
+- **Multi-modal Interaction**: Combines voice and image generation in a single interface
+
+## Setup
+
+1. Install Python 3.9+ and create a virtual environment:
+   ```bash
+   python -m venv .venv
+   source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+   ```
+
+2. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Create a `.env` file with the following:
+   ```
+   HF_TOKEN=your_huggingface_api_key
+   MODE=UI  # Use 'UI' for Gradio interface, leave blank for HTML interface
+   ```
+
+## Running the App
+
+### With Gradio UI (Recommended)
+
+```bash
+MODE=UI python app.py
+```
+
+This launches a Gradio UI at http://localhost:7860 with:
+- FastRTC's built-in streaming audio components
+- A chat interface showing the conversation
+- An image display panel for generated images
+
+## How to Use
+
+1. Click the microphone button to start streaming your voice.
+2. Speak naturally - the app will automatically detect when you pause.
+3. Ask the agent to generate an image, for example:
+   - "Create an image of a magical forest with glowing mushrooms."
+   - "Generate a picture of a futuristic city with flying cars."
+4. View the generated image and hear the agent's response.
+
+## Technical Architecture
+
+### FastRTC Components
+
+- **Stream**: Core component that handles WebRTC connections and audio streaming
+- **ReplyOnPause**: Detects when the user stops speaking to trigger a response
+- **get_stt_model/get_tts_model**: Provides optimized speech-to-text and text-to-speech models
+
+### smolagents Components
+
+- **CodeAgent**: Intelligent agent that can use tools based on natural language inputs
+- **Tool.from_space**: Integration with Hugging Face Spaces for image generation
+- **HfApiModel**: Connection to powerful language models for understanding requests
+
+### Integration Flow
+
+1. FastRTC streams and processes audio input in real-time
+2. Speech is converted to text and passed to the smolagents CodeAgent
+3. The agent processes the request and calls tools when needed
+4. Responses and generated images are streamed back through FastRTC
+5. The UI updates to show both text responses and generated images
+
+## Advanced Features
+
+- Conversation history is maintained across exchanges
+- Error handling ensures the app continues working even if agent processing fails
+- The application leverages FastRTC's streaming capabilities for efficient audio transmission
--- a/demo/talk_to_smolagents/app.py
+++ b/demo/talk_to_smolagents/app.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+from typing import Dict, List
+
+from dotenv import load_dotenv
+from fastrtc import (
+    ReplyOnPause,
+    Stream,
+    get_stt_model,
+    get_tts_model,
+    get_twilio_turn_credentials,
+)
+from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
+
+# Load environment variables via python-dotenv
+load_dotenv()
+
+# Directory containing this script (not referenced anywhere else in this module)
+curr_dir = Path(__file__).parent
+
+# Speech-to-text / text-to-speech models, created once at import time
+stt_model = get_stt_model()
+tts_model = get_tts_model()
+
+# NOTE(review): never read or updated in this module, so no history is kept yet
+conversation_state: List[Dict[str, str]] = []
+
+# System prompt prepended to every user request before it is sent to the agent
+system_prompt = """You are a helpful assistant that helps with finding places to 
+work remotely from. You should specifically check against reviews and ratings of the 
+place. You should use this criteria to find the best place to work from:
+- Price
+- Reviews
+- Ratings
+- Location
+- WIFI
+Only return the name, address of the place, and a short description of the place.
+Always search for real places.
+Only return real places, not fake ones.
+If you receive anything other than a location, you should ask for a location.
+<example>
+User: I am in Paris, France. Can you find me a place to work from?
+Assistant: I found a place called "Le Café de la Paix" at 123 Rue de la Paix, 
+Paris, France. It has good reviews and is in a great location.
+</example>
+<example>
+User: I am in London, UK. Can you find me a place to work from?
+Assistant: I found a place called "The London Coffee Company".
+</example>
+<example>
+User: How many people are in the room?
+Assistant: I only respond to requests about finding places to work from.
+</example>
+
+"""
+
+model = HfApiModel(provider="together", model="Qwen/Qwen2.5-Coder-32B-Instruct")
+# One module-level agent instance, reused by every process_response() call
+agent = CodeAgent(
+    tools=[
+        DuckDuckGoSearchTool(),  # web search is the only tool given to the agent
+    ],
+    model=model,
+    max_steps=10,
+    verbosity_level=2,
+    description="Search the web for cafes to work from.",
+)
+
+
+def process_response(audio):
+    """Transcribe *audio*, run the agent on it and stream the spoken reply.
+
+    Yields the audio chunks produced by the TTS model; yields nothing when
+    the transcription is empty or whitespace-only.
+    """
+    transcript = stt_model.stt(audio)
+    if not transcript.strip():
+        return
+
+    # The system prompt is prepended to every request sent to the agent.
+    answer = agent.run(f"{system_prompt}\n\n{transcript}")
+
+    # A falsy answer (e.g. None) is spoken as an empty string.
+    yield from tts_model.stream_tts_sync(answer or "")
+
+
+stream = Stream(
+    handler=ReplyOnPause(process_response, input_sample_rate=16000),
+    modality="audio",
+    mode="send-receive",
+    ui_args={
+        "pulse_color": "rgb(255, 255, 255)",
+        "icon_button_color": "rgb(255, 255, 255)",
+        "title": "🧑‍💻The Coworking Agent",
+    },
+    rtc_configuration=get_twilio_turn_credentials(),  # evaluated once, at import time
+)
+
+if __name__ == "__main__":
+    stream.ui.launch(server_port=7860)  # MODE is not consulted here; the Gradio UI is always launched
--- a/demo/talk_to_smolagents/requirements.txt
+++ b/demo/talk_to_smolagents/requirements.txt
@@ -0,0 +1,136 @@
+# This file was autogenerated by uv via the following command:
+#    uv export --format requirements-txt --no-hashes
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.6
+aiohttp==3.11.13
+aiohttp-retry==2.9.1
+aioice==0.9.0
+aiortc==1.10.1
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.8.0
+async-timeout==5.0.1 ; python_full_version < '3.11'
+attrs==25.1.0
+audioop-lts==0.2.1 ; python_full_version >= '3.13'
+audioread==3.0.1
+av==13.1.0
+babel==2.17.0
+beautifulsoup4==4.13.3
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+colorama==0.4.6
+coloredlogs==15.0.1
+colorlog==6.9.0
+cryptography==44.0.1
+csvw==3.5.1
+decorator==5.2.1
+dlinfo==2.0.0
+dnspython==2.7.0
+duckduckgo-search==7.5.0
+espeakng-loader==0.2.4
+exceptiongroup==1.2.2 ; python_full_version < '3.11'
+fastapi==0.115.8
+fastrtc==0.0.8.post1
+fastrtc-moonshine-onnx==20241016
+ffmpy==0.5.0
+filelock==3.17.0
+flatbuffers==25.2.10
+frozenlist==1.5.0
+fsspec==2025.2.0
+google-crc32c==1.6.0
+gradio==5.19.0
+gradio-client==1.7.2
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.29.1
+humanfriendly==10.0
+idna==3.10
+ifaddr==0.2.0
+isodate==0.7.2
+jinja2==3.1.5
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kokoro-onnx==0.4.3
+language-tags==1.2.0
+lazy-loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.44.0
+lxml==5.3.1
+markdown-it-py==3.0.0
+markdownify==1.0.0
+markupsafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.1.0
+numba==0.61.0
+numpy==2.1.3
+onnxruntime==1.20.1
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+phonemizer-fork==3.3.1
+pillow==11.1.0
+platformdirs==4.3.6
+pooch==1.8.2
+primp==0.14.0
+propcache==0.3.0
+protobuf==5.29.3
+pycparser==2.22
+pydantic==2.10.6
+pydantic-core==2.27.2
+pydub==0.25.1
+pyee==12.1.1
+pygments==2.19.1
+pyjwt==2.10.1
+pylibsrtp==0.11.0
+pyopenssl==25.0.0
+pyparsing==3.2.1
+pyreadline3==3.5.4 ; sys_platform == 'win32'
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.20
+pytz==2025.1
+pyyaml==6.0.2
+rdflib==7.1.3
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+rfc3986==1.5.0
+rich==13.9.4
+rpds-py==0.23.1
+ruff==0.9.7 ; sys_platform != 'emscripten'
+safehttpx==0.1.6
+scikit-learn==1.6.1
+scipy==1.15.2
+segments==2.3.0
+semantic-version==2.10.0
+shellingham==1.5.4 ; sys_platform != 'emscripten'
+six==1.17.0
+smolagents==1.9.2
+sniffio==1.3.1
+soundfile==0.13.1
+soupsieve==2.6
+soxr==0.5.0.post1
+standard-aifc==3.13.0 ; python_full_version >= '3.13'
+standard-chunk==3.13.0 ; python_full_version >= '3.13'
+standard-sunau==3.13.0 ; python_full_version >= '3.13'
+starlette==0.45.3
+sympy==1.13.3
+threadpoolctl==3.5.0
+tokenizers==0.21.0
+tomlkit==0.13.2
+tqdm==4.67.1
+twilio==9.4.6
+typer==0.15.1 ; sys_platform != 'emscripten'
+typing-extensions==4.12.2
+tzdata==2025.1
+uritemplate==4.1.1
+urllib3==2.3.0
+uvicorn==0.34.0 ; sys_platform != 'emscripten'
+websockets==15.0
+yarl==1.18.3
--- a/demo/video.mp4
+++ b/demo/video.mp4
--- a/demo/voice_text_editor/README.md
+++ b/demo/voice_text_editor/README.md
@@ -0,0 +1,19 @@
+---
+title: Voice Text Editor
+emoji: 📝
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Edit text documents with your voice!
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|SAMBANOVA_API_KEY]
+---
+
+# Voice Text Editor
+
+Edit text documents with your voice!
+
+
--- a/demo/voice_text_editor/app.py
+++ b/demo/voice_text_editor/app.py
@@ -0,0 +1,113 @@
+import os
+
+import gradio as gr
+from dotenv import load_dotenv
+from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model
+from openai import OpenAI
+
+load_dotenv()
+# OpenAI client pointed at the SambaNova endpoint; the key comes from SAMBANOVA_API_KEY (None if unset)
+sambanova_client = OpenAI(
+    api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1"
+)
+stt_model = get_stt_model()  # created once at import time and reused by edit()
+
+
+SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands.
+
+For each interaction:
+1. You will receive the current state of a text document and a voice input from the user.
+2. Determine if the input is:
+   a) A command to modify the document (e.g., "delete the last line", "capitalize that")
+   b) Content to be added to the document (e.g., "buy 12 eggs at the store")
+   c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24")
+3. Return ONLY the new document state after the changes have been applied.
+
+Example:
+
+CURRENT DOCUMENT:
+
+
+Meeting notes:
+- Buy GPUs
+- Meet with Joe
+
+USER INPUT: Make that 100 GPUS
+
+NEW DOCUMENT STATE:
+
+Meeting notes:
+- Buy 100 GPUs
+- Meet with Joe
+
+Example 2:
+
+CURRENT DOCUMENT:
+
+Project Proposal
+
+USER INPUT: Make that a header
+
+NEW DOCUMENT STATE:
+
+# Project Proposal
+
+When handling commands:
+- Apply the requested changes precisely to the document
+- Support operations like adding, deleting, modifying, and moving text
+- Understand contextual references like "that", "the last line", "the second paragraph"
+
+When handling content additions:
+- Add the new text at the appropriate location (usually at the end or cursor position)
+- Format it appropriately based on the document context
+- If the user says to "add" or "insert" do not remove text that was already in the document.
+
+When handling content modifications:
+- Identify what part of the document the user is referring to
+- Apply the requested change while preserving the rest of the content
+- Be smart about contextual references (e.g., "make that 24" should know to replace a number)
+
+NEVER include any text in the new document state that is not part of the user's input.
+NEVER include the phrase "CURRENT DOCUMENT" in the new document state.
+NEVER reword the user's input unless you are explicitly asked to do so.
+"""
+
+
+def edit(audio, current_document: str):
+    """Transcribe *audio*, ask the LLM to apply it and yield the new document."""
+    spoken = stt_model.stt(audio)
+    print(f"Prompt: {spoken}")
+    user_turn = f"CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {spoken}"
+    conversation = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": user_turn},
+    ]
+    completion = sambanova_client.chat.completions.create(
+        model="Meta-Llama-3.3-70B-Instruct",
+        messages=conversation,
+        max_tokens=200,  # upper bound on the tokens generated for the reply
+    )
+    # The model's reply is emitted as the single additional output.
+    yield AdditionalOutputs(completion.choices[0].message.content)
+
+
+doc = gr.Textbox(value="", label="Current Document")  # used as both additional input and additional output
+
+
+stream = Stream(
+    ReplyOnPause(edit),
+    modality="audio",
+    mode="send",  # edit() yields only AdditionalOutputs, never audio
+    additional_inputs=[doc],
+    additional_outputs=[doc],
+    additional_outputs_handler=lambda prev, current: current,  # keeps only `current`
+    ui_args={"title": "Voice Text Editor with FastRTC 🗣️"},
+)
+
+if __name__ == "__main__":
+    # MODE=PHONE serves the stream via fastphone; "UI", unset or any other
+    # value launches the Gradio UI on the same port.
+    if os.getenv("MODE") == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        stream.ui.launch(server_port=7860)
--- a/demo/voice_text_editor_local/app.py
+++ b/demo/voice_text_editor_local/app.py
@@ -0,0 +1,126 @@
+import os
+
+import gradio as gr
+import requests
+from dotenv import load_dotenv
+from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model
+
+load_dotenv()
+
+stt_model = get_stt_model()  # created once at import time and reused by edit()
+
+SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands.
+
+For each interaction:
+1. You will receive the current state of a text document and a voice input from the user.
+2. Determine if the input is:
+   a) A command to modify the document (e.g., "delete the last line", "capitalize that")
+   b) Content to be added to the document (e.g., "buy 12 eggs at the store")
+   c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24")
+3. Return ONLY the new document state after the changes have been applied.
+
+Example:
+
+CURRENT DOCUMENT:
+
+Meeting notes:
+- Buy GPUs
+- Meet with Joe
+
+USER INPUT: Make that 100 GPUS
+
+NEW DOCUMENT STATE:
+
+Meeting notes:
+- Buy 100 GPUs
+- Meet with Joe
+
+Example 2:
+
+CURRENT DOCUMENT:
+
+Project Proposal
+
+USER INPUT: Make that a header
+
+NEW DOCUMENT STATE:
+
+# Project Proposal
+
+When handling commands:
+- Apply the requested changes precisely to the document
+- Support operations like adding, deleting, modifying, and moving text
+- Understand contextual references like "that", "the last line", "the second paragraph"
+
+When handling content additions:
+- Add the new text at the appropriate location (usually at the end or cursor position)
+- Format it appropriately based on the document context
+- If the user says to "add" or "insert" do not remove text that was already in the document.
+
+When handling content modifications:
+- Identify what part of the document the user is referring to
+- Apply the requested change while preserving the rest of the content
+- Be smart about contextual references (e.g., "make that 24" should know to replace a number)
+
+NEVER include any text in the new document state that is not part of the user's input.
+NEVER include the phrase "CURRENT DOCUMENT" in the new document state.
+NEVER reword the user's input unless you are explicitly asked to do so.
+"""
+
+
+def edit(audio, current_document: str):
+    """Transcribe *audio*, ask a local ollama model to edit the document.
+
+    Yields one AdditionalOutputs holding the new document text, or an error
+    message if the request to ollama fails.
+    """
+    prompt = stt_model.stt(audio)
+    print(f"Prompt: {prompt}")
+    full_prompt = (
+        f"{SYSTEM_PROMPT}\n\n"
+        f"User: CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {prompt}\n\n"
+        f"Assistant:"
+    )
+
+    try:
+        response = requests.post(
+            "http://localhost:11434/api/generate",
+            json={
+                "model": "qwen2.5",
+                "prompt": full_prompt,
+                "stream": False,
+                # ollama reads generation limits from "options"; a top-level
+                # "max_tokens" key is not part of its API.
+                "options": {"num_predict": 200},
+            },
+        )
+        response.raise_for_status()  # Raise an exception for bad status codes
+        doc = response.json()["response"].strip()
+        # removeprefix drops only a literal leading "Assistant:"; str.lstrip
+        # treats its argument as a character set and would eat real text.
+        yield AdditionalOutputs(doc.removeprefix("Assistant:").strip())
+    except requests.RequestException as e:
+        error_message = "Error: Could not connect to ollama. Please ensure it's running and qwen2.5 is loaded."
+        print(f"API Error: {e}")
+        yield AdditionalOutputs(error_message)
+
+
+doc = gr.Textbox(value="", label="Current Document")  # used as both additional input and additional output
+
+stream = Stream(
+    ReplyOnPause(edit),
+    modality="audio",
+    mode="send",  # edit() yields only AdditionalOutputs, never audio
+    additional_inputs=[doc],
+    additional_outputs=[doc],
+    additional_outputs_handler=lambda prev, current: current,  # keeps only `current`
+    ui_args={"title": "Voice Text Editor with FastRTC 🗣️"},
+)
+
+if __name__ == "__main__":
+    # MODE=PHONE serves the stream via fastphone; "UI", unset or any other
+    # value launches the Gradio UI on the same port.
+    if os.getenv("MODE") == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        stream.ui.launch(server_port=7860)
--- a/demo/webrtc_vs_websocket/README.md
+++ b/demo/webrtc_vs_websocket/README.md
@@ -0,0 +1,15 @@
+---
+title: Webrtc Vs Websocket
+emoji: 🧪
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Compare Round Trip Times between WebRTC and Websockets
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|ELEVENLABS_API_KEY, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/webrtc_vs_websocket/app.py
+++ b/demo/webrtc_vs_websocket/app.py
@@ -0,0 +1,147 @@
+import json
+import logging
+import os
+from pathlib import Path
+
+import anthropic
+import gradio as gr
+import numpy as np
+from dotenv import load_dotenv
+from elevenlabs import ElevenLabs
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_twilio_turn_credentials
+from fastrtc.utils import aggregate_bytes_to_16bit, audio_to_bytes
+from gradio.utils import get_space
+from groq import Groq
+from pydantic import BaseModel
+
+# Configure the root logger to WARNING to suppress debug messages from other libraries
+logging.basicConfig(level=logging.WARNING)
+
+# Create a file handler (despite the variable name, it writes to gradio_webrtc.log)
+console_handler = logging.FileHandler("gradio_webrtc.log")
+console_handler.setLevel(logging.DEBUG)
+
+# Create a formatter
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+console_handler.setFormatter(formatter)
+
+# Send DEBUG-and-above records of the "fastrtc" logger to that file
+logger = logging.getLogger("fastrtc")
+logger.setLevel(logging.DEBUG)
+logger.addHandler(console_handler)
+
+
+load_dotenv()
+
+groq_client = Groq()
+claude_client = anthropic.Anthropic()
+tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])  # KeyError at import if the variable is unset
+
+curr_dir = Path(__file__).parent
+
+
+def response(
+    audio: tuple[int, np.ndarray],
+    chatbot: list[dict] | None = None,
+):
+    """Transcribe *audio*, get Claude's reply and stream it back as speech.
+
+    Yields the chat history (AdditionalOutputs), then 24 kHz mono int16 chunks.
+    """
+    chatbot = chatbot or []
+    # Only role/content are forwarded to Claude; other keys are dropped.
+    messages = [{"role": m["role"], "content": m["content"]} for m in chatbot]
+    transcription = groq_client.audio.transcriptions.create(
+        file=("audio-file.mp3", audio_to_bytes(audio)),
+        model="whisper-large-v3-turbo",
+        response_format="verbose_json",
+    )
+    prompt = transcription.text
+    print("prompt", prompt)
+    for history in (chatbot, messages):
+        history.append({"role": "user", "content": prompt})
+    completion = claude_client.messages.create(
+        model="claude-3-5-haiku-20241022", max_tokens=512, messages=messages,  # type: ignore
+    )
+    text_blocks = [b for b in completion.content if getattr(b, "type", None) == "text"]
+    response_text = " ".join(b.text for b in text_blocks)  # type: ignore
+    chatbot.append({"role": "assistant", "content": response_text})
+    yield AdditionalOutputs(chatbot)
+    pcm_stream = tts_client.text_to_speech.convert_as_stream(
+        text=response_text,
+        voice_id="JBFqnCBsd6RMkjVDRZzb",
+        model_id="eleven_multilingual_v2",
+        output_format="pcm_24000",
+    )
+    for chunk in aggregate_bytes_to_16bit(pcm_stream):
+        yield (24000, np.frombuffer(chunk, dtype=np.int16).reshape(1, -1), "mono")
+
+
+chatbot = gr.Chatbot(type="messages")
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=ReplyOnPause(response),
+    additional_outputs_handler=lambda a, b: b,  # keeps only the second argument
+    additional_inputs=[chatbot],
+    additional_outputs=[chatbot],
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,  # credentials fetched only when get_space() is truthy
+    concurrency_limit=20 if get_space() else None,
+)
+
+
+class Message(BaseModel):  # one chat turn inside an /input_hook request body
+    role: str
+    content: str
+
+
+class InputData(BaseModel):  # JSON body accepted by POST /input_hook
+    webrtc_id: str  # passed as the first argument to stream.set_input
+    chatbot: list[Message]  # chat history; forwarded after model_dump() as plain dicts
+
+
+app = FastAPI()
+
+stream.mount(app)  # presumably adds the stream's endpoints; index.html posts to /webrtc/offer, which is not defined in this file
+
+
+@app.get("/")
+async def _():
+    """Serve index.html with the RTC configuration placeholder filled in."""
+    rtc_json = json.dumps(get_twilio_turn_credentials() if get_space() else None)
+    page = (curr_dir / "index.html").read_text().replace("__RTC_CONFIGURATION__", rtc_json)
+    return HTMLResponse(content=page, status_code=200)
+
+
+@app.post("/input_hook")
+async def _(body: InputData):  # hands the posted chat history to stream.set_input for body.webrtc_id
+    stream.set_input(body.webrtc_id, body.model_dump()["chatbot"])  # model_dump() turns the messages into plain dicts
+    return {"status": "ok"}
+
+
+@app.get("/outputs")
+def _(webrtc_id: str):
+    """Stream the two newest chat messages of every output as SSE events."""
+    print("outputs", webrtc_id)
+
+    async def sse_events():
+        async for output in stream.output_stream(webrtc_id):
+            for turn in (output.args[0][-2], output.args[0][-1]):
+                yield f"event: output\ndata: {json.dumps(turn)}\n\n"
+
+    return StreamingResponse(sse_events(), media_type="text/event-stream")
+
+
+if __name__ == "__main__":
+    # MODE selects the front end: "UI" -> Gradio, "PHONE" -> fastphone,
+    # anything else (including unset) -> the FastAPI app under uvicorn.
+    mode = os.getenv("MODE")
+    if mode == "UI":
+        stream.ui.launch(server_port=7860, server_name="0.0.0.0")
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn  # imported lazily: only this branch needs it
+        uvicorn.run(app, host="0.0.0.0", port=7860)
--- a/demo/webrtc_vs_websocket/index.html
+++ b/demo/webrtc_vs_websocket/index.html
@@ -0,0 +1,630 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>WebRTC vs WebSocket Benchmark</title>
+    <script src="https://cdn.jsdelivr.net/npm/alawmulaw"></script>
+    <style>
+        body {
+            font-family: system-ui, -apple-system, sans-serif;
+            margin: 0;
+            padding: 20px;
+            background-color: #f5f5f5;
+        }
+
+        .container {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 30px;
+            max-width: 1400px;
+            margin: 0 auto;
+        }
+
+        .panel {
+            background: white;
+            border-radius: 12px;
+            padding: 20px;
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+        }
+
+        .chat-container {
+            height: 400px;
+            overflow-y: auto;
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 15px;
+            margin-bottom: 15px;
+        }
+
+        .message {
+            margin-bottom: 10px;
+            padding: 8px 12px;
+            border-radius: 8px;
+            max-width: 80%;
+        }
+
+        .message.user {
+            background-color: #e3f2fd;
+            margin-left: auto;
+        }
+
+        .message.assistant {
+            background-color: #f5f5f5;
+        }
+
+        .metrics {
+            margin-top: 15px;
+            padding: 10px;
+            background: #f8f9fa;
+            border-radius: 8px;
+        }
+
+        .metric {
+            margin: 5px 0;
+            font-size: 14px;
+        }
+
+        button {
+            background-color: #1976d2;
+            color: white;
+            border: none;
+            padding: 10px 20px;
+            border-radius: 6px;
+            cursor: pointer;
+            font-size: 14px;
+            transition: background-color 0.2s;
+        }
+
+        button:hover {
+            background-color: #1565c0;
+        }
+
+        button:disabled {
+            background-color: #bdbdbd;
+            cursor: not-allowed;
+        }
+
+        h2 {
+            margin-top: 0;
+            color: #1976d2;
+        }
+
+        .visualizer {
+            width: 100%;
+            height: 100px;
+            margin: 10px 0;
+            background: #fafafa;
+            border-radius: 8px;
+        }
+
+        /* Add styles for disclaimer */
+        .disclaimer {
+            background-color: #fff3e0;
+            padding: 15px;
+            border-radius: 8px;
+            margin-bottom: 20px;
+            font-size: 14px;
+            line-height: 1.5;
+            max-width: 1400px;
+            margin: 0 auto 20px auto;
+        }
+
+        /* Update nav bar styles */
+        .nav-bar {
+            background-color: #f5f5f5;
+            padding: 10px 20px;
+            margin-bottom: 20px;
+        }
+
+        .nav-container {
+            max-width: 1400px;
+            margin: 0 auto;
+            display: flex;
+            gap: 10px;
+        }
+
+        .nav-button {
+            background-color: #1976d2;
+            color: white;
+            border: none;
+            padding: 8px 16px;
+            border-radius: 4px;
+            cursor: pointer;
+            text-decoration: none;
+            font-size: 14px;
+            transition: background-color 0.2s;
+        }
+
+        .nav-button:hover {
+            background-color: #1565c0;
+        }
+
+        /* Add styles for toast notifications */
+        .toast {
+            position: fixed;
+            top: 20px;
+            left: 50%;
+            transform: translateX(-50%);
+            padding: 16px 24px;
+            border-radius: 4px;
+            font-size: 14px;
+            z-index: 1000;
+            display: none;
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+        }
+
+        .toast.error {
+            background-color: #f44336;
+            color: white;
+        }
+
+        .toast.warning {
+            background-color: #ffd700;
+            color: black;
+        }
+    </style>
+</head>
+
+<body>
+    <nav class="nav-bar">
+        <div class="nav-container">
+            <a href="./webrtc/docs" class="nav-button">WebRTC Docs</a>
+            <a href="./websocket/docs" class="nav-button">WebSocket Docs</a>
+            <a href="./telephone/docs" class="nav-button">Telephone Docs</a>
+            <a href="./ui" class="nav-button">UI</a>
+        </div>
+    </nav>
+
+    <div class="disclaimer">
+        This page compares the WebRTC Round-Trip-Time calculated from <code>getStats()</code> to the time taken to
+        process a ping/pong response pattern over websockets. It may not be a gold standard benchmark. Both WebRTC and
+        Websockets have their merits/advantages which is why FastRTC supports both. Artifacts in the WebSocket playback
+        audio are due to gaps in my frontend processing code and not FastRTC web server.
+    </div>
+
+    <div class="container">
+        <div class="panel">
+            <h2>WebRTC Connection</h2>
+            <div id="webrtc-chat" class="chat-container"></div>
+            <div id="webrtc-metrics" class="metrics">
+                <div class="metric">RTT (Round Trip Time): <span id="webrtc-rtt">-</span></div>
+            </div>
+            <button id="webrtc-button">Connect WebRTC</button>
+        </div>
+
+        <div class="panel">
+            <h2>WebSocket Connection</h2>
+            <div id="ws-chat" class="chat-container"></div>
+            <div id="ws-metrics" class="metrics">
+                <div class="metric">RTT (Round Trip Time): <span id="ws-rtt">0</span></div>
+            </div>
+            <button id="ws-button">Connect WebSocket</button>
+        </div>
+    </div>
+
+    <audio id="webrtc-audio" style="display: none;"></audio>
+    <audio id="ws-audio" style="display: none;"></audio>
+
+    <div id="error-toast" class="toast"></div>
+
+    <script>
+        // Shared utilities: short pseudo-random base-36 id (tail of Math.random()).
+        function generateId() {
+            return Math.random().toString(36).slice(7);
+        }
+
+        // Build a data-channel message handler bound to the connection `id`.
+        // On a "send_input" message it POSTs the WebRTC chat history to
+        // /input_hook; every other message type is ignored.
+        function sendInput(id) {
+            return function handleMessage(event) {
+                const { type } = JSON.parse(event.data);
+                if (type !== "send_input") return;
+
+                // Read at call time, so the latest history is what gets sent.
+                const payload = { webrtc_id: id, chatbot: chatHistoryWebRTC };
+                // Fire-and-forget: the response is neither awaited nor checked.
+                fetch('/input_hook', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify(payload),
+                });
+            };
+        }
+
+        let chatHistoryWebRTC = [];
+        let chatHistoryWebSocket = [];
+
+        // Append a chat bubble to the given container, scroll it into view and
+        // record the turn in the matching history array.
+        function addMessage(containerId, role, content) {
+            const panel = document.getElementById(containerId);
+            const bubble = document.createElement('div');
+            bubble.classList.add('message', role);
+            bubble.textContent = content;
+            panel.appendChild(bubble);
+            panel.scrollTop = panel.scrollHeight;
+            // Anything that is not the WebRTC panel counts as WebSocket history.
+            const history = containerId === 'webrtc-chat' ? chatHistoryWebRTC : chatHistoryWebSocket;
+            history.push({ role, content });
+        }
+
+        // WebRTC Implementation
+        let webrtcPeerConnection;
+
+        // Show the current round-trip time of the succeeded ICE candidate pair.
+        async function updateWebRTCStats() {
+            if (!webrtcPeerConnection) return;
+            const stats = await webrtcPeerConnection.getStats();
+            stats.forEach(report => {
+                if (report.type !== 'candidate-pair' || report.state !== 'succeeded') return;
+                // currentRoundTripTime is optional; skip it rather than print "NaNms".
+                if (typeof report.currentRoundTripTime !== 'number') return;
+                const rttMs = report.currentRoundTripTime * 1000; // seconds -> ms
+                document.getElementById('webrtc-rtt').textContent = `${rttMs.toFixed(2)}ms`;
+            });
+        }
+
+        async function setupWebRTC() {
+            const button = document.getElementById('webrtc-button');
+            button.textContent = "Stop";
+
+            const config = __RTC_CONFIGURATION__;
+            webrtcPeerConnection = new RTCPeerConnection(config);
+            const webrtcId = generateId();
+
+            const timeoutId = setTimeout(() => {
+                const toast = document.getElementById('error-toast');
+                toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
+                toast.className = 'toast warning';
+                toast.style.display = 'block';
+
+                // Hide warning after 5 seconds
+                setTimeout(() => {
+                    toast.style.display = 'none';
+                }, 5000);
+            }, 5000);
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                stream.getTracks().forEach(track => {
+                    webrtcPeerConnection.addTrack(track, stream);
+                });
+
+                webrtcPeerConnection.addEventListener('track', (evt) => {
+                    const audio = document.getElementById('webrtc-audio');
+                    if (audio.srcObject !== evt.streams[0]) {
+                        audio.srcObject = evt.streams[0];
+                        audio.play();
+                    }
+                });
+
+                const dataChannel = webrtcPeerConnection.createDataChannel('text');
+                dataChannel.onmessage = sendInput(webrtcId);
+
+                const offer = await webrtcPeerConnection.createOffer();
+                await webrtcPeerConnection.setLocalDescription(offer);
+
+                await new Promise((resolve) => {
+                    if (webrtcPeerConnection.iceGatheringState === "complete") {
+                        resolve();
+                    } else {
+                        const checkState = () => {
+                            if (webrtcPeerConnection.iceGatheringState === "complete") {
+                                webrtcPeerConnection.removeEventListener("icegatheringstatechange", checkState);
+                                resolve();
+                            }
+                        };
+                        webrtcPeerConnection.addEventListener("icegatheringstatechange", checkState);
+                    }
+                });
+
+                const response = await fetch('/webrtc/offer', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        sdp: webrtcPeerConnection.localDescription.sdp,
+                        type: webrtcPeerConnection.localDescription.type,
+                        webrtc_id: webrtcId
+                    })
+                });
+
+                const serverResponse = await response.json();
+                await webrtcPeerConnection.setRemoteDescription(serverResponse);
+
+                // Setup event source for messages
+                const eventSource = new EventSource('/outputs?webrtc_id=' + webrtcId);
+                eventSource.addEventListener("output", (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    addMessage('webrtc-chat', eventJson.role, eventJson.content);
+                });
+
+                // Add periodic stats collection
+                const statsInterval = setInterval(updateWebRTCStats, 1000);
+
+                // Store the interval ID on the connection
+                webrtcPeerConnection.statsInterval = statsInterval;
+
+                webrtcPeerConnection.addEventListener('connectionstatechange', () => {
+                    if (webrtcPeerConnection.connectionState === 'connected') {
+                        clearTimeout(timeoutId);
+                        const toast = document.getElementById('error-toast');
+                        toast.style.display = 'none';
+                    }
+                });
+
+            } catch (err) {
+                clearTimeout(timeoutId);
+                console.error('WebRTC setup error:', err);
+            }
+        }
+
+        function webrtc_stop() {
+            if (webrtcPeerConnection) {
+                // Clear the stats interval
+                if (webrtcPeerConnection.statsInterval) {
+                    clearInterval(webrtcPeerConnection.statsInterval);
+                }
+
+                // Close all tracks
+                webrtcPeerConnection.getSenders().forEach(sender => {
+                    if (sender.track) {
+                        sender.track.stop();
+                    }
+                });
+
+                webrtcPeerConnection.close();
+                webrtcPeerConnection = null;
+
+                // Reset metrics display
+                document.getElementById('webrtc-rtt').textContent = '-';
+            }
+        }
+
+        // WebSocket Implementation
+        // Current WebSocket: assigned in setupWebSocket() and closed by ws_stop().
+        let webSocket;
+        // Bookkeeping for the WebSocket ping/pong round-trip measurement.
+        let wsMetrics = {
+            // performance.now() timestamp taken when the latest ping was sent
+            pingStartTime: 0,
+            // RTT samples in ms; the pong handler keeps only the last 20
+            rttValues: []
+        };
+
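+        // mu-law codec: the functions below rely on the global `alawmulaw` library.
+        //
+        // NOTE(review): nothing in this section loads that script or waits for it to be ready; presumably it is included elsewhere in the page. Confirm.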
+
+
+        function resample(audioData, fromSampleRate, toSampleRate) {
+            const ratio = fromSampleRate / toSampleRate;
+            const newLength = Math.round(audioData.length / ratio);
+            const result = new Float32Array(newLength);
+
+            for (let i = 0; i < newLength; i++) {
+                const position = i * ratio;
+                const index = Math.floor(position);
+                const fraction = position - index;
+
+                if (index + 1 < audioData.length) {
+                    result[i] = audioData[index] * (1 - fraction) + audioData[index + 1] * fraction;
+                } else {
+                    result[i] = audioData[index];
+                }
+            }
+            return result;
+        }
+
+        function convertToMulaw(audioData, sampleRate) {
+            // Resample to 8000 Hz if needed
+            if (sampleRate !== 8000) {
+                audioData = resample(audioData, sampleRate, 8000);
+            }
+
+            // Convert float32 [-1,1] to int16 [-32768,32767]
+            const int16Data = new Int16Array(audioData.length);
+            for (let i = 0; i < audioData.length; i++) {
+                int16Data[i] = Math.floor(audioData[i] * 32768);
+            }
+
+            // Convert to mu-law using the library
+            return alawmulaw.mulaw.encode(int16Data);
+        }
+
+        async function setupWebSocket() {
+            const button = document.getElementById('ws-button');
+            button.textContent = "Stop";
+
+            try {
+                const stream = await navigator.mediaDevices.getUserMedia({
+                    audio: {
+                        "echoCancellation": true,
+                        "noiseSuppression": { "exact": true },
+                        "autoGainControl": { "exact": true },
+                        "sampleRate": { "ideal": 24000 },
+                        "sampleSize": { "ideal": 16 },
+                        "channelCount": { "exact": 1 },
+                    }
+                });
+                const wsId = generateId();
+                wsMetrics.startTime = performance.now();
+
+                // Create audio context and analyser for visualization
+                const audioContext = new AudioContext();
+                const analyser = audioContext.createAnalyser();
+                const source = audioContext.createMediaStreamSource(stream);
+                source.connect(analyser);
+
+                // Connect to websocket endpoint
+                webSocket = new WebSocket(`${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/websocket/offer`);
+
+                webSocket.onopen = () => {
+                    // Send initial start message
+                    webSocket.send(JSON.stringify({
+                        event: "start",
+                        websocket_id: wsId
+                    }));
+
+                    // Setup audio processing
+                    const processor = audioContext.createScriptProcessor(2048, 1, 1);
+                    source.connect(processor);
+                    processor.connect(audioContext.destination);
+
+                    processor.onaudioprocess = (e) => {
+                        const inputData = e.inputBuffer.getChannelData(0);
+                        const mulawData = convertToMulaw(inputData, audioContext.sampleRate);
+                        const base64Audio = btoa(String.fromCharCode.apply(null, mulawData));
+                        if (webSocket.readyState === WebSocket.OPEN) {
+                            webSocket.send(JSON.stringify({
+                                event: "media",
+                                media: {
+                                    payload: base64Audio
+                                }
+                            }));
+                        }
+                    };
+
+                    // Add ping interval
+                    webSocket.pingInterval = setInterval(() => {
+                        wsMetrics.pingStartTime = performance.now();
+                        webSocket.send(JSON.stringify({
+                            event: "ping"
+                        }));
+                    }, 500);
+                };
+
+                // Setup audio output context
+                const outputContext = new AudioContext({ sampleRate: 24000 });
+                const sampleRate = 24000; // Updated to match server sample rate
+                let audioQueue = [];
+                let isPlaying = false;
+
+                webSocket.onmessage = (event) => {
+                    const data = JSON.parse(event.data);
+                    if (data?.type === "send_input") {
+                        console.log("sending input")
+                        fetch('/input_hook', {
+                            method: 'POST',
+                            headers: { 'Content-Type': 'application/json' },
+                            body: JSON.stringify({ webrtc_id: wsId, chatbot: chatHistoryWebSocket })
+                        });
+                    }
+                    if (data.event === "media") {
+                        // Process received audio
+                        const audioData = atob(data.media.payload);
+                        const mulawData = new Uint8Array(audioData.length);
+                        for (let i = 0; i < audioData.length; i++) {
+                            mulawData[i] = audioData.charCodeAt(i);
+                        }
+
+                        // Convert mu-law to linear PCM
+                        const linearData = alawmulaw.mulaw.decode(mulawData);
+
+                        // Create an AudioBuffer
+                        const audioBuffer = outputContext.createBuffer(1, linearData.length, sampleRate);
+                        const channelData = audioBuffer.getChannelData(0);
+
+                        // Fill the buffer with the decoded data
+                        for (let i = 0; i < linearData.length; i++) {
+                            channelData[i] = linearData[i] / 32768.0;
+                        }
+
+                        // Queue the audio buffer
+                        audioQueue.push(audioBuffer);
+
+                        // Start playing if not already playing
+                        if (!isPlaying) {
+                            playNextBuffer();
+                        }
+                    }
+
+                    // Add pong handler
+                    if (data.event === "pong") {
+                        const rtt = performance.now() - wsMetrics.pingStartTime;
+                        wsMetrics.rttValues.push(rtt);
+                        // Keep only last 20 values for running mean
+                        if (wsMetrics.rttValues.length > 20) {
+                            wsMetrics.rttValues.shift();
+                        }
+                        const avgRtt = wsMetrics.rttValues.reduce((a, b) => a + b, 0) / wsMetrics.rttValues.length;
+                        document.getElementById('ws-rtt').textContent = `${avgRtt.toFixed(2)}ms`;
+                        return;
+                    }
+                };
+
+                function playNextBuffer() {
+                    if (audioQueue.length === 0) {
+                        isPlaying = false;
+                        return;
+                    }
+
+                    isPlaying = true;
+                    const bufferSource = outputContext.createBufferSource();
+                    bufferSource.buffer = audioQueue.shift();
+                    bufferSource.connect(outputContext.destination);
+
+                    bufferSource.onended = playNextBuffer;
+                    bufferSource.start();
+                }
+
+                const eventSource = new EventSource('/outputs?webrtc_id=' + wsId);
+                eventSource.addEventListener("output", (event) => {
+                    console.log("ws output", event);
+                    const eventJson = JSON.parse(event.data);
+                    addMessage('ws-chat', eventJson.role, eventJson.content);
+                });
+
+            } catch (err) {
+                console.error('WebSocket setup error:', err);
+                button.disabled = false;
+            }
+        }
+
+        function ws_stop() {
+            if (webSocket) {
+                webSocket.send(JSON.stringify({
+                    event: "stop"
+                }));
+                // Clear ping interval
+                if (webSocket.pingInterval) {
+                    clearInterval(webSocket.pingInterval);
+                }
+                // Reset RTT display
+                document.getElementById('ws-rtt').textContent = '-';
+                wsMetrics.rttValues = [];
+
+                // Clear the stats interval
+                if (webSocket.statsInterval) {
+                    clearInterval(webSocket.statsInterval);
+                }
+                webSocket.close();
+            }
+        }
+
+        // Event Listeners
+        document.getElementById('webrtc-button').addEventListener('click', () => {
+            const button = document.getElementById('webrtc-button');
+            if (button.textContent === 'Connect WebRTC') {
+                setupWebRTC();
+            } else {
+                webrtc_stop();
+                button.textContent = 'Connect WebRTC';
+            }
+        });
+        const ws_start_button = document.getElementById('ws-button')
+        ws_start_button.addEventListener('click', () => {
+            if (ws_start_button.textContent === 'Connect WebSocket') {
+                setupWebSocket();
+                ws_start_button.textContent = 'Stop';
+            } else {
+                ws_stop();
+                ws_start_button.textContent = 'Connect WebSocket';
+            }
+        });
+        document.addEventListener("beforeunload", () => {
+            ws_stop();
+            webrtc_stop();
+        });
+    </script>
+</body>
+
+</html>
--- a/demo/webrtc_vs_websocket/requirements.txt
+++ b/demo/webrtc_vs_websocket/requirements.txt
@@ -0,0 +1,6 @@
+fastrtc[vad]
+elevenlabs
+groq
+anthropic
+twilio
+python-dotenv
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>`
				`@@ -0,0 +1 @@`
				<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
				`@@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
				`@@ -0,0 +1 @@`
				`<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>`
				`@@ -0,0 +1 @@`
				`uvicorn backend.server:app --host 0.0.0.0 --port 8000`