Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git, synced 2026-02-05 18:09:23 +08:00
Rebrand to FastRTC (#60)
* Add code
* add code
* add code
* Rename messages
* rename
* add code
* Add demo
* docs + demos + bug fixes
* add code
* styles
* user guide
* Styles
* Add code
* misc docs updates
* print nit
* whisper + pr
* url for images
* whisper update
* Fix bugs
* remove demo files
* version number
* Fix pypi readme
* Fix
* demos
* Add llama code editor
* Update llama code editor and object detection cookbook
* Add more cookbook demos
* add code
* Fix links for PR deploys
* add code
* Fix the install
* add tts
* TTS docs
* Typo
* Pending bubbles for reply on pause
* Stream redesign (#63)
* better error handling
* Websocket error handling
* add code

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>

* remove docs from dist
* Some docs typos
* more typos
* upload changes + docs
* docs
* better phone
* update docs
* add code
* Make demos better
* fix docs + websocket start_up
* remove mention of FastAPI app
* fastphone tweaks
* add code
* ReplyOnStopWords fixes
* Fix cookbook
* Fix pypi readme
* add code
* bump versions
* sambanova cookbook
* Fix tags
* Llm voice chat
* kyutai tag
* Add error message to all index.html
* STT module uses Moonshine
* Not required from typing extensions
* fix llm voice chat
* Add vpn warning
* demo fixes
* demos
* Add more ui args and gemini audio-video
* update cookbook
* version 9

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
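For orientation, the demo file added below builds on the basic FastRTC pattern this rebrand centers on: wrap an audio handler in a reply class and hand it to a Stream. A minimal, hedged sketch of that pattern (an echo handler with ReplyOnPause, launched through the built-in Gradio UI) might look like the following; the handler and launch settings are illustrative only and are not part of this commit.

import numpy as np
from fastrtc import ReplyOnPause, Stream


def echo(audio: tuple[int, np.ndarray]):
    # Illustrative handler: yield the received audio chunk straight back to the caller.
    yield audio


# Assumed minimal wiring; the demo below uses ReplyOnStopWords and extra inputs/outputs instead.
stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")

if __name__ == "__main__":
    stream.ui.launch()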
demo/hello_computer/app.py (new normal file, 153 lines)
@@ -0,0 +1,153 @@
import base64
import json
import os
from pathlib import Path

import gradio as gr
import numpy as np
import openai
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from fastrtc import (
    AdditionalOutputs,
    ReplyOnStopWords,
    Stream,
    WebRTCError,
    get_stt_model,
    get_twilio_turn_credentials,
)
from gradio.utils import get_space
from pydantic import BaseModel

load_dotenv()

curr_dir = Path(__file__).parent


# SambaNova serves an OpenAI-compatible API, so the stock OpenAI client is reused here.
client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)
# Local speech-to-text model used to transcribe each captured utterance.
model = get_stt_model()


def response(
    audio: tuple[int, np.ndarray],
    gradio_chatbot: list[dict] | None = None,
    conversation_state: list[dict] | None = None,
):
    gradio_chatbot = gradio_chatbot or []
    conversation_state = conversation_state or []
    try:
        text = model.stt(audio)
        print("STT in handler", text)
        sample_rate, array = audio
        # Show the user's spoken turn in the chatbot as an audio bubble.
        gradio_chatbot.append(
            {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
        )
        yield AdditionalOutputs(gradio_chatbot, conversation_state)

        conversation_state.append({"role": "user", "content": text})

        request = client.chat.completions.create(
            model="Meta-Llama-3.2-3B-Instruct",
            messages=conversation_state,  # type: ignore
            temperature=0.1,
            top_p=0.1,
        )
        response = {"role": "assistant", "content": request.choices[0].message.content}

    except Exception as e:
        import traceback

        traceback.print_exc()
        # Surface the full traceback to the client as a WebRTC error.
        raise WebRTCError(str(e) + "\n" + traceback.format_exc())

    conversation_state.append(response)
    gradio_chatbot.append(response)

    yield AdditionalOutputs(gradio_chatbot, conversation_state)


chatbot = gr.Chatbot(type="messages", value=[])
state = gr.State(value=[])
# The handler only fires once the stop word ("computer") is heard in the audio stream.
stream = Stream(
    ReplyOnStopWords(
        response,  # type: ignore
        stop_words=["computer"],
        input_sample_rate=16000,
    ),
    mode="send",
    modality="audio",
    additional_inputs=[chatbot, state],
    additional_outputs=[chatbot, state],
    additional_outputs_handler=lambda *a: (a[2], a[3]),
    concurrency_limit=5 if get_space() else None,
    time_limit=90 if get_space() else None,
    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
)
app = FastAPI()
stream.mount(app)


class Message(BaseModel):
    role: str
    content: str


class InputData(BaseModel):
    webrtc_id: str
    chatbot: list[Message]
    state: list[Message]


@app.get("/")
|
||||
async def _():
|
||||
rtc_config = get_twilio_turn_credentials() if get_space() else None
|
||||
html_content = (curr_dir / "index.html").read_text()
|
||||
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
|
||||
return HTMLResponse(content=html_content)
|
||||
|
||||
|
||||
@app.post("/input_hook")
async def _(data: InputData):
    body = data.model_dump()
    # Seed the stream's additional inputs (chat history and LLM state) for this session.
    stream.set_input(data.webrtc_id, body["chatbot"], body["state"])


def audio_to_base64(file_path):
    audio_format = "wav"
    with open(file_path, "rb") as audio_file:
        encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
    return f"data:audio/{audio_format};base64,{encoded_audio}"


@app.get("/outputs")
|
||||
async def _(webrtc_id: str):
|
||||
async def output_stream():
|
||||
async for output in stream.output_stream(webrtc_id):
|
||||
chatbot = output.args[0]
|
||||
state = output.args[1]
|
||||
data = {
|
||||
"message": state[-1],
|
||||
"audio": audio_to_base64(chatbot[-1]["content"].value["path"])
|
||||
if chatbot[-1]["role"] == "user"
|
||||
else None,
|
||||
}
|
||||
yield f"event: output\ndata: {json.dumps(data)}\n\n"
|
||||
|
||||
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import os

    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        raise ValueError("Phone mode not supported")
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)
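As a usage note, a browser normally drives this demo through index.html: it negotiates the WebRTC connection, POSTs the chat history to /input_hook, and then listens on /outputs for server-sent events. A minimal, hedged sketch of a headless client reading that event stream might look like the following; it assumes the app is running locally on port 7860 and that webrtc_id comes from an already-established session (the placeholder value here is hypothetical).

import json

import requests

webrtc_id = "REPLACE_WITH_ACTIVE_SESSION_ID"  # hypothetical placeholder; normally set by the frontend
with requests.get(
    f"http://localhost:7860/outputs?webrtc_id={webrtc_id}", stream=True
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        # Each event carries the latest conversation turn plus, for user turns, a base64 audio data URL.
        if line and line.startswith("data: "):
            payload = json.loads(line[len("data: "):])
            print(payload["message"])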