[feat] sync fastrtc, add data-channel text support

sync code with fastrtc,
add text support through the data channel,
fix the Safari connection problem,
support chat without camera or mic
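
For context, a minimal sketch of the technique the second item refers to: sending text over a WebRTC data channel, here with aiortc (the WebRTC library fastrtc builds on). The channel label and message text are illustrative assumptions, not code from this commit.

import asyncio

from aiortc import RTCPeerConnection


async def main():
    pc = RTCPeerConnection()
    # "chat" is an assumed label; fastrtc's actual channel name may differ.
    channel = pc.createDataChannel("chat")

    @channel.on("open")
    def on_open():
        # Text frames travel over the data channel, so chat works
        # without any camera or microphone track.
        channel.send("hello over the data channel")

    @channel.on("message")
    def on_message(message):
        print("received:", message)

    # Signaling (the SDP offer/answer exchange) is omitted here; a real
    # peer connection needs it before the channel opens.


asyncio.run(main())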
huangbinchao.hbc
2025-03-25 18:05:10 +08:00
parent e1fb40a8a8
commit aefb08150f
222 changed files with 28698 additions and 5889 deletions

demo/llama_code_editor/README.md

@@ -0,0 +1,16 @@
---
title: Llama Code Editor
emoji: 🦙
colorFrom: indigo
colorTo: pink
sdk: gradio
sdk_version: 5.16.0
app_file: app.py
pinned: false
license: mit
short_description: Create interactive HTML web pages with your voice
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
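
The secret|… tags request Space secrets, which are presumably exposed to the app as environment variables at runtime; handler.py below reads them exactly this way. A minimal sketch:

import os

# Secrets declared in the README tags become environment variables
# inside the running Space.
twilio_sid = os.environ.get("TWILIO_ACCOUNT_SID")
twilio_token = os.environ.get("TWILIO_AUTH_TOKEN")
sambanova_key = os.environ.get("SAMBANOVA_API_KEY")
groq_key = os.environ.get("GROQ_API_KEY")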

demo/llama_code_editor/app.py

@@ -0,0 +1,45 @@
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from fastrtc import Stream
from gradio.utils import get_space

try:
    from demo.llama_code_editor.handler import (
        CodeHandler,
    )
    from demo.llama_code_editor.ui import demo as ui
except (ImportError, ModuleNotFoundError):
    from handler import CodeHandler
    from ui import demo as ui

stream = Stream(
    handler=CodeHandler,
    modality="audio",
    mode="send-receive",
    concurrency_limit=10 if get_space() else None,
    time_limit=90 if get_space() else None,
)
stream.ui = ui

app = FastAPI()


@app.get("/")
async def _():
    url = "/ui" if not get_space() else "https://fastrtc-llama-code-editor.hf.space/ui/"
    return RedirectResponse(url)


if __name__ == "__main__":
    import os

    # MODE selects how the demo is served: the Gradio UI, the FastPhone
    # telephone interface, or the FastAPI app via uvicorn.
    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860, server_name="0.0.0.0")
    elif mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)

demo/llama_code_editor/assets/sandbox.html

@@ -0,0 +1,37 @@
<div style="
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
min-height: 400px;
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8ec 100%);
border-radius: 8px;
border: 2px dashed #cbd5e1;
padding: 2rem;
text-align: center;
color: #64748b;
font-family: system-ui, -apple-system, sans-serif;
">
<div style="
width: 80px;
height: 80px;
margin-bottom: 1.5rem;
border: 3px solid #cbd5e1;
border-radius: 12px;
position: relative;
">
<div style="
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
font-size: 2rem;
">📦</div>
</div>
<h2 style="
margin: 0 0 0.5rem 0;
font-size: 1.5rem;
font-weight: 600;
color: #475569;
">No Application Created</h2>
</div>

demo/llama_code_editor/assets/spinner.html

@@ -0,0 +1,60 @@
<div style="
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
min-height: 400px;
background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
border-radius: 8px;
padding: 2rem;
text-align: center;
font-family: system-ui, -apple-system, sans-serif;
">
<!-- Spinner container -->
<div style="
position: relative;
width: 64px;
height: 64px;
margin-bottom: 1.5rem;
">
<!-- Static ring -->
<div style="
position: absolute;
width: 100%;
height: 100%;
border: 4px solid #e2e8f0;
border-radius: 50%;
"></div>
<!-- Animated spinner -->
<div style="
position: absolute;
width: 100%;
height: 100%;
border: 4px solid transparent;
border-top-color: #3b82f6;
border-radius: 50%;
animation: spin 1s linear infinite;
"></div>
</div>
<!-- Text content -->
<h2 style="
margin: 0 0 0.5rem 0;
font-size: 1.25rem;
font-weight: 600;
color: #475569;
">Generating your application...</h2>
<p style="
margin: 0;
font-size: 0.875rem;
color: #64748b;
">This may take a few moments</p>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
</div>

demo/llama_code_editor/handler.py

@@ -0,0 +1,73 @@
import base64
import os
import re
from pathlib import Path

import numpy as np
import openai
from dotenv import load_dotenv
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    audio_to_bytes,
)
from groq import Groq

load_dotenv()

groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

path = Path(__file__).parent / "assets"
spinner_html = open(path / "spinner.html").read()

system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."

user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"


def extract_html_content(text):
    """
    Extract content including HTML tags.
    """
    match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
    return match.group(0) if match else None


def display_in_sandbox(code):
    encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    # Show the spinner while the request is transcribed and answered.
    yield AdditionalOutputs(history, spinner_html)

    # Transcribe the recorded audio with Groq's Whisper endpoint.
    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes(user_message)),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    # Generate the HTML application with Llama via the SambaNova API.
    response = client.chat.completions.create(
        model="Meta-Llama-3.1-70B-Instruct",
        messages=history,  # type: ignore
        temperature=0.1,
        top_p=0.1,
    )

    output = response.choices[0].message.content
    html_code = extract_html_content(output)

    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


CodeHandler = ReplyOnPause(generate)  # type: ignore
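
A quick usage sketch for extract_html_content; the sample reply string is made up for illustration:

sample_reply = (
    "Sure, here is the page:\n"
    "<!DOCTYPE html><html><body><h1>Hi</h1></body></html>\n"
    "Let me know if you want edits."
)
print(extract_html_content(sample_reply))
# -> <!DOCTYPE html><html><body><h1>Hi</h1></body></html>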

demo/llama_code_editor/requirements.in

@@ -0,0 +1,5 @@
fastrtc[vad]
groq
openai
python-dotenv
twilio

demo/llama_code_editor/requirements.txt

@@ -0,0 +1,295 @@
# This file was autogenerated by uv via the following command:
# uv pip compile demo/llama_code_editor/requirements.in -o demo/llama_code_editor/requirements.txt
aiofiles==23.2.1
# via gradio
aiohappyeyeballs==2.4.6
# via aiohttp
aiohttp==3.11.12
# via
# aiohttp-retry
# twilio
aiohttp-retry==2.9.1
# via twilio
aioice==0.9.0
# via aiortc
aiortc==1.10.1
# via fastrtc
aiosignal==1.3.2
# via aiohttp
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
# via
# gradio
# groq
# httpx
# openai
# starlette
attrs==25.1.0
# via aiohttp
audioread==3.0.1
# via librosa
av==12.3.0
# via aiortc
certifi==2024.8.30
# via
# httpcore
# httpx
# requests
cffi==1.17.1
# via
# aiortc
# cryptography
# pylibsrtp
# soundfile
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via
# typer
# uvicorn
coloredlogs==15.0.1
# via onnxruntime
cryptography==43.0.3
# via
# aiortc
# pyopenssl
decorator==5.1.1
# via librosa
distro==1.9.0
# via
# groq
# openai
dnspython==2.7.0
# via aioice
fastapi==0.115.5
# via gradio
fastrtc==0.0.2.post4
# via -r demo/llama_code_editor/requirements.in
ffmpy==0.4.0
# via gradio
filelock==3.16.1
# via huggingface-hub
flatbuffers==24.3.25
# via onnxruntime
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
fsspec==2024.10.0
# via
# gradio-client
# huggingface-hub
google-crc32c==1.6.0
# via aiortc
gradio==5.16.0
# via fastrtc
gradio-client==1.7.0
# via gradio
groq==0.18.0
# via -r demo/llama_code_editor/requirements.in
h11==0.14.0
# via
# httpcore
# uvicorn
httpcore==1.0.7
# via httpx
httpx==0.27.2
# via
# gradio
# gradio-client
# groq
# openai
# safehttpx
huggingface-hub==0.28.1
# via
# gradio
# gradio-client
humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
ifaddr==0.2.0
# via aioice
jinja2==3.1.4
# via gradio
jiter==0.7.1
# via openai
joblib==1.4.2
# via
# librosa
# scikit-learn
lazy-loader==0.4
# via librosa
librosa==0.10.2.post1
# via fastrtc
llvmlite==0.43.0
# via numba
markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via
# gradio
# jinja2
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
msgpack==1.1.0
# via librosa
multidict==6.1.0
# via
# aiohttp
# yarl
numba==0.60.0
# via librosa
numpy==2.0.2
# via
# gradio
# librosa
# numba
# onnxruntime
# pandas
# scikit-learn
# scipy
# soxr
onnxruntime==1.20.1
# via fastrtc
openai==1.54.4
# via -r demo/llama_code_editor/requirements.in
orjson==3.10.11
# via gradio
packaging==24.2
# via
# gradio
# gradio-client
# huggingface-hub
# lazy-loader
# onnxruntime
# pooch
pandas==2.2.3
# via gradio
pillow==11.0.0
# via gradio
platformdirs==4.3.6
# via pooch
pooch==1.8.2
# via librosa
propcache==0.2.1
# via
# aiohttp
# yarl
protobuf==5.28.3
# via onnxruntime
pycparser==2.22
# via cffi
pydantic==2.9.2
# via
# fastapi
# gradio
# groq
# openai
pydantic-core==2.23.4
# via pydantic
pydub==0.25.1
# via gradio
pyee==12.1.1
# via aiortc
pygments==2.18.0
# via rich
pyjwt==2.10.1
# via twilio
pylibsrtp==0.10.0
# via aiortc
pyopenssl==24.2.1
# via aiortc
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.0.1
# via -r demo/llama_code_editor/requirements.in
python-multipart==0.0.20
# via gradio
pytz==2024.2
# via pandas
pyyaml==6.0.2
# via
# gradio
# huggingface-hub
requests==2.32.3
# via
# huggingface-hub
# pooch
# twilio
rich==13.9.4
# via typer
ruff==0.9.6
# via gradio
safehttpx==0.1.6
# via gradio
scikit-learn==1.5.2
# via librosa
scipy==1.14.1
# via
# librosa
# scikit-learn
semantic-version==2.10.0
# via gradio
shellingham==1.5.4
# via typer
six==1.16.0
# via python-dateutil
sniffio==1.3.1
# via
# anyio
# groq
# httpx
# openai
soundfile==0.12.1
# via librosa
soxr==0.5.0.post1
# via librosa
starlette==0.41.3
# via
# fastapi
# gradio
sympy==1.13.3
# via onnxruntime
threadpoolctl==3.5.0
# via scikit-learn
tomlkit==0.12.0
# via gradio
tqdm==4.67.0
# via
# huggingface-hub
# openai
twilio==9.4.5
# via -r demo/llama_code_editor/requirements.in
typer==0.13.1
# via gradio
typing-extensions==4.12.2
# via
# fastapi
# gradio
# gradio-client
# groq
# huggingface-hub
# librosa
# openai
# pydantic
# pydantic-core
# pyee
# typer
tzdata==2024.2
# via pandas
urllib3==2.2.3
# via requests
uvicorn==0.32.0
# via gradio
websockets==12.0
# via gradio-client
yarl==1.18.3
# via aiohttp

demo/llama_code_editor/ui.py

@@ -0,0 +1,75 @@
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from fastrtc import WebRTC, get_twilio_turn_credentials
from gradio.utils import get_space

try:
    from demo.llama_code_editor.handler import (
        CodeHandler,
        display_in_sandbox,
        system_prompt,
    )
except (ImportError, ModuleNotFoundError):
    from handler import CodeHandler, display_in_sandbox, system_prompt

load_dotenv()

path = Path(__file__).parent / "assets"

with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Llama Code Editor
                </h1>
                <h2 style='text-align: center'>
                Powered by SambaNova and Gradio-WebRTC ⚡️
                </h2>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice!
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=get_twilio_turn_credentials()
                if get_space()
                else None,
                mode="send",
                modality="audio",
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=open(path / "sandbox.html").read())
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Stream microphone audio to CodeHandler; history and code are extra inputs.
    webrtc.stream(
        CodeHandler,
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90 if get_space() else None,
        concurrency_limit=10 if get_space() else None,
    )
    # Route the handler's AdditionalOutputs to the state, code view, and chat.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()