[feat] sync fastrtc, add data-channel text support

sync code with fastrtc,
add text support through the data channel,
fix the Safari connection problem,
support chat without camera or mic
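
For context, a minimal sketch of the technique the second item refers to: sending text over a WebRTC data channel, here with aiortc (the WebRTC library fastrtc builds on). The channel label and message text are illustrative assumptions, not code from this commit.

import asyncio

from aiortc import RTCPeerConnection


async def main():
    pc = RTCPeerConnection()
    # "chat" is an assumed label; fastrtc's actual channel name may differ.
    channel = pc.createDataChannel("chat")

    @channel.on("open")
    def on_open():
        # Text frames travel over the data channel, so chat works
        # without any camera or microphone track.
        channel.send("hello over the data channel")

    @channel.on("message")
    def on_message(message):
        print("received:", message)

    # Signaling (the SDP offer/answer exchange) is omitted here; a real
    # peer connection needs it before the channel opens.


asyncio.run(main())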
huangbinchao.hbc
2025-03-25 18:05:10 +08:00
parent e1fb40a8a8
commit aefb08150f
222 changed files with 28698 additions and 5889 deletions

demo/llama_code_editor/README.md

@@ -0,0 +1,16 @@
---
title: Llama Code Editor
emoji: 🦙
colorFrom: indigo
colorTo: pink
sdk: gradio
sdk_version: 5.16.0
app_file: app.py
pinned: false
license: mit
short_description: Create interactive HTML web pages with your voice
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY, secret|GROQ_API_KEY]
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
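
The secret|… tags request Space secrets, which are presumably exposed to the app as environment variables at runtime; handler.py below reads them exactly this way. A minimal sketch:

import os

# Secrets declared in the README tags become environment variables
# inside the running Space.
twilio_sid = os.environ.get("TWILIO_ACCOUNT_SID")
twilio_token = os.environ.get("TWILIO_AUTH_TOKEN")
sambanova_key = os.environ.get("SAMBANOVA_API_KEY")
groq_key = os.environ.get("GROQ_API_KEY")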

demo/llama_code_editor/app.py

@@ -0,0 +1,45 @@
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from fastrtc import Stream
from gradio.utils import get_space

try:
    from demo.llama_code_editor.handler import (
        CodeHandler,
    )
    from demo.llama_code_editor.ui import demo as ui
except (ImportError, ModuleNotFoundError):
    from handler import CodeHandler
    from ui import demo as ui

stream = Stream(
    handler=CodeHandler,
    modality="audio",
    mode="send-receive",
    concurrency_limit=10 if get_space() else None,
    time_limit=90 if get_space() else None,
)
stream.ui = ui

app = FastAPI()


@app.get("/")
async def _():
    url = "/ui" if not get_space() else "https://fastrtc-llama-code-editor.hf.space/ui/"
    return RedirectResponse(url)


if __name__ == "__main__":
    import os

    # MODE selects how the demo is served: the Gradio UI, the FastPhone
    # telephone interface, or the FastAPI app via uvicorn.
    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860, server_name="0.0.0.0")
    elif mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)

demo/llama_code_editor/assets/sandbox.html

@@ -0,0 +1,37 @@
<div style="
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
min-height: 400px;
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8ec 100%);
border-radius: 8px;
border: 2px dashed #cbd5e1;
padding: 2rem;
text-align: center;
color: #64748b;
font-family: system-ui, -apple-system, sans-serif;
">
<div style="
width: 80px;
height: 80px;
margin-bottom: 1.5rem;
border: 3px solid #cbd5e1;
border-radius: 12px;
position: relative;
">
<div style="
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
font-size: 2rem;
">📦</div>
</div>
<h2 style="
margin: 0 0 0.5rem 0;
font-size: 1.5rem;
font-weight: 600;
color: #475569;
">No Application Created</h2>
</div>

demo/llama_code_editor/assets/spinner.html

@@ -0,0 +1,60 @@
<div style="
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
min-height: 400px;
background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
border-radius: 8px;
padding: 2rem;
text-align: center;
font-family: system-ui, -apple-system, sans-serif;
">
<!-- Spinner container -->
<div style="
position: relative;
width: 64px;
height: 64px;
margin-bottom: 1.5rem;
">
<!-- Static ring -->
<div style="
position: absolute;
width: 100%;
height: 100%;
border: 4px solid #e2e8f0;
border-radius: 50%;
"></div>
<!-- Animated spinner -->
<div style="
position: absolute;
width: 100%;
height: 100%;
border: 4px solid transparent;
border-top-color: #3b82f6;
border-radius: 50%;
animation: spin 1s linear infinite;
"></div>
</div>
<!-- Text content -->
<h2 style="
margin: 0 0 0.5rem 0;
font-size: 1.25rem;
font-weight: 600;
color: #475569;
">Generating your application...</h2>
<p style="
margin: 0;
font-size: 0.875rem;
color: #64748b;
">This may take a few moments</p>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
</div>

demo/llama_code_editor/handler.py

@@ -0,0 +1,73 @@
import base64
import os
import re
from pathlib import Path

import numpy as np
import openai
from dotenv import load_dotenv
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    audio_to_bytes,
)
from groq import Groq

load_dotenv()

groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

path = Path(__file__).parent / "assets"
spinner_html = open(path / "spinner.html").read()

system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."

user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"


def extract_html_content(text):
    """
    Extract content including HTML tags.
    """
    match = re.search(r"<!DOCTYPE html>.*?</html>", text, re.DOTALL)
    return match.group(0) if match else None


def display_in_sandbox(code):
    encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    # Show the spinner while the request is transcribed and answered.
    yield AdditionalOutputs(history, spinner_html)

    # Transcribe the recorded audio with Groq's Whisper endpoint.
    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes(user_message)),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    # Generate the HTML application with Llama via the SambaNova API.
    response = client.chat.completions.create(
        model="Meta-Llama-3.1-70B-Instruct",
        messages=history,  # type: ignore
        temperature=0.1,
        top_p=0.1,
    )

    output = response.choices[0].message.content
    html_code = extract_html_content(output)

    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


CodeHandler = ReplyOnPause(generate)  # type: ignore
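
A quick usage sketch for extract_html_content; the sample reply string is made up for illustration:

sample_reply = (
    "Sure, here is the page:\n"
    "<!DOCTYPE html><html><body><h1>Hi</h1></body></html>\n"
    "Let me know if you want edits."
)
print(extract_html_content(sample_reply))
# -> <!DOCTYPE html><html><body><h1>Hi</h1></body></html>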

demo/llama_code_editor/requirements.in

@@ -0,0 +1,5 @@
fastrtc[vad]
groq
openai
python-dotenv
twilio

demo/llama_code_editor/requirements.txt

@@ -0,0 +1,295 @@
# This file was autogenerated by uv via the following command:
# uv pip compile demo/llama_code_editor/requirements.in -o demo/llama_code_editor/requirements.txt
aiofiles==23.2.1
# via gradio
aiohappyeyeballs==2.4.6
# via aiohttp
aiohttp==3.11.12
# via
# aiohttp-retry
# twilio
aiohttp-retry==2.9.1
# via twilio
aioice==0.9.0
# via aiortc
aiortc==1.10.1
# via fastrtc
aiosignal==1.3.2
# via aiohttp
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
# via
# gradio
# groq
# httpx
# openai
# starlette
attrs==25.1.0
# via aiohttp
audioread==3.0.1
# via librosa
av==12.3.0
# via aiortc
certifi==2024.8.30
# via
# httpcore
# httpx
# requests
cffi==1.17.1
# via
# aiortc
# cryptography
# pylibsrtp
# soundfile
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via
# typer
# uvicorn
coloredlogs==15.0.1
# via onnxruntime
cryptography==43.0.3
# via
# aiortc
# pyopenssl
decorator==5.1.1
# via librosa
distro==1.9.0
# via
# groq
# openai
dnspython==2.7.0
# via aioice
fastapi==0.115.5
# via gradio
fastrtc==0.0.2.post4
# via -r demo/llama_code_editor/requirements.in
ffmpy==0.4.0
# via gradio
filelock==3.16.1
# via huggingface-hub
flatbuffers==24.3.25
# via onnxruntime
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
fsspec==2024.10.0
# via
# gradio-client
# huggingface-hub
google-crc32c==1.6.0
# via aiortc
gradio==5.16.0
# via fastrtc
gradio-client==1.7.0
# via gradio
groq==0.18.0
# via -r demo/llama_code_editor/requirements.in
h11==0.14.0
# via
# httpcore
# uvicorn
httpcore==1.0.7
# via httpx
httpx==0.27.2
# via
# gradio
# gradio-client
# groq
# openai
# safehttpx
huggingface-hub==0.28.1
# via
# gradio
# gradio-client
humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
ifaddr==0.2.0
# via aioice
jinja2==3.1.4
# via gradio
jiter==0.7.1
# via openai
joblib==1.4.2
# via
# librosa
# scikit-learn
lazy-loader==0.4
# via librosa
librosa==0.10.2.post1
# via fastrtc
llvmlite==0.43.0
# via numba
markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via
# gradio
# jinja2
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
msgpack==1.1.0
# via librosa
multidict==6.1.0
# via
# aiohttp
# yarl
numba==0.60.0
# via librosa
numpy==2.0.2
# via
# gradio
# librosa
# numba
# onnxruntime
# pandas
# scikit-learn
# scipy
# soxr
onnxruntime==1.20.1
# via fastrtc
openai==1.54.4
# via -r demo/llama_code_editor/requirements.in
orjson==3.10.11
# via gradio
packaging==24.2
# via
# gradio
# gradio-client
# huggingface-hub
# lazy-loader
# onnxruntime
# pooch
pandas==2.2.3
# via gradio
pillow==11.0.0
# via gradio
platformdirs==4.3.6
# via pooch
pooch==1.8.2
# via librosa
propcache==0.2.1
# via
# aiohttp
# yarl
protobuf==5.28.3
# via onnxruntime
pycparser==2.22
# via cffi
pydantic==2.9.2
# via
# fastapi
# gradio
# groq
# openai
pydantic-core==2.23.4
# via pydantic
pydub==0.25.1
# via gradio
pyee==12.1.1
# via aiortc
pygments==2.18.0
# via rich
pyjwt==2.10.1
# via twilio
pylibsrtp==0.10.0
# via aiortc
pyopenssl==24.2.1
# via aiortc
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.0.1
# via -r demo/llama_code_editor/requirements.in
python-multipart==0.0.20
# via gradio
pytz==2024.2
# via pandas
pyyaml==6.0.2
# via
# gradio
# huggingface-hub
requests==2.32.3
# via
# huggingface-hub
# pooch
# twilio
rich==13.9.4
# via typer
ruff==0.9.6
# via gradio
safehttpx==0.1.6
# via gradio
scikit-learn==1.5.2
# via librosa
scipy==1.14.1
# via
# librosa
# scikit-learn
semantic-version==2.10.0
# via gradio
shellingham==1.5.4
# via typer
six==1.16.0
# via python-dateutil
sniffio==1.3.1
# via
# anyio
# groq
# httpx
# openai
soundfile==0.12.1
# via librosa
soxr==0.5.0.post1
# via librosa
starlette==0.41.3
# via
# fastapi
# gradio
sympy==1.13.3
# via onnxruntime
threadpoolctl==3.5.0
# via scikit-learn
tomlkit==0.12.0
# via gradio
tqdm==4.67.0
# via
# huggingface-hub
# openai
twilio==9.4.5
# via -r demo/llama_code_editor/requirements.in
typer==0.13.1
# via gradio
typing-extensions==4.12.2
# via
# fastapi
# gradio
# gradio-client
# groq
# huggingface-hub
# librosa
# openai
# pydantic
# pydantic-core
# pyee
# typer
tzdata==2024.2
# via pandas
urllib3==2.2.3
# via requests
uvicorn==0.32.0
# via gradio
websockets==12.0
# via gradio-client
yarl==1.18.3
# via aiohttp

demo/llama_code_editor/ui.py

@@ -0,0 +1,75 @@
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from fastrtc import WebRTC, get_twilio_turn_credentials
from gradio.utils import get_space

try:
    from demo.llama_code_editor.handler import (
        CodeHandler,
        display_in_sandbox,
        system_prompt,
    )
except (ImportError, ModuleNotFoundError):
    from handler import CodeHandler, display_in_sandbox, system_prompt

load_dotenv()

path = Path(__file__).parent / "assets"

with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Llama Code Editor
                </h1>
                <h2 style='text-align: center'>
                Powered by SambaNova and Gradio-WebRTC ⚡️
                </h2>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice!
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=get_twilio_turn_credentials()
                if get_space()
                else None,
                mode="send",
                modality="audio",
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=open(path / "sandbox.html").read())
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Stream microphone audio to CodeHandler; history and code are extra inputs.
    webrtc.stream(
        CodeHandler,
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90 if get_space() else None,
        concurrency_limit=10 if get_space() else None,
    )
    # Route the handler's AdditionalOutputs to the state, code view, and chat.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()