mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
[feat] update some feature
Sync code with fastrtc, add text support through the data channel, fix the Safari connection problem, and support chat without a camera or microphone.
This commit is contained in:
99
demo/talk_to_smolagents/app.py
Normal file
99
demo/talk_to_smolagents/app.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from fastrtc import (
|
||||
ReplyOnPause,
|
||||
Stream,
|
||||
get_stt_model,
|
||||
get_tts_model,
|
||||
get_twilio_turn_credentials,
|
||||
)
|
||||
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
|
||||
|
||||
# Read variables from a .env file before anything else is initialised
load_dotenv()

# Directory that contains this script
curr_dir = Path(__file__).parent

# Speech-to-text and text-to-speech models shared by every call
stt_model = get_stt_model()
tts_model = get_tts_model()

# Module-level list reserved for the conversation history
conversation_state: List[Dict[str, str]] = []
|
||||
|
||||
# System prompt for the agent. It is prepended to every transcribed user
# request, so wording errors here reach the model on each call.
system_prompt = """You are a helpful assistant that can help with finding places to
work remotely from. You should specifically check against reviews and ratings of the
place. You should use these criteria to find the best place to work from:
- Price
- Reviews
- Ratings
- Location
- WIFI
Only return the name, address of the place, and a short description of the place.
Always search for real places.
Only return real places, not fake ones.
If you receive anything other than a location, you should ask for a location.
<example>
User: I am in Paris, France. Can you find me a place to work from?
Assistant: I found a place called "Le Café de la Paix" at 123 Rue de la Paix,
Paris, France. It has good reviews and is in a great location.
</example>
<example>
User: I am in London, UK. Can you find me a place to work from?
Assistant: I found a place called "The London Coffee Company".
</example>
<example>
User: How many people are in the room?
Assistant: I only respond to requests about finding places to work from.
</example>

"""
|
||||
|
||||
# Language model handed to the agent below
model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")

# Agent whose only tool is DuckDuckGo web search
agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool()],
    description="Search the web for cafes to work from.",
    max_steps=10,
    verbosity_level=2,
)
|
||||
|
||||
|
||||
def process_response(audio):
    """Transcribe incoming audio, query the agent, and stream the spoken reply.

    This generator is the callback wrapped by ``ReplyOnPause`` in the
    ``Stream`` defined in this file.

    Args:
        audio: The captured audio, passed unchanged to ``stt_model.stt``.

    Yields:
        Audio chunks produced by ``tts_model.stream_tts_sync`` for the
        agent's answer. Nothing is yielded when the transcript is blank.
    """
    # Convert speech to text using the STT model
    text = stt_model.stt(audio)
    if not text.strip():
        # Blank transcript: end the generator without calling the agent.
        return

    # The agent receives a single task string, so the system prompt is
    # prepended to the transcribed request.
    input_text = f"{system_prompt}\n\n{text}"
    response_content = agent.run(input_text)

    # The agent's final answer is not guaranteed to be a ``str``; coerce it so
    # the TTS model always receives text. Falsy answers still map to "".
    reply_text = str(response_content or "")

    # Stream the synthesized audio back chunk by chunk
    yield from tts_model.stream_tts_sync(reply_text)
|
||||
|
||||
|
||||
# Audio send/receive stream; process_response is invoked through ReplyOnPause
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(process_response, input_sample_rate=16000),
    ui_args={
        "pulse_color": "rgb(255, 255, 255)",
        "icon_button_color": "rgb(255, 255, 255)",
        "title": "🧑💻The Coworking Agent",
    },
    rtc_configuration=get_twilio_turn_credentials(),
)

# Launch the UI on port 7860 only when this file is run as a script
if __name__ == "__main__":
    stream.ui.launch(server_port=7860)
|
||||
Reference in New Issue
Block a user