Files
gradio-webrtc/demo/phonic_chat/app.py
Freddy Boulton 853d6a06b5 Rebrand to FastRTC (#60)
* Add code

* add code

* add code

* Rename messages

* rename

* add code

* Add demo

* docs + demos + bug fixes

* add code

* styles

* user guide

* Styles

* Add code

* misc docs updates

* print nit

* whisper + pr

* url for images

* whisper update

* Fix bugs

* remove demo files

* version number

* Fix pypi readme

* Fix

* demos

* Add llama code editor

* Update llama code editor and object detection cookbook

* Add more cookbook demos

* add code

* Fix links for PR deploys

* add code

* Fix the install

* add tts

* TTS docs

* Typo

* Pending bubbles for reply on pause

* Stream redesign (#63)

* better error handling

* Websocket error handling

* add code

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>

* remove docs from dist

* Some docs typos

* more typos

* upload changes + docs

* docs

* better phone

* update docs

* add code

* Make demos better

* fix docs + websocket start_up

* remove mention of FastAPI app

* fastphone tweaks

* add code

* ReplyOnStopWord fixes

* Fix cookbook

* Fix pypi readme

* add code

* bump versions

* sambanova cookbook

* Fix tags

* Llm voice chat

* kyutai tag

* Add error message to all index.html

* STT module uses Moonshine

* Not required from typing extensions

* fix llm voice chat

* Add vpn warning

* demo fixes

* demos

* Add more ui args and gemini audio-video

* update cookbook

* version 9

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
2025-02-24 01:13:42 -05:00

135 lines
4.3 KiB
Python

import subprocess
# Install the pinned fastrtc release at import time, before `fastrtc` is
# imported further down. The result of `subprocess.run` is discarded and
# `check=True` is not passed, so a failed install does not stop the script here.
# NOTE(review): presumably a workaround for a hosted demo environment where the
# dependency cannot be pinned another way — confirm before relying on it.
subprocess.run(["pip", "install", "fastrtc==0.0.3.post7"])
import asyncio
import base64
import os
import gradio as gr
from gradio.utils import get_space
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
AdditionalOutputs,
AsyncStreamHandler,
Stream,
get_twilio_turn_credentials,
WebRTCError,
audio_to_float32,
)
from fastapi import FastAPI
from phonic.client import PhonicSTSClient, get_voices
# Called before the API key is read from the environment below, so values
# supplied through a .env file (python-dotenv) are visible to os.environ.
load_dotenv()
# WebSocket endpoint handed to PhonicSTSClient in PhonicHandler.start_up.
STS_URI = "wss://api.phonic.co/v1/sts/ws"
# Required setting: a missing PHONIC_API_KEY raises KeyError at import time.
API_KEY = os.environ["PHONIC_API_KEY"]
# Sample rate used for both the handler's input and output audio; it matches
# the "pcm_44100" formats requested from the Phonic client.
SAMPLE_RATE = 44_100
# Voices are fetched once, at import time, using the API key.
# NOTE(review): presumably a network call — confirm against phonic.client.
voices = get_voices(API_KEY)
# Only the "id" of each voice is kept; these populate the voice dropdown.
voice_ids = [voice["id"] for voice in voices]
class PhonicHandler(AsyncStreamHandler):
    """Stream handler that relays audio between FastRTC and a Phonic STS session.

    Incoming frames are forwarded to the Phonic client in ``receive``; messages
    coming back from Phonic are converted in ``start_up`` and placed on
    ``output_queue``, from which ``emit`` hands them to the stream one at a time.
    """

    def __init__(self):
        """Create a handler using ``SAMPLE_RATE`` for both input and output."""
        super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE)
        # Holds (sample_rate, int16 array) audio tuples and AdditionalOutputs
        # chat messages, in the order they were received from Phonic.
        self.output_queue = asyncio.Queue()
        # Set by start_up once the Phonic connection is open; until then
        # receive() silently drops frames.
        self.client = None

    def copy(self) -> AsyncStreamHandler:
        """Return a new, unconnected ``PhonicHandler``."""
        return PhonicHandler()

    async def start_up(self):
        """Open the Phonic session and pump its messages into ``output_queue``.

        Waits for the stream arguments, reads the voice id from them, then
        iterates the Phonic message stream until it ends:

        * ``"audio_chunk"``: the base64 ``"audio"`` payload is decoded to an
          int16 array and queued with ``SAMPLE_RATE``; if the message also has
          ``"text"``, an assistant chat message is queued.
        * ``"input_text"``: a user chat message is queued.

        Raises:
            WebRTCError: if connecting to or reading from Phonic fails; the
                original exception is attached as the cause.
        """
        await self.wait_for_args()
        # NOTE(review): index 1 is presumably the first additional input (the
        # voice dropdown), with index 0 being the audio component — confirm.
        voice_id = self.latest_args[1]
        try:
            async with PhonicSTSClient(STS_URI, API_KEY) as client:
                self.client = client
                sts_stream = client.sts(  # type: ignore
                    input_format="pcm_44100",
                    output_format="pcm_44100",
                    system_prompt="You are a helpful voice assistant. Respond conversationally.",
                    # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
                    voice_id=voice_id,
                )
                async for message in sts_stream:
                    message_type = message.get("type")
                    if message_type == "audio_chunk":
                        audio_b64 = message["audio"]
                        audio_bytes = base64.b64decode(audio_b64)
                        await self.output_queue.put(
                            (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
                        )
                        if text := message.get("text"):
                            msg = {"role": "assistant", "content": text}
                            await self.output_queue.put(AdditionalOutputs(msg))
                    elif message_type == "input_text":
                        msg = {"role": "user", "content": message["text"]}
                        await self.output_queue.put(AdditionalOutputs(msg))
        except Exception as e:
            # Chain the cause so the underlying failure stays in the traceback.
            raise WebRTCError(f"Error starting up: {e}") from e

    async def emit(self):
        """Wait for and return the next queued item (audio tuple or AdditionalOutputs).

        Raises:
            WebRTCError: if reading from the queue fails.
        """
        try:
            return await self.output_queue.get()
        except Exception as e:
            raise WebRTCError(f"Error emitting: {e}") from e

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Forward one incoming audio frame to Phonic.

        The frame is dropped if the Phonic client has not been set yet;
        otherwise it is converted with ``audio_to_float32`` and sent.

        Raises:
            WebRTCError: if converting or sending the audio fails.
        """
        try:
            if not self.client:
                return
            audio_float32 = audio_to_float32(frame)
            await self.client.send_audio(audio_float32)  # type: ignore
        except Exception as e:
            raise WebRTCError(f"Error sending audio: {e}") from e

    async def shutdown(self):
        """Close the Phonic websocket, if one is open, then defer to the base class."""
        if self.client:
            # `_websocket` is a private attribute of the Phonic client.
            # NOTE(review): it is presumably None until the connection is
            # established — guard so teardown cannot fail with AttributeError.
            websocket = self.client._websocket
            if websocket is not None:
                await websocket.close()
        return super().shutdown()
def add_to_chatbot(state, chatbot, message):
    """Record ``message`` in the conversation ``state``.

    ``message`` is appended to ``state`` in place. The function returns the
    same ``state`` object together with ``gr.skip()`` for the chatbot output;
    ``chatbot`` itself is not read here.
    """
    state.append(message)
    chatbot_update = gr.skip()
    return state, chatbot_update
# Conversation history (list of chat message dicts) and the chatbot showing it.
state = gr.State(value=[])
chatbot = gr.Chatbot(type="messages", value=[])

# The selected voice is passed to the handler as an additional input.
_voice_dropdown = gr.Dropdown(
    choices=voice_ids,
    value="katherine",
    label="Voice",
    info="Select a voice from the dropdown",
)
_ui_args = {
    "title": "Phonic Chat (Powered by FastRTC ⚡️)",
}

stream = Stream(
    handler=PhonicHandler(),
    mode="send-receive",
    modality="audio",
    additional_inputs=[_voice_dropdown],
    additional_outputs=[state, chatbot],
    additional_outputs_handler=add_to_chatbot,
    ui_args=_ui_args,
    # TURN credentials and the usage limits are only applied when get_space()
    # is truthy; otherwise each of these settings is None.
    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
    concurrency_limit=5 if get_space() else None,
    time_limit=90 if get_space() else None,
)

# Whenever the state changes, copy its value into the chatbot.
with stream.ui:
    state.change(lambda s: s, inputs=state, outputs=chatbot)

# Expose the stream on a FastAPI application.
app = FastAPI()
stream.mount(app)
if __name__ == "__main__":
    # MODE selects the entry point: "PHONE" starts the fastphone server; "UI",
    # any other value, or an unset variable launches the Gradio UI.
    mode = os.getenv("MODE")
    if mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        stream.ui.launch(server_port=7860)