[feat] update some feature

sync code of fastrtc, add text support through datachannel, fix safari connect problem support chat without camera or mic
2026-02-05 18:09:23 +08:00 · 2025-03-25 18:05:10 +08:00
parent e1fb40a8a8
commit aefb08150f
222 changed files with 28698 additions and 5889 deletions
--- a/demo/phonic_chat/README.md
+++ b/demo/phonic_chat/README.md
@@ -0,0 +1,16 @@
+---
+title: Phonic AI Chat
+emoji: 🎙️
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk to Phonic AI's speech-to-speech model
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|PHONIC_API_KEY]
+python_version: 3.11
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
--- a/demo/phonic_chat/app.py
+++ b/demo/phonic_chat/app.py
@@ -0,0 +1,116 @@
+import asyncio
+import base64
+import os
+
+import gradio as gr
+from gradio.utils import get_space
+import numpy as np
+from dotenv import load_dotenv
+from fastrtc import (
+    AdditionalOutputs,
+    AsyncStreamHandler,
+    Stream,
+    get_twilio_turn_credentials,
+    audio_to_float32,
+    wait_for_item,
+)
+from phonic.client import PhonicSTSClient, get_voices
+
+load_dotenv()
+
+STS_URI = "wss://api.phonic.co/v1/sts/ws"
+API_KEY = os.environ["PHONIC_API_KEY"]
+SAMPLE_RATE = 44_100
+voices = get_voices(API_KEY)
+voice_ids = [voice["id"] for voice in voices]
+
+
+class PhonicHandler(AsyncStreamHandler):
+    def __init__(self):
+        super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE)
+        self.output_queue = asyncio.Queue()
+        self.client = None
+
+    def copy(self) -> AsyncStreamHandler:
+        return PhonicHandler()
+
+    async def start_up(self):
+        await self.wait_for_args()
+        voice_id = self.latest_args[1]
+        async with PhonicSTSClient(STS_URI, API_KEY) as client:
+            self.client = client
+            sts_stream = client.sts(  # type: ignore
+                input_format="pcm_44100",
+                output_format="pcm_44100",
+                system_prompt="You are a helpful voice assistant. Respond conversationally.",
+                # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
+                voice_id=voice_id,
+            )
+            async for message in sts_stream:
+                message_type = message.get("type")
+                if message_type == "audio_chunk":
+                    audio_b64 = message["audio"]
+                    audio_bytes = base64.b64decode(audio_b64)
+                    await self.output_queue.put(
+                        (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
+                    )
+                    if text := message.get("text"):
+                        msg = {"role": "assistant", "content": text}
+                        await self.output_queue.put(AdditionalOutputs(msg))
+                elif message_type == "input_text":
+                    msg = {"role": "user", "content": message["text"]}
+                    await self.output_queue.put(AdditionalOutputs(msg))
+
+    async def emit(self):
+        return await wait_for_item(self.output_queue)
+
+    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        if not self.client:
+            return
+        audio_float32 = audio_to_float32(frame)
+        await self.client.send_audio(audio_float32)  # type: ignore
+
+    async def shutdown(self):
+        if self.client:
+            await self.client._websocket.close()
+        return super().shutdown()
+
+
+def add_to_chatbot(chatbot, message):
+    chatbot.append(message)
+    return chatbot
+
+
+chatbot = gr.Chatbot(type="messages", value=[])
+stream = Stream(
+    handler=PhonicHandler(),
+    mode="send-receive",
+    modality="audio",
+    additional_inputs=[
+        gr.Dropdown(
+            choices=voice_ids,
+            value="victoria",
+            label="Voice",
+            info="Select a voice from the dropdown",
+        )
+    ],
+    additional_outputs=[chatbot],
+    additional_outputs_handler=add_to_chatbot,
+    ui_args={
+        "title": "Phonic Chat (Powered by FastRTC ⚡️)",
+    },
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=5 if get_space() else None,
+    time_limit=90 if get_space() else None,
+)
+
+# with stream.ui:
+#     state.change(lambda s: s, inputs=state, outputs=chatbot)
+
+if __name__ == "__main__":
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        stream.ui.launch(server_port=7860)
--- a/demo/phonic_chat/requirements.txt
+++ b/demo/phonic_chat/requirements.txt
@@ -0,0 +1,74 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements.in -o requirements.txt
+aiohappyeyeballs==2.4.6
+    # via aiohttp
+aiohttp==3.11.12
+    # via
+    #   aiohttp-retry
+    #   twilio
+aiohttp-retry==2.9.1
+    # via twilio
+aiosignal==1.3.2
+    # via aiohttp
+attrs==25.1.0
+    # via aiohttp
+certifi==2025.1.31
+    # via requests
+cffi==1.17.1
+    # via sounddevice
+charset-normalizer==3.4.1
+    # via requests
+fastrtc==0.0.1
+    # via -r requirements.in
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+idna==3.10
+    # via
+    #   requests
+    #   yarl
+isort==6.0.0
+    # via phonic-python
+loguru==0.7.3
+    # via phonic-python
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+numpy==2.2.3
+    # via
+    #   phonic-python
+    #   scipy
+phonic-python==0.1.3
+    # via -r requirements.in
+propcache==0.3.0
+    # via
+    #   aiohttp
+    #   yarl
+pycparser==2.22
+    # via cffi
+pyjwt==2.10.1
+    # via twilio
+python-dotenv==1.0.1
+    # via
+    #   -r requirements.in
+    #   phonic-python
+requests==2.32.3
+    # via
+    #   phonic-python
+    #   twilio
+scipy==1.15.2
+    # via phonic-python
+sounddevice==0.5.1
+    # via phonic-python
+twilio==9.4.6
+    # via -r requirements.in
+typing-extensions==4.12.2
+    # via phonic-python
+urllib3==2.3.0
+    # via requests
+websockets==15.0
+    # via phonic-python
+yarl==1.18.3
+    # via aiohttp