mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
Fix websocket interruption (#291)
* Code * Fix * add code * interruptions * Add code * code * Add code * Add code * code
This commit is contained in:
@@ -928,6 +928,7 @@ class Stream(WebRTCConnectionMixin):
|
|||||||
json={"url": host},
|
json={"url": host},
|
||||||
headers={"Authorization": token or get_token() or ""},
|
headers={"Authorization": token or get_token() or ""},
|
||||||
)
|
)
|
||||||
|
r.raise_for_status()
|
||||||
except Exception:
|
except Exception:
|
||||||
URL = "https://fastrtc-fastphone.hf.space"
|
URL = "https://fastrtc-fastphone.hf.space"
|
||||||
r = httpx.post(
|
r = httpx.post(
|
||||||
@@ -936,6 +937,14 @@ class Stream(WebRTCConnectionMixin):
|
|||||||
headers={"Authorization": token or get_token() or ""},
|
headers={"Authorization": token or get_token() or ""},
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
if r.status_code == 202:
|
||||||
|
print(
|
||||||
|
click.style("INFO", fg="orange")
|
||||||
|
+ ":\t You have "
|
||||||
|
+ "run out of your quota"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
data = r.json()
|
data = r.json()
|
||||||
code = f"{data['code']}"
|
code = f"{data['code']}"
|
||||||
phone_number = data["phone"]
|
phone_number = data["phone"]
|
||||||
|
|||||||
@@ -81,17 +81,11 @@ class WebSocketHandler:
|
|||||||
self._graceful_shutdown_task: asyncio.Task | None = None
|
self._graceful_shutdown_task: asyncio.Task | None = None
|
||||||
|
|
||||||
def _clear_queue(self):
|
def _clear_queue(self):
|
||||||
old_queue = self.queue
|
|
||||||
self.queue = asyncio.Queue()
|
|
||||||
logger.debug("clearing queue")
|
|
||||||
i = 0
|
i = 0
|
||||||
while not old_queue.empty():
|
while not self.queue.empty():
|
||||||
try:
|
self.queue.get_nowait()
|
||||||
old_queue.get_nowait()
|
i += 1
|
||||||
i += 1
|
logger.debug("websocket: popped %d items from queue", i)
|
||||||
except asyncio.QueueEmpty:
|
|
||||||
break
|
|
||||||
logger.debug("popped %d items from queue", i)
|
|
||||||
|
|
||||||
def set_args(self, args: list[Any]):
|
def set_args(self, args: list[Any]):
|
||||||
self.stream_handler.set_args(args)
|
self.stream_handler.set_args(args)
|
||||||
@@ -260,8 +254,8 @@ class WebSocketHandler:
|
|||||||
async def _emit_loop(self):
|
async def _emit_loop(self):
|
||||||
try:
|
try:
|
||||||
while not self.quit.is_set():
|
while not self.quit.is_set():
|
||||||
|
wait_duration = 0.02
|
||||||
output = await self.queue.get()
|
output = await self.queue.get()
|
||||||
|
|
||||||
if output is not None:
|
if output is not None:
|
||||||
frame, output = split_output(output)
|
frame, output = split_output(output)
|
||||||
if isinstance(output, AdditionalOutputs):
|
if isinstance(output, AdditionalOutputs):
|
||||||
@@ -279,6 +273,7 @@ class WebSocketHandler:
|
|||||||
if self.stream_handler.phone_mode
|
if self.stream_handler.phone_mode
|
||||||
else self.stream_handler.output_sample_rate
|
else self.stream_handler.output_sample_rate
|
||||||
)
|
)
|
||||||
|
duration = np.atleast_2d(frame[1]).shape[1] / frame[0]
|
||||||
mulaw_audio = convert_to_mulaw(
|
mulaw_audio = convert_to_mulaw(
|
||||||
frame[1],
|
frame[1],
|
||||||
frame[0],
|
frame[0],
|
||||||
@@ -287,9 +282,6 @@ class WebSocketHandler:
|
|||||||
audio_payload = base64.b64encode(mulaw_audio).decode("utf-8")
|
audio_payload = base64.b64encode(mulaw_audio).decode("utf-8")
|
||||||
|
|
||||||
if self.websocket and self.stream_id:
|
if self.websocket and self.stream_id:
|
||||||
sample_rate, audio_array = frame[:2]
|
|
||||||
duration = len(audio_array) / sample_rate
|
|
||||||
|
|
||||||
self.playing_durations.append(duration)
|
self.playing_durations.append(duration)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
@@ -298,9 +290,10 @@ class WebSocketHandler:
|
|||||||
}
|
}
|
||||||
if self.stream_handler.phone_mode:
|
if self.stream_handler.phone_mode:
|
||||||
payload["streamSid"] = self.stream_id
|
payload["streamSid"] = self.stream_id
|
||||||
|
# yield audio slightly faster than real-time
|
||||||
|
wait_duration = 0.75 * duration
|
||||||
await self.websocket.send_json(payload)
|
await self.websocket.send_json(payload)
|
||||||
|
await asyncio.sleep(wait_duration)
|
||||||
await asyncio.sleep(0.02)
|
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
logger.debug("Emit loop cancelled")
|
logger.debug("Emit loop cancelled")
|
||||||
|
|||||||
14
demo/qwen_phone_chat/README.md
Normal file
14
demo/qwen_phone_chat/README.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
title: Qwen Phone Chat
|
||||||
|
emoji: 📞
|
||||||
|
colorFrom: pink
|
||||||
|
colorTo: green
|
||||||
|
sdk: gradio
|
||||||
|
sdk_version: 5.25.2
|
||||||
|
app_file: app.py
|
||||||
|
pinned: false
|
||||||
|
license: mit
|
||||||
|
short_description: Talk with Qwen 2.5 Omni over the Phone
|
||||||
|
---
|
||||||
|
|
||||||
|
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
||||||
217
demo/qwen_phone_chat/app.py
Normal file
217
demo/qwen_phone_chat/app.py
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import secrets
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import gradio as gr
|
||||||
|
import numpy as np
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from fastapi import FastAPI, Request
|
||||||
|
from fastapi.responses import HTMLResponse
|
||||||
|
from fastrtc import (
|
||||||
|
AdditionalOutputs,
|
||||||
|
AsyncStreamHandler,
|
||||||
|
Stream,
|
||||||
|
get_cloudflare_turn_credentials_async,
|
||||||
|
wait_for_item,
|
||||||
|
)
|
||||||
|
from websockets.asyncio.client import connect
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
cur_dir = Path(__file__).parent
|
||||||
|
|
||||||
|
API_KEY = os.getenv("MODELSCOPE_API_KEY", "")
|
||||||
|
API_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model=qwen-omni-turbo-realtime-2025-03-26"
|
||||||
|
VOICES = ["Chelsie", "Serena", "Ethan", "Cherry"]
|
||||||
|
headers = {"Authorization": "Bearer " + API_KEY}
|
||||||
|
|
||||||
|
|
||||||
|
class QwenOmniHandler(AsyncStreamHandler):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
super().__init__(
|
||||||
|
expected_layout="mono",
|
||||||
|
output_sample_rate=24_000,
|
||||||
|
input_sample_rate=16_000,
|
||||||
|
)
|
||||||
|
self.connection = None
|
||||||
|
self.output_queue = asyncio.Queue()
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
return QwenOmniHandler()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def msg_id() -> str:
|
||||||
|
return f"event_{secrets.token_hex(10)}"
|
||||||
|
|
||||||
|
async def start_up(
|
||||||
|
self,
|
||||||
|
):
|
||||||
|
"""Connect to realtime API. Run forever in separate thread to keep connection open."""
|
||||||
|
voice_id = "Serena"
|
||||||
|
print("voice_id", voice_id)
|
||||||
|
async with connect(
|
||||||
|
API_URL,
|
||||||
|
additional_headers=headers,
|
||||||
|
) as conn:
|
||||||
|
self.client = conn
|
||||||
|
await conn.send(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"event_id": self.msg_id(),
|
||||||
|
"type": "session.update",
|
||||||
|
"session": {
|
||||||
|
"modalities": [
|
||||||
|
"text",
|
||||||
|
"audio",
|
||||||
|
],
|
||||||
|
"voice": voice_id,
|
||||||
|
"input_audio_format": "pcm16",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.connection = conn
|
||||||
|
try:
|
||||||
|
async for data in self.connection:
|
||||||
|
event = json.loads(data)
|
||||||
|
print("event", event["type"])
|
||||||
|
if "type" not in event:
|
||||||
|
continue
|
||||||
|
# Handle interruptions
|
||||||
|
if event["type"] == "input_audio_buffer.speech_started":
|
||||||
|
self.clear_queue()
|
||||||
|
if event["type"] == "response.audio.delta":
|
||||||
|
print("putting output")
|
||||||
|
await self.output_queue.put(
|
||||||
|
(
|
||||||
|
self.output_sample_rate,
|
||||||
|
np.frombuffer(
|
||||||
|
base64.b64decode(event["delta"]), dtype=np.int16
|
||||||
|
).reshape(1, -1),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print("error", e)
|
||||||
|
|
||||||
|
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
||||||
|
if not self.connection:
|
||||||
|
return
|
||||||
|
_, array = frame
|
||||||
|
array = array.squeeze()
|
||||||
|
audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
|
||||||
|
try:
|
||||||
|
await self.connection.send(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"event_id": self.msg_id(),
|
||||||
|
"type": "input_audio_buffer.append",
|
||||||
|
"audio": audio_message,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print("error", e)
|
||||||
|
|
||||||
|
async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
|
||||||
|
return await wait_for_item(self.output_queue)
|
||||||
|
|
||||||
|
async def shutdown(self) -> None:
|
||||||
|
if self.connection:
|
||||||
|
await self.connection.close()
|
||||||
|
self.connection = None
|
||||||
|
|
||||||
|
|
||||||
|
voice = gr.Dropdown(choices=VOICES, value=VOICES[0], type="value", label="Voice")
|
||||||
|
stream = Stream(
|
||||||
|
QwenOmniHandler(),
|
||||||
|
mode="send-receive",
|
||||||
|
modality="audio",
|
||||||
|
additional_inputs=[voice],
|
||||||
|
additional_outputs=None,
|
||||||
|
rtc_configuration=get_cloudflare_turn_credentials_async,
|
||||||
|
concurrency_limit=20,
|
||||||
|
time_limit=180,
|
||||||
|
)
|
||||||
|
|
||||||
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/telephone/incoming")
|
||||||
|
async def handle_incoming_call(request: Request):
|
||||||
|
"""
|
||||||
|
Handle incoming telephone calls (e.g., via Twilio).
|
||||||
|
|
||||||
|
Generates TwiML instructions to connect the incoming call to the
|
||||||
|
WebSocket handler (`/telephone/handler`) for audio streaming.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request: The FastAPI Request object for the incoming call webhook.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
An HTMLResponse containing the TwiML instructions as XML.
|
||||||
|
"""
|
||||||
|
from twilio.twiml.voice_response import Connect, VoiceResponse
|
||||||
|
|
||||||
|
if len(stream.connections) > (stream.concurrency_limit or 20):
|
||||||
|
response = VoiceResponse()
|
||||||
|
response.say("Qwen is busy please try again later!")
|
||||||
|
return HTMLResponse(content=str(response), media_type="application/xml")
|
||||||
|
|
||||||
|
response = VoiceResponse()
|
||||||
|
response.say("Connecting to Qwen")
|
||||||
|
connect = Connect()
|
||||||
|
print("request.url.hostname", request.url.hostname)
|
||||||
|
connect.stream(url=f"wss://{request.url.hostname}/telephone/handler")
|
||||||
|
response.append(connect)
|
||||||
|
response.say("The call has been disconnected.")
|
||||||
|
return HTMLResponse(content=str(response), media_type="application/xml")
|
||||||
|
|
||||||
|
|
||||||
|
stream.mount(app)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def _():
|
||||||
|
html_content = """
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Qwen Phone Chat</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
max-width: 800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 20px;
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
pre {
|
||||||
|
background-color: #f5f5f5;
|
||||||
|
padding: 15px;
|
||||||
|
border-radius: 5px;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
h1 {
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Qwen Phone Chat</h1>
|
||||||
|
<p>Call +1 (877) 853-7936</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
return HTMLResponse(content=html_content)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# stream.fastphone(host="0.0.0.0", port=7860)
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=7860)
|
||||||
2
demo/qwen_phone_chat/requirements.txt
Normal file
2
demo/qwen_phone_chat/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
fastrtc
|
||||||
|
websockets>=14.0
|
||||||
@@ -17,7 +17,6 @@ from fastrtc import (
|
|||||||
wait_for_item,
|
wait_for_item,
|
||||||
)
|
)
|
||||||
from gradio.utils import get_space
|
from gradio.utils import get_space
|
||||||
from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -103,8 +102,8 @@ class OpenAIHandler(AsyncStreamHandler):
|
|||||||
self.connection = None
|
self.connection = None
|
||||||
|
|
||||||
|
|
||||||
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
|
def update_chatbot(chatbot: list[dict], response: dict):
|
||||||
chatbot.append({"role": "assistant", "content": response.transcript})
|
chatbot.append(response)
|
||||||
return chatbot
|
return chatbot
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ def response(
|
|||||||
)
|
)
|
||||||
for chunk in aggregate_bytes_to_16bit(iterator):
|
for chunk in aggregate_bytes_to_16bit(iterator):
|
||||||
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
|
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
|
||||||
yield (24000, audio_array, "mono")
|
yield (24000, audio_array)
|
||||||
|
|
||||||
|
|
||||||
chatbot = gr.Chatbot(type="messages")
|
chatbot = gr.Chatbot(type="messages")
|
||||||
|
|||||||
32
justfile
32
justfile
@@ -42,6 +42,38 @@ publish:
|
|||||||
print(f"Uploading {latest_wheel}")
|
print(f"Uploading {latest_wheel}")
|
||||||
os.system(f"twine upload {latest_wheel}")
|
os.system(f"twine upload {latest_wheel}")
|
||||||
|
|
||||||
|
# Upload the latest wheel to HF space with a random ID
|
||||||
|
publish-dev:
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# Find all wheel files in dist directory
|
||||||
|
wheels = glob.glob('dist/*.whl')
|
||||||
|
if not wheels:
|
||||||
|
print("No wheel files found in dist directory")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
|
# Sort by creation time to get the latest
|
||||||
|
latest_wheel = max(wheels, key=os.path.getctime)
|
||||||
|
wheel_name = os.path.basename(latest_wheel)
|
||||||
|
|
||||||
|
# Generate random ID
|
||||||
|
random_id = str(uuid.uuid4())[:8]
|
||||||
|
|
||||||
|
# Define the HF path
|
||||||
|
hf_space = "freddyaboulton/bucket"
|
||||||
|
hf_path = f"wheels/fastrtc/{random_id}/"
|
||||||
|
|
||||||
|
# Upload to Hugging Face space
|
||||||
|
cmd = f"huggingface-cli upload {hf_space} {latest_wheel} {hf_path}{wheel_name} --repo-type dataset"
|
||||||
|
subprocess.run(cmd, shell=True, check=True)
|
||||||
|
|
||||||
|
# Print the URL
|
||||||
|
print(f"Wheel uploaded successfully!")
|
||||||
|
print(f"URL: https://huggingface.co/datasets/{hf_space}/resolve/main/{hf_path}{wheel_name}")
|
||||||
|
|
||||||
# Build the package
|
# Build the package
|
||||||
build:
|
build:
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "fastrtc"
|
name = "fastrtc"
|
||||||
version = "0.0.22.rc2"
|
version = "0.0.22.rc5"
|
||||||
description = "The realtime communication library for Python"
|
description = "The realtime communication library for Python"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|||||||
Reference in New Issue
Block a user