mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
Fix TURN credentials for interactive video + other Gemini Audio Video demo tweaks (#297)
* Gemini * Add code * demo tweaks
This commit is contained in:
@@ -4,12 +4,12 @@ emoji: ♊️
|
||||
colorFrom: purple
|
||||
colorTo: red
|
||||
sdk: gradio
|
||||
sdk_version: 5.16.0
|
||||
sdk_version: 5.25.2
|
||||
app_file: app.py
|
||||
pinned: false
|
||||
license: mit
|
||||
short_description: Gemini understands audio and video!
|
||||
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
|
||||
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
|
||||
---
|
||||
|
||||
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
||||
@@ -6,12 +6,14 @@ from io import BytesIO
|
||||
|
||||
import gradio as gr
|
||||
import numpy as np
|
||||
import websockets
|
||||
from dotenv import load_dotenv
|
||||
from fastrtc import (
|
||||
AsyncAudioVideoStreamHandler,
|
||||
Stream,
|
||||
WebRTC,
|
||||
get_twilio_turn_credentials,
|
||||
get_cloudflare_turn_credentials_async,
|
||||
wait_for_item,
|
||||
)
|
||||
from google import genai
|
||||
from gradio.utils import get_space
|
||||
@@ -61,18 +63,24 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
||||
)
|
||||
config = {"response_modalities": ["AUDIO"]}
|
||||
async with client.aio.live.connect(
|
||||
model="gemini-2.0-flash-exp", config=config
|
||||
model="gemini-2.0-flash-exp",
|
||||
config=config, # type: ignore
|
||||
) as session:
|
||||
self.session = session
|
||||
print("set session")
|
||||
while not self.quit.is_set():
|
||||
turn = self.session.receive()
|
||||
async for response in turn:
|
||||
if data := response.data:
|
||||
audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
|
||||
try:
|
||||
async for response in turn:
|
||||
if data := response.data:
|
||||
audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
|
||||
self.audio_queue.put_nowait(audio)
|
||||
except websockets.exceptions.ConnectionClosedOK:
|
||||
print("connection closed")
|
||||
break
|
||||
|
||||
async def video_receive(self, frame: np.ndarray):
|
||||
self.video_queue.put_nowait(frame)
|
||||
|
||||
if self.session:
|
||||
# send image every 1 second
|
||||
print(time.time() - self.last_frame_time)
|
||||
@@ -82,10 +90,12 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
||||
if self.latest_args[1] is not None:
|
||||
await self.session.send(input=encode_image(self.latest_args[1]))
|
||||
|
||||
self.video_queue.put_nowait(frame)
|
||||
|
||||
async def video_emit(self):
|
||||
return await self.video_queue.get()
|
||||
frame = await wait_for_item(self.video_queue, 0.01)
|
||||
if frame is not None:
|
||||
return frame
|
||||
else:
|
||||
return np.zeros((100, 100, 3), dtype=np.uint8)
|
||||
|
||||
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
|
||||
_, array = frame
|
||||
@@ -95,13 +105,15 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
||||
await self.session.send(input=audio_message)
|
||||
|
||||
async def emit(self):
|
||||
array = await self.audio_queue.get()
|
||||
return (self.output_sample_rate, array)
|
||||
array = await wait_for_item(self.audio_queue, 0.01)
|
||||
if array is not None:
|
||||
return (self.output_sample_rate, array)
|
||||
return array
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
if self.session:
|
||||
self.quit.set()
|
||||
await self.session._websocket.close()
|
||||
await self.session.close()
|
||||
self.quit.clear()
|
||||
|
||||
|
||||
@@ -109,10 +121,8 @@ stream = Stream(
|
||||
handler=GeminiHandler(),
|
||||
modality="audio-video",
|
||||
mode="send-receive",
|
||||
rtc_configuration=get_twilio_turn_credentials()
|
||||
if get_space() == "spaces"
|
||||
else None,
|
||||
time_limit=90 if get_space() else None,
|
||||
rtc_configuration=get_cloudflare_turn_credentials_async,
|
||||
time_limit=180 if get_space() else None,
|
||||
additional_inputs=[
|
||||
gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
|
||||
],
|
||||
@@ -151,9 +161,7 @@ with gr.Blocks(css=css) as demo:
|
||||
modality="audio-video",
|
||||
mode="send-receive",
|
||||
elem_id="video-source",
|
||||
rtc_configuration=get_twilio_turn_credentials()
|
||||
if get_space() == "spaces"
|
||||
else None,
|
||||
rtc_configuration=get_cloudflare_turn_credentials_async,
|
||||
icon="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
|
||||
pulse_color="rgb(255, 255, 255)",
|
||||
icon_button_color="rgb(255, 255, 255)",
|
||||
@@ -167,7 +175,7 @@ with gr.Blocks(css=css) as demo:
|
||||
GeminiHandler(),
|
||||
inputs=[webrtc, image_input],
|
||||
outputs=[webrtc],
|
||||
time_limit=60 if get_space() else None,
|
||||
time_limit=180 if get_space() else None,
|
||||
concurrency_limit=2 if get_space() else None,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
fastrtc
|
||||
fastrtc==0.0.23.rc1
|
||||
python-dotenv
|
||||
google-genai
|
||||
twilio
|
||||
|
||||
Reference in New Issue
Block a user