mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git (synced 2026-02-05 01:49:23 +08:00)
Additional outputs tweaks + fix track constraints (#28)
* code
* add code
* add code
@@ -1,37 +1,21 @@
 import logging
 import tempfile

 import gradio as gr
 import numpy as np
 from dotenv import load_dotenv
 from gradio_webrtc import AdditionalOutputs, ReplyOnPause, WebRTC
 from openai import OpenAI
 from pydub import AudioSegment

-from dotenv import load_dotenv

 load_dotenv()


 # Configure the root logger to WARNING to suppress debug messages from other libraries
 logging.basicConfig(level=logging.WARNING)

 # Create a console handler
 console_handler = logging.StreamHandler()
 console_handler.setLevel(logging.DEBUG)

 # Create a formatter
 formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
 console_handler.setFormatter(formatter)

 # Configure the logger for your specific library
 logger = logging.getLogger("gradio_webrtc")
 logger.setLevel(logging.DEBUG)
 logger.addHandler(console_handler)


 client = OpenAI()


 def transcribe(audio: tuple[int, np.ndarray], transcript: list[dict]):
     print("audio", audio)
     segment = AudioSegment(
         audio[1].tobytes(),
         frame_rate=audio[0],
@@ -39,12 +23,14 @@ def transcribe(audio: tuple[int, np.ndarray], transcript: list[dict]):
         channels=1,
     )

     transcript.append({"role": "user", "content": gr.Audio((audio[0], audio[1].squeeze()))})

     with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_audio:
         segment.export(temp_audio.name, format="mp3")
         next_chunk = client.audio.transcriptions.create(
             model="whisper-1", file=open(temp_audio.name, "rb")
         ).text
     transcript.append({"role": "user", "content": next_chunk})
     transcript.append({"role": "assistant", "content": next_chunk})
     yield AdditionalOutputs(transcript)
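Note on the pattern above: values wrapped in AdditionalOutputs are not sent back over the media track; they are delivered to the page through the component's on_additional_outputs event (the same hook this commit switches to in the last hunk). A minimal sketch of how transcribe might be wired up, assuming a gr.Chatbot display; the layout and component choices here are illustrative, not part of the diff:

# Sketch (not part of the diff): wiring a ReplyOnPause handler that yields
# AdditionalOutputs. Component choices and names here are assumptions.
with gr.Blocks() as sketch:
    chat = gr.Chatbot(type="messages")   # assumed display for the transcript
    audio = WebRTC(mode="send-receive", modality="audio")
    audio.stream(
        ReplyOnPause(transcribe),        # run transcribe() when the speaker pauses
        inputs=[audio, chat],
        outputs=[audio],
    )
    # AdditionalOutputs yielded by transcribe() arrive here, not on the track:
    audio.on_additional_outputs(lambda t: t, outputs=[chat])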
@@ -49,17 +49,14 @@ else:


 def detection(frame, conf_threshold=0.3):
     print("frame.shape", frame.shape)
     frame = cv2.flip(frame, 0)
     global count
     if random.random() > 0.98:
         return AdditionalOutputs(count)
     count += 1
     return AdditionalOutputs(1)


 css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
 .my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""


 with gr.Blocks(css=css) as demo:
     gr.HTML(
         """
@@ -78,7 +75,13 @@ with gr.Blocks(css=css) as demo:
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
             image = WebRTC(
-                label="Stream", rtc_configuration=rtc_configuration, mode="send"
+                label="Stream", rtc_configuration=rtc_configuration,
+                mode="send",
+                track_constraints={"width": {"exact": 800},
+                                   "height": {"exact": 600},
+                                   "aspectRatio": {"exact": 1.33333}
+                                   },
+                rtp_params={"degradationPreference": "maintain-resolution"}
             )
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
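Context for the track_constraints fix: the dict is forwarded to the browser's getUserMedia() as standard MediaTrackConstraints, so {"exact": ...} makes each value a hard requirement and capture fails with an OverconstrainedError on cameras that cannot deliver exactly 800x600. A softer variant, assuming the same component signature, would use "ideal" so the browser picks the closest supported mode:

# Sketch (assumption, not from the diff): "ideal" asks for 800x600 but lets
# the browser fall back to the closest resolution the camera supports.
image = WebRTC(
    label="Stream",
    rtc_configuration=rtc_configuration,
    mode="send",
    track_constraints={
        "width": {"ideal": 800},
        "height": {"ideal": 600},
    },
    rtp_params={"degradationPreference": "maintain-resolution"},
)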
@@ -92,6 +95,6 @@ with gr.Blocks(css=css) as demo:
     image.stream(
         fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10
     )
-    image.change(lambda n: n, outputs=[number])
+    image.on_additional_outputs(lambda n: n, outputs=number)

 demo.launch()
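This last hunk is the "additional outputs" tweak from the commit title: instead of piggybacking the count on the component's generic .change event, the values that detection() wraps in AdditionalOutputs are now delivered through the dedicated on_additional_outputs hook. The callback receives them already unwrapped, so a plain function can route them to ordinary components:

# Sketch (assumes `number = gr.Number()` is defined earlier in the Blocks,
# as implied by outputs=number in the diff).
def show_count(n):
    # detection() emits either the running count or 1; display it as-is.
    return n

image.on_additional_outputs(show_count, outputs=number)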