Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git, synced 2026-02-05 18:09:23 +08:00
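Adds demo/stream_whisper.py: a Gradio demo that streams microphone audio over WebRTC and, each time the speaker pauses (via ReplyOnPause), transcribes the chunk with OpenAI's whisper-1 model, appending both the audio turn and its transcription to a messages-style Chatbot.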
demo/stream_whisper.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import tempfile

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from gradio_webrtc import AdditionalOutputs, ReplyOnPause, WebRTC
from openai import OpenAI
from pydub import AudioSegment

load_dotenv()  # pick up OPENAI_API_KEY from a local .env file

client = OpenAI()


def transcribe(audio: tuple[int, np.ndarray], transcript: list[dict]):
    print("audio", audio)  # debug: log the incoming (sample_rate, samples) chunk
    # Wrap the raw PCM samples in a mono AudioSegment so pydub can encode them.
    segment = AudioSegment(
        audio[1].tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )

    # Echo the user's audio turn into the chat history.
    transcript.append(
        {"role": "user", "content": gr.Audio((audio[0], audio[1].squeeze()))}
    )

    # Encode to mp3 (requires ffmpeg) and send it to Whisper; close the file
    # handle explicitly instead of leaking it.
    with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_audio:
        segment.export(temp_audio.name, format="mp3")
        with open(temp_audio.name, "rb") as mp3_file:
            next_chunk = client.audio.transcriptions.create(
                model="whisper-1", file=mp3_file
            ).text
    transcript.append({"role": "assistant", "content": next_chunk})
    yield AdditionalOutputs(transcript)


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = WebRTC(
                label="Stream",
                mode="send",
                modality="audio",
            )
        with gr.Column():
            transcript = gr.Chatbot(label="transcript", type="messages")

    # Run transcribe() whenever the speaker pauses; cap each stream at 30s.
    audio.stream(
        ReplyOnPause(transcribe),
        inputs=[audio, transcript],
        outputs=[audio],
        time_limit=30,
    )
    # Route the AdditionalOutputs payload (the transcript) into the Chatbot.
    audio.on_additional_outputs(lambda s: s, outputs=transcript)

if __name__ == "__main__":
    demo.launch()
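Since ReplyOnPause wraps a plain generator, the handler can be exercised without a browser or WebRTC session. A minimal smoke-test sketch, assuming the module is importable as demo.stream_whisper and OPENAI_API_KEY is set; note it makes a real whisper-1 API call, and a pure tone will not produce meaningful text:

import numpy as np

from demo.stream_whisper import transcribe  # import path assumed from the diff

sample_rate = 16000
# One second of a 440 Hz tone as int16 PCM, standing in for microphone audio.
t = np.arange(sample_rate) / sample_rate
samples = (0.2 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

transcript: list[dict] = []
for out in transcribe((sample_rate, samples), transcript):
    print(out)  # AdditionalOutputs wrapping the updated transcript list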