mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-04 17:39:23 +08:00
Moonshine live (#86)
* add code * moonshine --------- Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
This commit is contained in:
16
demo/moonshine_live/README.md
Normal file
16
demo/moonshine_live/README.md
Normal file
@@ -0,0 +1,16 @@
|
||||
---
|
||||
title: Moonshine Live Transcription
|
||||
emoji: 🌕
|
||||
colorFrom: purple
|
||||
colorTo: red
|
||||
sdk: gradio
|
||||
sdk_version: 5.17.0
|
||||
app_file: app.py
|
||||
pinned: false
|
||||
license: mit
|
||||
short_description: Real-time captions with Moonshine ONNX
|
||||
tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
|
||||
models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base]
|
||||
---
|
||||
|
||||
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
||||
64
demo/moonshine_live/app.py
Normal file
64
demo/moonshine_live/app.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from fastrtc import (
|
||||
Stream,
|
||||
AdditionalOutputs,
|
||||
audio_to_float32,
|
||||
ReplyOnPause,
|
||||
get_twilio_turn_credentials,
|
||||
)
|
||||
from functools import lru_cache
|
||||
import gradio as gr
|
||||
from typing import Generator, Literal
|
||||
from numpy.typing import NDArray
|
||||
import numpy as np
|
||||
from moonshine_onnx import MoonshineOnnxModel, load_tokenizer
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def load_moonshine(
    model_name: Literal["moonshine/base", "moonshine/tiny"],
) -> MoonshineOnnxModel:
    """Return the Moonshine ONNX model for ``model_name``.

    The result is memoized with ``lru_cache``, so each distinct model name
    is constructed at most once per process and later calls reuse the
    cached instance.

    Args:
        model_name: Identifier forwarded to ``MoonshineOnnxModel``.

    Returns:
        The (possibly cached) ``MoonshineOnnxModel`` instance.
    """
    model = MoonshineOnnxModel(model_name=model_name)
    return model
|
||||
|
||||
|
||||
# Module-level tokenizer, created once at import time; `stt` uses it to
# decode the token batches produced by the model.
tokenizer = load_tokenizer()
|
||||
|
||||
|
||||
def stt(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    model_name: Literal["moonshine/base", "moonshine/tiny"],
) -> Generator[AdditionalOutputs, None, None]:
    """Transcribe one audio segment and yield the text as an extra output.

    Args:
        audio: ``(sample_rate, samples)`` pair. The sample rate is unpacked
            but not otherwise used here.
        model_name: Which Moonshine model to run; resolved through the
            cached ``load_moonshine`` loader.

    Yields:
        A single ``AdditionalOutputs`` wrapping the first decoded string
        of the batch.
    """
    model = load_moonshine(model_name)

    _sample_rate, samples = audio  # type: ignore
    # int16 PCM is converted to float32 via the fastrtc helper, which is
    # handed the original (rate, samples) tuple.
    if samples.dtype == np.int16:
        samples = audio_to_float32(audio)
    # A 1-D signal is given a leading axis so the model input is 2-D.
    if samples.ndim == 1:
        samples = samples.reshape(1, -1)

    token_ids = model.generate(samples)
    transcript = tokenizer.decode_batch(token_ids)[0]
    yield AdditionalOutputs(transcript)
|
||||
|
||||
|
||||
# Audio-only, send-only stream: audio goes to `stt`, and results come back
# through the additional "Captions" textbox output.
stream = Stream(
    # NOTE(review): ReplyOnPause presumably runs `stt` once the speaker
    # pauses -- confirm against the fastrtc docs. Input is requested at 16 kHz.
    ReplyOnPause(stt, input_sample_rate=16000),
    modality="audio",
    mode="send",
    ui_args={
        "title": "Live Captions by Moonshine",
        "icon": "default-favicon.ico",
        "icon_button_color": "#5c5c5c",
        "pulse_color": "#a7c6fc",
        "icon_radius": 0,
    },
    # NOTE(review): presumably needs Twilio credentials to be available
    # (the Space README lists TWILIO_* secrets) -- confirm.
    rtc_configuration=get_twilio_turn_credentials(),
    # The selected radio value is passed to `stt` as `model_name`.
    additional_inputs=[
        gr.Radio(
            choices=["moonshine/base", "moonshine/tiny"],
            value="moonshine/base",
            label="Model",
        )
    ],
    additional_outputs=[gr.Textbox(label="Captions")],
    # Append each new caption on its own line; strip() drops the leading
    # newline when the textbox is still empty.
    additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(),
)

if __name__ == "__main__":
    # Launch the UI only when run as a script, not on import.
    stream.ui.launch()
|
||||
BIN
demo/moonshine_live/default-favicon.ico
Normal file
BIN
demo/moonshine_live/default-favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 6.4 KiB |
3
demo/moonshine_live/requirements.txt
Normal file
3
demo/moonshine_live/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
fastrtc[vad]
|
||||
useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx
|
||||
twilio
|
||||
Reference in New Issue
Block a user