diff --git a/demo/moonshine_live/README.md b/demo/moonshine_live/README.md
new file mode 100644
index 0000000..d541a3d
--- /dev/null
+++ b/demo/moonshine_live/README.md
@@ -0,0 +1,16 @@
+---
+title: Moonshine Live Transcription
+emoji: 🌕
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.17.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Real-time captions with Moonshine ONNX
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
+models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base]
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
\ No newline at end of file
diff --git a/demo/moonshine_live/app.py b/demo/moonshine_live/app.py
new file mode 100644
index 0000000..bbb05cb
--- /dev/null
+++ b/demo/moonshine_live/app.py
@@ -0,0 +1,87 @@
+"""Live captioning demo that transcribes streamed audio with Moonshine ONNX."""
+
+from functools import lru_cache
+from typing import Generator, Literal
+
+import gradio as gr
+import numpy as np
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnPause,
+    Stream,
+    audio_to_float32,
+    get_twilio_turn_credentials,
+)
+from moonshine_onnx import MoonshineOnnxModel, load_tokenizer
+from numpy.typing import NDArray
+
+
+@lru_cache(maxsize=None)
+def load_moonshine(
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+) -> MoonshineOnnxModel:
+    """Return the Moonshine ONNX model for *model_name*.
+
+    The result is memoized with ``lru_cache``, so each distinct model name
+    is constructed at most once per process and reused by later calls.
+    """
+    return MoonshineOnnxModel(model_name=model_name)
+
+
+# Loaded once at import time and shared by every call to ``stt``.
+tokenizer = load_tokenizer()
+
+
+def stt(
+    audio: tuple[int, NDArray[np.int16 | np.float32]],
+    model_name: Literal["moonshine/base", "moonshine/tiny"],
+) -> Generator[AdditionalOutputs, None, None]:
+    """Transcribe one audio segment and yield the decoded text.
+
+    Args:
+        audio: ``(sample_rate, samples)`` pair; the sample rate is unused.
+        model_name: Name of the Moonshine model to run.
+
+    Yields:
+        One ``AdditionalOutputs`` wrapping the first decoded string.
+    """
+    moonshine = load_moonshine(model_name)
+    _, audio_np = audio  # type: ignore
+    if audio_np.dtype == np.int16:
+        # The whole (rate, samples) tuple is passed, not just the array.
+        # NOTE(review): confirm the installed fastrtc accepts the tuple form.
+        audio_np = audio_to_float32(audio)
+    if audio_np.ndim == 1:
+        # Add a leading axis so a 1-D signal becomes shape (1, n_samples).
+        audio_np = audio_np.reshape(1, -1)
+    tokens = moonshine.generate(audio_np)
+    yield AdditionalOutputs(tokenizer.decode_batch(tokens)[0])
+
+
+stream = Stream(
+    ReplyOnPause(stt, input_sample_rate=16000),
+    modality="audio",
+    mode="send",
+    ui_args={
+        "title": "Live Captions by Moonshine",
+        "icon": "default-favicon.ico",
+        "icon_button_color": "#5c5c5c",
+        "pulse_color": "#a7c6fc",
+        "icon_radius": 0,
+    },
+    rtc_configuration=get_twilio_turn_credentials(),
+    additional_inputs=[
+        gr.Radio(
+            choices=["moonshine/base", "moonshine/tiny"],
+            value="moonshine/base",
+            label="Model",
+        )
+    ],
+    additional_outputs=[gr.Textbox(label="Captions")],
+    # Append each new caption on its own line; ``strip`` removes surrounding
+    # whitespace, e.g. the leading newline when the previous text is empty.
+    additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(),
+)
+
+if __name__ == "__main__":
+    stream.ui.launch()
diff --git a/demo/moonshine_live/default-favicon.ico b/demo/moonshine_live/default-favicon.ico
new file mode 100644
index 0000000..0a7c372
Binary files /dev/null and b/demo/moonshine_live/default-favicon.ico differ
diff --git a/demo/moonshine_live/requirements.txt b/demo/moonshine_live/requirements.txt
new file mode 100644
index 0000000..acde84b
--- /dev/null
+++ b/demo/moonshine_live/requirements.txt
@@ -0,0 +1,3 @@
+fastrtc[vad]
+useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx
+twilio
\ No newline at end of file