diff --git a/demo/integrated_textbox/README.md b/demo/integrated_textbox/README.md
new file mode 100644
index 0000000..1784e1d
--- /dev/null
+++ b/demo/integrated_textbox/README.md
@@ -0,0 +1,19 @@
+---
+title: Integrated Text Box
+emoji: 📝
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.31.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk or type to ANY LLM!
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN]
+---
+
+# Integrated Textbox
+
+Talk or type to ANY LLM!
+
+
diff --git a/demo/text_mode/app.py b/demo/integrated_textbox/app.py
similarity index 87%
rename from demo/text_mode/app.py
rename to demo/integrated_textbox/app.py
index f224b3b..bdd0f76 100644
--- a/demo/text_mode/app.py
+++ b/demo/integrated_textbox/app.py
@@ -1,6 +1,6 @@
 # /// script
 # dependencies = [
-#     "fastrtc[vad, stt]==0.0.26.rc1",
+#     "fastrtc[vad, stt]>=0.0.26",
 #     "openai",
 # ]
 # ///
@@ -14,8 +14,10 @@ from fastrtc import (
     WebRTC,
     WebRTCData,
     WebRTCError,
+    get_hf_turn_credentials,
     get_stt_model,
 )
+from gradio.utils import get_space
 from openai import OpenAI
 
 stt_model = get_stt_model()
@@ -118,9 +120,17 @@ with gr.Blocks(css=css) as demo:
     )
     provider.change(hide_token, inputs=[provider], outputs=[token])
     cb = gr.Chatbot(type="messages", height=600)
-    webrtc = WebRTC(modality="audio", mode="send", variant="textbox")
+    webrtc = WebRTC(
+        modality="audio",
+        mode="send",
+        variant="textbox",
+        rtc_configuration=get_hf_turn_credentials if get_space() else None,
+        server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30)
+        if get_space()
+        else None,
+    )
     webrtc.stream(
-        ReplyOnPause(response),
+        ReplyOnPause(response),  # type: ignore
         inputs=[webrtc, cb, token, model, provider],
         outputs=[cb],
         concurrency_limit=100,
@@ -130,4 +140,4 @@
     )
 
 if __name__ == "__main__":
-    demo.launch(server_port=6980)
+    demo.launch(server_port=7860)
diff --git a/demo/integrated_textbox/requirements.txt b/demo/integrated_textbox/requirements.txt
new file mode 100644
index 0000000..88a409f
--- /dev/null
+++ b/demo/integrated_textbox/requirements.txt
@@ -0,0 +1,2 @@
+fastrtc[vad, stt]
+openai
\ No newline at end of file
diff --git a/docs/cookbook.md b/docs/cookbook.md
index 4b6cfbb..47f8055 100644
--- a/docs/cookbook.md
+++ b/docs/cookbook.md
@@ -23,6 +23,7 @@ A collection of applications built with FastRTC. Click on the tags below to find
+
@@ -74,6 +75,19 @@ document.querySelectorAll('.tag-button').forEach(button => {
 
     [:octicons-code-16: Code](https://huggingface.co/spaces/fastrtc/talk-to-llama4/blob/main/app.py)
 
+- :speaking_head:{ .lg .middle }:llama:{ .lg .middle } __Integrated Textbox__
+{: data-tags="audio,llm,text,voice-chat"}
+
+    ---
+
+    Talk or type to any LLM with FastRTC's integrated audio + text textbox.
+
+
+
+    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/fastrtc/integrated-textbox)
+
+    [:octicons-code-16: Code](https://huggingface.co/spaces/fastrtc/integrated-textbox/blob/main/app.py)
+
 - :speaking_head:{ .lg .middle }:eyes:{ .lg .middle } __Gemini Audio Video Chat__
 {: data-tags="audio,video,real-time-api"}
diff --git a/docs/userguide/gradio.md b/docs/userguide/gradio.md
index ba1682b..76740fd 100644
--- a/docs/userguide/gradio.md
+++ b/docs/userguide/gradio.md
@@ -93,4 +93,24 @@ This is common for displaying a multimodal text/audio conversation in a Chatbot
 === "Notes"
     1. Pass your data to `AdditionalOutputs` and yield it.
     2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`.
-    3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update.
\ No newline at end of file
+    3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update.
+
+
+## Integrated Textbox
+
+For audio use cases, you may want to let your users either type or speak. Set `variant="textbox"` on the `WebRTC` component to place a textbox with a built-in microphone input in the UI. See the `Integrated Textbox` demo in the cookbook or in the `demo` directory of the GitHub repository.
+
+```py
+webrtc = WebRTC(
+    modality="audio",
+    mode="send-receive",
+    variant="textbox",
+)
+```
+
+!!! tip "Stream Class"
+    To use the "textbox" variant via the `Stream` class, set it on the `UIArgs` class and pass that to the stream via the `ui_args` parameter.
+
+
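For reference, here is a minimal sketch of the `Stream`-based setup the tip above describes. It is an illustration under stated assumptions, not part of this patch: the `variant` field name on `UIArgs` and the `echo` handler are inferred from the docs change, so check the `UIArgs` definition in fastrtc before relying on them.

```py
# Minimal sketch, assuming `UIArgs` is importable from `fastrtc` and exposes
# a `variant` field (field name inferred from the docs change above).
import numpy as np
from fastrtc import ReplyOnPause, Stream, UIArgs


def echo(audio: tuple[int, np.ndarray]):
    # Hypothetical handler: yield the caller's audio back once they pause.
    yield audio


stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    ui_args=UIArgs(variant="textbox"),  # assumed field name, per the tip above
)

if __name__ == "__main__":
    stream.ui.launch(server_port=7860)
```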