Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git (synced 2026-02-04 09:29:23 +08:00)
Add Integrated Textbox to the docs + spaces (#343)

* Add to docs
* Fix requirements
demo/integrated_textbox/README.md (new file, 19 lines)

@@ -0,0 +1,19 @@
+---
+title: Integrated Text Box
+emoji: 📝
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.31.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Talk or type to ANY LLM!
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN]
+---
+
+# Integrated Textbox
+
+Talk or type to ANY LLM!
demo/integrated_textbox/app.py

@@ -1,6 +1,6 @@
 # /// script
 # dependencies = [
-#     "fastrtc[vad, stt]==0.0.26.rc1",
+#     "fastrtc[vad, stt]>=0.0.26",
 #     "openai",
 # ]
 # ///
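The `# /// script` block above is inline script metadata (PEP 723): script runners such as uv (`uv run app.py`) read the `dependencies` list and install `fastrtc` and `openai` before executing the demo. Loosening the pin from the `0.0.26.rc1` pre-release to `>=0.0.26` lets the demo track stable releases.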
@@ -14,8 +14,10 @@ from fastrtc import (
     WebRTC,
     WebRTCData,
     WebRTCError,
+    get_hf_turn_credentials,
     get_stt_model,
 )
+from gradio.utils import get_space
 from openai import OpenAI
 
 stt_model = get_stt_model()
@@ -118,9 +120,17 @@ with gr.Blocks(css=css) as demo:
         )
         provider.change(hide_token, inputs=[provider], outputs=[token])
         cb = gr.Chatbot(type="messages", height=600)
-        webrtc = WebRTC(modality="audio", mode="send", variant="textbox")
+        webrtc = WebRTC(
+            modality="audio",
+            mode="send",
+            variant="textbox",
+            rtc_configuration=get_hf_turn_credentials if get_space() else None,
+            server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30)
+            if get_space()
+            else None,
+        )
         webrtc.stream(
-            ReplyOnPause(response),
+            ReplyOnPause(response),  # type: ignore
             inputs=[webrtc, cb, token, model, provider],
             outputs=[cb],
             concurrency_limit=100,
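The new `rtc_configuration` / `server_rtc_configuration` arguments wire up TURN credentials only when the demo runs on Hugging Face Spaces. A minimal standalone sketch of the same pattern follows; the argument names and the 30-day TTL come straight from the hunk above, while the surrounding code is illustrative:

```py
from fastrtc import WebRTC, get_hf_turn_credentials
from gradio.utils import get_space

on_spaces = get_space()  # truthy only when running on Hugging Face Spaces

webrtc = WebRTC(
    modality="audio",
    mode="send",
    variant="textbox",
    # Client side: pass the function itself so fresh, short-lived
    # credentials are fetched for each new peer connection.
    rtc_configuration=get_hf_turn_credentials if on_spaces else None,
    # Server side: fetch one long-lived credential set (30-day TTL)
    # up front and reuse it for the server's end of each connection.
    server_rtc_configuration=(
        get_hf_turn_credentials(ttl=3_600 * 24 * 30) if on_spaces else None
    ),
)
```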
@@ -130,4 +140,4 @@ with gr.Blocks(css=css) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(server_port=6980)
+    demo.launch(server_port=7860)
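The `response` handler wired into `webrtc.stream(...)` is not shown in this diff. Below is a minimal sketch of what such a handler could look like, given the `inputs=[webrtc, cb, token, model, provider]` wiring and the imports above; the `WebRTCData` field names (`textbox`, `audio`) and the handler body are assumptions, not the demo's actual code:

```py
from fastrtc import AdditionalOutputs, WebRTCData, get_stt_model

stt_model = get_stt_model()  # local speech-to-text, as in the demo


def response(data: WebRTCData, history: list, token: str, model: str, provider: str):
    # The textbox variant delivers either typed text or a finished
    # speech segment; prefer the typed text when present.
    if data.textbox:  # field name assumed
        text = data.textbox
    else:
        text = stt_model.stt(data.audio)  # (sample_rate, ndarray) -> str; field name assumed
    history = history + [{"role": "user", "content": text}]
    # ...call the LLM selected via `model`/`provider` with `token` here...
    yield AdditionalOutputs(history)  # pushes the updated chat to the Chatbot
```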
demo/integrated_textbox/requirements.txt (new file, 2 lines)

@@ -0,0 +1,2 @@
+fastrtc[vad, stt]
+openai
@@ -23,6 +23,7 @@ A collection of applications built with FastRTC. Click on the tags below to find
 <button class="tag-button" data-tag="audio"><code>Audio</code></button>
 <button class="tag-button" data-tag="video"><code>Video</code></button>
 <button class="tag-button" data-tag="llm"><code>LLM</code></button>
+<button class="tag-button" data-tag="text"><code>Text</code></button>
 <button class="tag-button" data-tag="computer-vision"><code>Computer Vision</code></button>
 <button class="tag-button" data-tag="real-time-api"><code>Real-time API</code></button>
 <button class="tag-button" data-tag="voice-chat"><code>Voice Chat</code></button>
@@ -74,6 +75,19 @@ document.querySelectorAll('.tag-button').forEach(button => {
 
     [:octicons-code-16: Code](https://huggingface.co/spaces/fastrtc/talk-to-llama4/blob/main/app.py)
 
+-   :speaking_head:{ .lg .middle }:llama:{ .lg .middle } __Integrated Textbox__
+    {: data-tags="audio,llm,text,voice-chat"}
+
+    ---
+
+    Talk or type to any LLM with FastRTC's integrated audio + text textbox.
+
+    <video width=98% src="https://github.com/user-attachments/assets/35c982a1-4a58-4947-af89-7ff287070ef5" controls style="text-align: center"></video>
+
+    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/fastrtc/integrated-textbox)
+
+    [:octicons-code-16: Code](https://huggingface.co/spaces/fastrtc/integrated-textbox/blob/main/app.py)
+
 -   :speaking_head:{ .lg .middle }:eyes:{ .lg .middle } __Gemini Audio Video Chat__
     {: data-tags="audio,video,real-time-api"}
 
@@ -93,4 +93,24 @@ This is common for displaying a multimodal text/audio conversation in a Chatbot
 === "Notes"
     1. Pass your data to `AdditionalOutputs` and yield it.
     2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`.
     3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update.
+
+## Integrated Textbox
+
+For audio use cases, you may want to allow your users to type or speak. Set the `variant="textbox"` argument of the `WebRTC` component to place a Textbox with a microphone input in the UI. See the `Integrated Textbox` demo in the cookbook or in the `demo` directory of the GitHub repository.
+
+``` py
+webrtc = WebRTC(
+    modality="audio",
+    mode="send-receive",
+    variant="textbox",
+)
+```
+
+!!! tip "Stream Class"
+    To use the "textbox" variant via the `Stream` class, set it in the `UIArgs` class and pass it to the stream via the `ui_args` parameter.
+
+<video width=98% src="https://github.com/user-attachments/assets/35c982a1-4a58-4947-af89-7ff287070ef5" controls style="text-align: center"></video>
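As a companion to the tip added above, here is a minimal sketch of the `Stream`-based setup. It assumes `UIArgs` is importable from `fastrtc` and exposes the textbox setting under a `variant` field (both assumptions; check your installed version):

```py
from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, UIArgs  # UIArgs export path assumed


def response(audio):
    # Placeholder handler; see the handler sketch earlier for a fuller version.
    yield AdditionalOutputs([])


stream = Stream(
    ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
    ui_args=UIArgs(variant="textbox"),  # field name `variant` assumed
)

if __name__ == "__main__":
    stream.ui.launch()  # `.ui` is the underlying Gradio Blocks app
```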