From e92efb1c7dc913452e39d6bc24ebbb1b0035ec19 Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Fri, 13 Dec 2024 16:53:35 -0800 Subject: [PATCH] Customizable icon also fix a bug where you could not import the lib without silero (#39) * commit * Add code * Add docs --- backend/gradio_webrtc/speech_to_text/stt_.py | 3 +- backend/gradio_webrtc/webrtc.py | 12 + docs/advanced-configuration.md | 36 ++- frontend/Index.svelte | 9 + frontend/shared/AudioWave.svelte | 222 +++++++++++++------ frontend/shared/InteractiveAudio.svelte | 5 +- frontend/shared/StaticAudio.svelte | 5 +- frontend/shared/webrtc_utils.ts | 8 +- 8 files changed, 229 insertions(+), 71 deletions(-) diff --git a/backend/gradio_webrtc/speech_to_text/stt_.py b/backend/gradio_webrtc/speech_to_text/stt_.py index 9987e2b..6b2f696 100644 --- a/backend/gradio_webrtc/speech_to_text/stt_.py +++ b/backend/gradio_webrtc/speech_to_text/stt_.py @@ -4,7 +4,6 @@ from typing import Callable import numpy as np from numpy.typing import NDArray -from silero import silero_stt from ..utils import AudioChunk @@ -17,6 +16,8 @@ class STTModel: @lru_cache def get_stt_model() -> STTModel: + from silero import silero_stt + model, decoder, _ = silero_stt(language="en", version="v6", jit_model="jit_xlarge") return STTModel(model, decoder) diff --git a/backend/gradio_webrtc/webrtc.py b/backend/gradio_webrtc/webrtc.py index 6cabbac..bbd6674 100644 --- a/backend/gradio_webrtc/webrtc.py +++ b/backend/gradio_webrtc/webrtc.py @@ -533,6 +533,9 @@ class WebRTC(Component): mode: Literal["send-receive", "receive", "send"] = "send-receive", modality: Literal["video", "audio"] = "video", rtp_params: dict[str, Any] | None = None, + icon: str | None = None, + icon_button_color: str | None = None, + pulse_color: str | None = None, ): """ Parameters: @@ -560,6 +563,9 @@ class WebRTC(Component): mode: WebRTC mode - "send-receive", "receive", or "send". modality: Type of media - "video" or "audio". 
rtp_params: See https://developer.mozilla.org/en-US/docs/Web/API/RTCRtpSender/setParameters. If you are changing the video resolution, you can set this to {"degradationPreference": "maintain-framerate"} to keep the frame rate consistent. + icon: Icon to display on the button instead of the wave animation. The icon should be a path/url to a .svg/.png/.jpeg file. + icon_button_color: Color of the icon button. Default is var(--color-accent) of the demo theme. + pulse_color: Color of the pulse animation. Default is var(--color-accent) of the demo theme. """ self.time_limit = time_limit self.height = height @@ -569,6 +575,8 @@ class WebRTC(Component): self.rtc_configuration = rtc_configuration self.mode = mode self.modality = modality + self.icon_button_color = icon_button_color + self.pulse_color = pulse_color self.rtp_params = rtp_params or {} if track_constraints is None and modality == "audio": track_constraints = { @@ -604,6 +612,10 @@ class WebRTC(Component): key=key, value=value, ) + # need to do this here otherwise the proxy_url is not set + self.icon = ( + icon if not icon else cast(dict, self.serve_static_file(icon)).get("url") + ) def set_additional_outputs( self, webrtc_id: str diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index ac4fd53..87ee406 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -31,7 +31,6 @@ webrtc = WebRTC(track_constraints=track_constraints, ) ``` - ## The RTC Configuration You can configure how the connection is created on the client by passing an `rtc_configuration` parameter to the `WebRTC` component constructor. @@ -108,4 +107,37 @@ demo.launch() !!! tip In general it is best to leave these settings untouched. In some cases, - lowering the output_frame_size can yield smoother audio playback. \ No newline at end of file + lowering the output_frame_size can yield smoother audio playback. 
+ + +## Audio Icon + +You can display an icon of your choice instead of the default wave animation for audio streaming. +Pass any local path or url to an image (svg, png, jpeg) to the component's `icon` parameter. This will display the icon as a circular button. When audio is sent or received (depending on the `mode` parameter) a pulse animation will emanate from the button. + +You can control the button color and pulse color with `icon_button_color` and `pulse_color` parameters. They can take any valid css color. + +=== "Code" + ``` python + audio = WebRTC( + label="Stream", + rtc_configuration=rtc_configuration, + mode="receive", + modality="audio", + icon="phone-solid.svg", + ) + ``` + +=== "Code Custom colors" + ``` python + audio = WebRTC( + label="Stream", + rtc_configuration=rtc_configuration, + mode="receive", + modality="audio", + icon="phone-solid.svg", + icon_button_color="black", + pulse_color="black", + ) + ``` + \ No newline at end of file diff --git a/frontend/Index.svelte b/frontend/Index.svelte index 50499e0..6a20159 100644 --- a/frontend/Index.svelte +++ b/frontend/Index.svelte @@ -34,6 +34,9 @@ export let mode: "send-receive" | "receive" | "send" = "send-receive"; export let rtp_params: RTCRtpParameters = {} as RTCRtpParameters; export let track_constraints: MediaTrackConstraints = {}; + export let icon: string | undefined = undefined; + export let icon_button_color: string = "var(--color-accent)"; + export let pulse_color: string = "var(--color-accent)"; const on_change_cb = (msg: "change" | "tick") => { gradio.dispatch(msg === "change" ? 
"state_change" : "tick"); @@ -84,6 +87,9 @@ {show_label} {server} {rtc_configuration} + {icon} + {icon_button_color} + {pulse_color} i18n={gradio.i18n} on:tick={() => gradio.dispatch("tick")} on:error={({ detail }) => gradio.dispatch("error", detail)} @@ -130,6 +136,9 @@ {mode} {rtp_params} i18n={gradio.i18n} + {icon} + {icon_button_color} + {pulse_color} on:tick={() => gradio.dispatch("tick")} on:error={({ detail }) => gradio.dispatch("error", detail)} /> diff --git a/frontend/shared/AudioWave.svelte b/frontend/shared/AudioWave.svelte index 465370b..2464ef1 100644 --- a/frontend/shared/AudioWave.svelte +++ b/frontend/shared/AudioWave.svelte @@ -1,85 +1,179 @@ - +
+{#if icon} +
+ {#if pulseIntensity > 0} + {#each Array(3) as _, i} +
+ {/each} + {/if} + +
+ Audio visualization icon +
+
+{:else}
{#each Array(numBars) as _}
{/each}
+{/if}
- + \ No newline at end of file diff --git a/frontend/shared/InteractiveAudio.svelte b/frontend/shared/InteractiveAudio.svelte index 15e7d8e..dc852ed 100644 --- a/frontend/shared/InteractiveAudio.svelte +++ b/frontend/shared/InteractiveAudio.svelte @@ -31,6 +31,9 @@ export let track_constraints: MediaTrackConstraints = {}; export let rtp_params: RTCRtpParameters = {} as RTCRtpParameters; export let on_change_cb: (mg: "tick" | "change") => void; + export let icon: string | undefined = undefined; + export let icon_button_color: string = "var(--color-accent)"; + export let pulse_color: string = "var(--color-accent)"; let stopword_recognized = false; @@ -240,7 +243,7 @@ access_mic()} />
{:else} - +