Add text mode (#321)

* Pretty good spot

* Working draft

* Fix other mode

* Add js to git

* Working

* Add code

* fix

* Fix

* Add code

* Fix submit race condition

* demo

* fix

* Fix

* Fix
This commit is contained in:
Freddy Boulton
2025-06-03 19:24:21 -04:00
committed by GitHub
parent 1179f8ef21
commit 1877720231
69 changed files with 110161 additions and 22889 deletions

View File

@@ -15,14 +15,16 @@
Microphone,
} from "@gradio/icons";
import MicrophoneMuted from "./MicrophoneMuted.svelte";
import type { WebRTCValue } from "./utils";
import { start, stop } from "./webrtc_utils";
import { get_devices, set_available_devices } from "./stream_utils";
import AudioWave from "./AudioWave.svelte";
import TextboxWithMic from "./TextboxWithMic.svelte";
import WebcamPermissions from "./WebcamPermissions.svelte";
import PulsingIcon from "./PulsingIcon.svelte";
export let mode: "send-receive" | "send";
export let value: string | null = null;
export let value: WebRTCValue | null = null;
export let label: string | undefined = undefined;
export let show_label = true;
export let rtc_configuration: Object | null = null;
@@ -37,6 +39,8 @@
export let pulse_color: string = "var(--color-accent)";
export let icon_radius: number = 50;
export let button_labels: { start: string; stop: string; waiting: string };
export let variant: "textbox" | "wave" = "wave";
let pending = false;
let stopword_recognized = false;
@@ -44,7 +48,7 @@
let notification_sound;
onMount(() => {
if (value === "__webrtc_value__") {
if (value?.webrtc_id === "__webrtc_value__") {
notification_sound = new Audio(
"https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/pop-sounds.mp3",
);
@@ -74,6 +78,8 @@
export let server: {
offer: (body: any) => Promise<any>;
turn: () => Promise<any>;
trigger_response: (body: any) => Promise<any>;
quit_output_stream: (body: any) => Promise<any>;
};
let stream_state: "open" | "closed" | "waiting" = "closed";
@@ -145,10 +151,11 @@
stream_state = "closed";
_time_limit = null;
await access_mic();
await server.quit_output_stream({ webrtc_id: _webrtc_id });
return;
}
_webrtc_id = Math.random().toString(36).substring(2);
value = _webrtc_id;
value.webrtc_id = _webrtc_id;
stream_state = "waiting";
await server.turn().then((rtc_configuration_) => {
if (rtc_configuration_.error) {
@@ -296,24 +303,48 @@
$: if (stopword_recognized) {
notification_sound.play();
}
/**
 * Returns the component-level `stream` variable (declared outside the visible hunk).
 * A single named accessor is handed both to TextboxWithMic (as
 * `input_audio_source_callback`) and to PulsingIcon (as `audio_source_callback`).
 * NOTE(review): `stream` is presumably the local microphone MediaStream — confirm.
 */
function input_audio_source_callback(): MediaStream {
return stream;
}
</script>
<BlockLabel
{show_label}
Icon={Music}
float={false}
label={label || i18n("audio.audio")}
/>
{#if variant !== "textbox"}
<BlockLabel
{show_label}
Icon={Music}
float={false}
label={label || i18n("audio.audio")}
/>
{/if}
<div class="audio-container">
<audio
class="standard-player"
class:hidden={value === "__webrtc_value__"}
class:hidden={true}
on:load
bind:this={audio_player}
on:ended={() => dispatch("stop")}
on:play={() => dispatch("play")}
/>
{#if !mic_accessed}
{#if variant === "textbox"}
<TextboxWithMic
bind:value
bind:stream_state
{start_stream}
{access_mic}
{audio_source_callback}
{input_audio_source_callback}
{toggleMuteMicrophone}
{toggleMute}
{on_change_cb}
{mode}
{icon_button_color}
{pulse_color}
bind:is_muted
bind:is_mic_muted
{pending}
/>
{:else if !mic_accessed}
<div
in:fade={{ delay: 100, duration: 200 }}
title="grant webcam access"
@@ -353,7 +384,7 @@
style={`fill: ${icon_button_color}; stroke: ${icon_button_color}; color: ${icon_button_color};`}
>
<PulsingIcon
audio_source_callback={() => stream}
audio_source_callback={input_audio_source_callback}
stream_state={"open"}
icon={Circle}
{icon_button_color}

View File

@@ -5,10 +5,11 @@
import { BlockLabel } from "@gradio/atoms";
import Webcam from "./Webcam.svelte";
import { Video } from "@gradio/icons";
import type { WebRTCValue } from "./utils";
import type { I18nFormatter } from "@gradio/utils";
export let value: string = null;
export let value: string | WebRTCValue | null = null;
export let label: string | undefined = undefined;
export let show_label = true;
export let include_audio: boolean;
@@ -20,6 +21,7 @@
export let button_labels: { start: string; stop: string; waiting: string };
export let server: {
offer: (body: any) => Promise<any>;
turn: () => Promise<any>;
};
export let rtc_configuration: Object;
export let track_constraints: MediaTrackConstraints = {};
@@ -48,6 +50,7 @@
let dragging = false;
$: dispatch("drag", dragging);
// Derive the id handed to <Webcam>. `value` may be a bare id string, a
// WebRTCValue object, or null (its declared default), so the property access
// is guarded: a null value yields a null id instead of throwing a TypeError
// while the component initialises.
$: webrtc_id = typeof value === "string" ? value : (value?.webrtc_id ?? null);
</script>
<BlockLabel {show_label} Icon={Video} label={label || "Video"} />
@@ -72,7 +75,7 @@
{i18n}
stream_every={0.5}
{server}
bind:webrtc_id={value}
bind:webrtc_id
{reject_cb}
/>

View File

@@ -8,6 +8,7 @@
export let icon_button_color: string = "var(--color-accent)";
export let pulse_color: string = "var(--color-accent)";
export let icon_radius: number = 50;
export let pulse_intensity_threshold: number = 0;
let audioContext: AudioContext;
let analyser: AnalyserNode;
@@ -61,7 +62,7 @@
<div class="gradio-webrtc-icon-wrapper">
<div class="gradio-webrtc-pulsing-icon-container">
{#if pulseIntensity > 0}
{#if pulseIntensity > pulse_intensity_threshold}
{#each Array(3) as _, i}
<div
class="pulse-ring"
@@ -76,7 +77,7 @@
<div
class="gradio-webrtc-pulsing-icon"
style:transform={`scale(${pulseScale})`}
style:background={icon_button_color}
style:background={"none"}
>
{#if typeof icon === "string"}
<img

View File

@@ -4,12 +4,12 @@
import { Music } from "@gradio/icons";
import type { I18nFormatter } from "@gradio/utils";
import { createEventDispatcher } from "svelte";
import { onMount } from "svelte";
import type { WebRTCValue } from "./utils";
import { start, stop } from "./webrtc_utils";
import AudioWave from "./AudioWave.svelte";
export let value: string | null = null;
export let value: string | WebRTCValue | null = null;
export let label: string | undefined = undefined;
export let show_label = true;
export let rtc_configuration: Object | null = null;
@@ -102,7 +102,7 @@
return value;
}
$: start_stream(value).then((val) => {
$: start_stream(value as string).then((val) => {
value = val;
});
</script>

View File

@@ -2,10 +2,10 @@
import { createEventDispatcher, onMount } from "svelte";
import { BlockLabel, Empty } from "@gradio/atoms";
import { Video } from "@gradio/icons";
import type { WebRTCValue } from "./utils";
import { start, stop } from "./webrtc_utils";
export let value: string | null = null;
export let value: string | WebRTCValue | null = null;
export let label: string | undefined = undefined;
export let show_label = true;
export let rtc_configuration: Object | null = null;

View File

@@ -0,0 +1,203 @@
<script lang="ts">
import type { WebRTCValue } from "./utils";
import { Block } from "@gradio/atoms";
import {
Spinner,
VolumeMuted,
VolumeHigh,
Microphone,
Square,
} from "@gradio/icons";
import MicrophoneMuted from "./MicrophoneMuted.svelte";
import PulsingIcon from "./PulsingIcon.svelte";
import { BaseTextbox } from "@gradio/textbox";
// Component props. Callbacks are supplied by the parent; this component only
// decides when to invoke them.

// Two-way bound value; `value.textbox` is bound to the textbox and both fields
// are sent in the submit payload.
// NOTE(review): the markup dereferences `value.textbox` unconditionally, so the
// parent presumably always provides a non-null value — confirm.
export let value: WebRTCValue | null = null;
// Called (without awaiting) when the mic button is clicked while the stream is not open.
export let access_mic: () => Promise<void>;
// Called to start the stream (mic button, submit while closed) and also by the stop button.
// NOTE(review): presumably toggles the stream open/closed — confirm against the parent.
export let start_stream: () => Promise<void>;
// Stream source forwarded to the PulsingIcon on the speaker (mute) button.
export let audio_source_callback: () => MediaStream;
// Stream source forwarded to the PulsingIcon on the microphone button.
export let input_audio_source_callback: () => MediaStream;
// Invoked when the mic button is clicked while the stream is open.
export let toggleMuteMicrophone: () => void;
// Click handler of the speaker (mute) button.
export let toggleMute: () => void;
// NOTE(review): declared as taking "tick" | "change", but the submit handler
// calls it with a `{ type: "submit", data: ... }` object under `@ts-ignore`.
export let on_change_cb: (mg: "tick" | "change") => void;
// Applied as fill / stroke / color to the icon wrappers and forwarded to PulsingIcon.
export let icon_button_color: string = "var(--color-accent)";
// Forwarded to PulsingIcon.
export let pulse_color: string = "var(--color-accent)";
// Two-way bound stream state; "closed" hides the mute and stop buttons,
// "open" switches the icons to their pulsing variants.
export let stream_state: "open" | "closed" | "waiting" | "pending" = "closed";
// The speaker (mute) button is rendered only in "send-receive" mode.
export let mode: "send-receive" | "send" = "send-receive";
// While true: the textbox is disabled, its submit button is hidden and a spinner is shown.
export let pending = false;
// Selects the VolumeMuted vs VolumeHigh icon and the speaker button's aria-label.
export let is_muted = false;
// Selects the MicrophoneMuted vs Microphone icon while the stream is open.
export let is_mic_muted = false;
/**
 * Click handler for the microphone button.
 * While the stream is open it toggles the microphone mute; in every other
 * state it kicks off `access_mic()` followed by `start_stream()`.
 */
function click_mic_icon() {
  const stream_is_open = stream_state === "open";
  if (stream_is_open) {
    toggleMuteMicrophone();
    return;
  }
  // Neither promise is awaited here; both calls are started back to back.
  void access_mic();
  void start_stream();
}
/**
 * Click handler for the stop button: delegates to `start_stream()` and waits
 * for it to settle.
 * NOTE(review): `start_stream` presumably closes a stream that is already
 * open — confirm against the parent component.
 */
async function click_stop_stream() {
  await start_stream();
}
</script>
<!--
  Layout: a textbox followed by a row of icon buttons (optional spinner,
  microphone, speaker mute in "send-receive" mode, stop).
-->
<Block>
  <div class="input-container">
    <BaseTextbox
      bind:value={value.textbox}
      label=""
      show_label={false}
      root={undefined}
      info={undefined}
      submit_btn={!pending}
      disabled={pending}
      on:submit={async () => {
        if (stream_state === "closed") {
          await start_stream();
          // Wait for the stream to report "open", but only for a bounded
          // time: if the stream never opens the submit is dropped (and the
          // typed text kept) instead of polling forever.
          const give_up_at = Date.now() + 30000;
          // @ts-ignore
          while (stream_state !== "open") {
            if (Date.now() >= give_up_at) {
              console.warn("stream did not open in time; textbox submit dropped");
              return;
            }
            await new Promise((resolve) => setTimeout(resolve, 100));
          }
        }
        // @ts-ignore
        on_change_cb({
          type: "submit",
          data: {
            value: { webrtc_id: value.webrtc_id, textbox: value.textbox },
            is_value_data: true,
          },
        });
        console.debug("textbox called submit");
        // Clear the input once its content has been handed to the callback.
        value.textbox = "";
      }}
    />
    {#if pending}
      <div
        class="button"
        style={`fill: ${icon_button_color}; stroke: ${icon_button_color}; color: ${icon_button_color};`}
      >
        <Spinner />
      </div>
    {/if}
    <!-- Microphone: starts the stream, or toggles the mic mute once it is open. -->
    <button
      class="button"
      class:padded-button={false}
      on:click={click_mic_icon}
      aria-label={stream_state === "open"
        ? is_mic_muted
          ? "unmute microphone"
          : "mute microphone"
        : "start microphone"}
    >
      <div
        class="icon"
        style={`fill: ${icon_button_color}; stroke: ${icon_button_color}; color: ${icon_button_color};`}
      >
        {#if stream_state === "open"}
          <PulsingIcon
            audio_source_callback={input_audio_source_callback}
            stream_state={"open"}
            icon={is_mic_muted ? MicrophoneMuted : Microphone}
            {icon_button_color}
            {pulse_color}
            pulse_intensity_threshold={0.99}
          />
        {:else}
          <Microphone />
        {/if}
      </div>
    </button>
    {#if mode === "send-receive"}
      <!-- Speaker mute: hidden while the stream is closed. -->
      <button
        class="button"
        on:click={toggleMute}
        aria-label={is_muted ? "unmute audio" : "mute audio"}
        class:hidden={stream_state === "closed"}
      >
        <div
          class="icon"
          style={`fill: ${icon_button_color}; stroke: ${icon_button_color}; color: ${icon_button_color};`}
        >
          {#if stream_state === "open"}
            <PulsingIcon
              {audio_source_callback}
              stream_state={"open"}
              icon={is_muted ? VolumeMuted : VolumeHigh}
              {icon_button_color}
              {pulse_color}
              pulse_intensity_threshold={0.8}
            />
          {:else if is_muted}
            <VolumeMuted />
          {:else}
            <VolumeHigh />
          {/if}
        </div>
      </button>
    {/if}
    <!-- Stop: hidden while the stream is closed. -->
    <button
      class="button"
      on:click={click_stop_stream}
      aria-label="stop stream"
      class:hidden={stream_state === "closed"}
    >
      <div
        class="icon"
        style={`fill: ${icon_button_color}; stroke: ${icon_button_color}; color: ${icon_button_color};`}
      >
        <Square fill="none" />
      </div>
    </button>
  </div>
</Block>
<style>
  /* Row holding the textbox and the icon buttons, aligned to the bottom edge. */
  .input-container {
    display: flex;
    position: relative;
    align-items: flex-end;
    width: 100%;
  }

  /* Round icon button: geometry and secondary-button colours in one rule. */
  .button {
    border: none;
    text-align: center;
    text-decoration: none;
    font-size: 14px;
    cursor: pointer;
    border-radius: 15px;
    min-width: 30px;
    height: 30px;
    flex-shrink: 0;
    display: flex;
    justify-content: center;
    align-items: center;
    z-index: var(--layer-1);
    background: var(--button-secondary-background-fill);
    color: var(--button-secondary-text-color);
  }

  /* Interaction states. */
  .button:hover {
    background: var(--button-secondary-background-fill-hover);
  }

  .button:disabled {
    background: var(--button-secondary-background-fill);
    cursor: pointer;
  }

  .button:active {
    box-shadow: var(--button-shadow-active);
  }

  /* Icons rendered by child components, hence :global. */
  .button :global(svg) {
    height: 22px;
    width: 22px;
  }

  .padded-button {
    padding: 0 10px;
  }

  /* Toggled through class:hidden on the mute and stop buttons. */
  .hidden {
    display: none;
  }
</style>

View File

@@ -2,6 +2,11 @@ import { toBlobURL } from "@ffmpeg/util";
import { FFmpeg } from "@ffmpeg/ffmpeg";
import { lookup } from "mrmime";
/**
 * Object form of a WebRTC component's value: the textbox content paired with
 * the id of the WebRTC connection it belongs to.
 */
export type WebRTCValue = {
/** Text bound to the textbox in TextboxWithMic and sent in its submit payload. */
textbox: string;
/** Connection identifier; the audio component assigns a freshly generated random string to it when a stream is started. */
webrtc_id: string;
};
export const prettyBytes = (bytes: number): string => {
let units = ["B", "KB", "MB", "GB", "PB"];
let i = 0;