Close Stream from Backend (#222)

* Close from backend * Add code
2026-02-04 09:29:23 +08:00 · 2025-03-28 20:47:34 -04:00
parent 71743acb64
commit 8ed27fba78
10 changed files with 2336 additions and 2237 deletions
--- a/backend/fastrtc/init.py
+++ b/backend/fastrtc/init.py
@@ -25,6 +25,7 @@ from .tracks import (
 )
 from .utils import (
    AdditionalOutputs,
+    CloseStream,
    Warning,
    WebRTCError,
    aggregate_bytes_to_16bit,
@@ -75,4 +76,5 @@ __all__ = [
    "get_silero_model",
    "SileroVadOptions",
    "VideoStreamHandler",
+    "CloseStream",
 ]
--- a/backend/fastrtc/templates/component/index.js
+++ b/backend/fastrtc/templates/component/index.js
--- a/backend/fastrtc/tracks.py
+++ b/backend/fastrtc/tracks.py
@@ -37,6 +37,7 @@ from numpy import typing as npt

 from fastrtc.utils import (
    AdditionalOutputs,
+    CloseStream,
    DataChannel,
    WebRTCError,
    create_message,
@@ -54,9 +55,14 @@ VideoNDArray: TypeAlias = Union[
 ]

 VideoEmitType = (
-    VideoNDArray | tuple[VideoNDArray, AdditionalOutputs] | AdditionalOutputs
+    VideoNDArray
+    | tuple[VideoNDArray, AdditionalOutputs]
+    | tuple[VideoNDArray, CloseStream]
+    | AdditionalOutputs
+    | CloseStream
 )
-VideoEventHandler = Callable[[npt.ArrayLike], VideoEmitType]
+VideoEventGenerator = Generator[VideoEmitType, None, None]
+VideoEventHandler = Callable[[npt.ArrayLike], VideoEmitType | VideoEventGenerator]


@dataclass
@@ -172,6 +178,12 @@ class VideoCallback(VideoStreamTrack):

            args = self.add_frame_to_payload(cast(list, self.latest_args), frame_array)
            array, outputs = split_output(self.event_handler(*args))
+            if isinstance(outputs, CloseStream):
+                cast(DataChannel, self.channel).send(
+                    create_message("end_stream", outputs.msg)
+                )
+                self.stop()
+                return None
            if (
                isinstance(outputs, AdditionalOutputs)
                and self.set_additional_outputs
@@ -444,6 +456,12 @@ class VideoStreamHandler_(VideoCallback):
            ):
                self.set_additional_outputs(outputs)
                self.channel.send(create_message("fetch_output", []))
+            if isinstance(outputs, CloseStream):
+                cast(DataChannel, self.channel).send(
+                    create_message("end_stream", outputs.msg)
+                )
+                self.stop()
+                return
            if array is None and self.mode == "send":
                return

@@ -586,6 +604,12 @@ class AudioCallback(AudioStreamTrack):
            await self.start()

            frame = await self.queue.get()
+            if isinstance(frame, CloseStream):
+                cast(DataChannel, self.channel).send(
+                    create_message("end_stream", frame.msg)
+                )
+                self.stop()
+                return
            logger.debug("frame %s", frame)

            data_time = frame.time
@@ -675,6 +699,12 @@ class ServerToClientVideo(VideoStreamTrack):
                )
            try:
                next_array, outputs = split_output(next(self.generator))
+                if isinstance(outputs, CloseStream):
+                    cast(DataChannel, self.channel).send(
+                        create_message("end_stream", outputs.msg)
+                    )
+                    self.stop()
+                    return
                if (
                    isinstance(outputs, AdditionalOutputs)
                    and self.set_additional_outputs
@@ -770,6 +800,12 @@ class ServerToClientAudio(AudioStreamTrack):

            await self.start()
            data = await self.queue.get()
+            if isinstance(data, CloseStream):
+                cast(DataChannel, self.channel).send(
+                    create_message("end_stream", data.msg)
+                )
+                self.stop()
+                return
            if data is None:
                self.stop()
                return
--- a/backend/fastrtc/utils.py
+++ b/backend/fastrtc/utils.py
@@ -32,6 +32,11 @@ class AdditionalOutputs:
        self.args = args


+class CloseStream:  # Marker object a handler emits to end the stream; split_output returns it in the "outputs" slot.
+    def __init__(self, msg: str = "Stream closed") -> None:  # msg: payload of the "end_stream" message sent over the data channel
+        self.msg = msg
+
+
 class DataChannel(Protocol):
    def send(self, message: str) -> None: ...

@@ -39,6 +44,7 @@ class DataChannel(Protocol):
 def create_message(
    type: Literal[
        "send_input",
+        "end_stream",
        "fetch_output",
        "stopword",
        "error",
@@ -98,9 +104,13 @@ class WebRTCError(Exception):
        _send_log(message, "error")


-def split_output(data: tuple | Any) -> tuple[Any, AdditionalOutputs | None]:
+def split_output(
+    data: tuple | Any,
+) -> tuple[Any, AdditionalOutputs | CloseStream | None]:
    if isinstance(data, AdditionalOutputs):
        return None, data
+    if isinstance(data, CloseStream):
+        return None, data
    if isinstance(data, tuple):
        # handle the bare audio case
        if 2 <= len(data) <= 3 and isinstance(data[1], np.ndarray):
@@ -109,11 +119,11 @@ def split_output(data: tuple | Any) -> tuple[Any, AdditionalOutputs | None]:
            raise ValueError(
                "The tuple must have exactly two elements: the data and an instance of AdditionalOutputs."
            )
-        if not isinstance(data[-1], AdditionalOutputs):
+        if not isinstance(data[-1], (AdditionalOutputs, CloseStream)):
            raise ValueError(
                "The last element of the tuple must be an instance of AdditionalOutputs."
            )
-        return data[0], cast(AdditionalOutputs, data[1])
+        return data[0], cast(AdditionalOutputs | CloseStream, data[1])
    return data, None


@@ -152,6 +162,8 @@ async def player_worker_decode(
                cast(DataChannel, channel()).send(create_message("fetch_output", []))

            if frame is None:
+                if isinstance(outputs, CloseStream):
+                    await queue.put(outputs)
                if quit_on_none:
                    await queue.put(None)
                    break
@@ -203,7 +215,8 @@ async def player_worker_decode(
                processed_frame.time_base = audio_time_base
                audio_samples += processed_frame.samples
                await queue.put(processed_frame)
-
+            if isinstance(outputs, CloseStream):
+                await queue.put(outputs)
        except (TimeoutError, asyncio.TimeoutError):
            logger.warning(
                "Timeout in frame processing cycle after %s seconds - resetting", 60
--- a/frontend/Index.svelte
+++ b/frontend/Index.svelte
@@ -47,6 +47,8 @@
      msg?.type === "error"
    ) {
      gradio.dispatch(msg?.type === "error" ? "error" : "warning", msg.message);
+    } else if (msg?.type === "end_stream") {
+      gradio.dispatch("warning", msg.data);
    } else if (msg?.type === "fetch_output") {
      gradio.dispatch("state_change");
    } else if (msg?.type === "send_input") {
--- a/frontend/shared/InteractiveAudio.svelte
+++ b/frontend/shared/InteractiveAudio.svelte
@@ -51,12 +51,16 @@
    }
  });

-  let _on_change_cb = (msg: "change" | "tick" | "stopword") => {
+  let _on_change_cb = (msg: "change" | "tick" | "stopword" | any) => {
    if (msg === "stopword") {
      stopword_recognized = true;
      setTimeout(() => {
        stopword_recognized = false;
      }, 3000);
+    } else if (msg.type === "end_stream") {
+      stream_state = "closed";
+      stop(pc);
+      on_change_cb(msg);
    } else {
      console.debug("calling on_change_cb with msg", msg);
      on_change_cb(msg);
--- a/frontend/shared/StaticAudio.svelte
+++ b/frontend/shared/StaticAudio.svelte
@@ -29,6 +29,17 @@
  let pc: RTCPeerConnection;
  let _webrtc_id = Math.random().toString(36).substring(2);

+  let _on_change_cb = (msg: "change" | "tick" | "stopword" | any) => {  // Wraps on_change_cb; an "end_stream" message additionally shuts the local stream down.
+    if (msg.type === "end_stream") {  // plain string messages have no .type, so they fall through to the else branch
+      on_change_cb(msg);  // forward the end_stream message before tearing down
+      stream_state = "closed";
+      stop(pc);  // NOTE(review): stop() is defined outside this diff; presumably closes the peer connection - confirm
+    } else {
+      console.debug("calling on_change_cb with msg", msg);
+      on_change_cb(msg);  // every other message is passed through unchanged
+    }
+  };
+
  const dispatch = createEventDispatcher<{
    tick: undefined;
    error: string;
@@ -75,7 +86,7 @@
        server.offer,
        _webrtc_id,
        "audio",
-        on_change_cb,
+        _on_change_cb,
      )
        .then((connection) => {
          clearTimeout(timeoutId);
--- a/frontend/shared/StaticVideo.svelte
+++ b/frontend/shared/StaticVideo.svelte
@@ -25,6 +25,17 @@
    tick: undefined;
  }>();

+  let _on_change_cb = (msg: "change" | "tick" | "stopword" | any) => {  // Wraps on_change_cb; an "end_stream" message additionally shuts the local stream down.
+    if (msg.type === "end_stream") {  // plain string messages have no .type, so they fall through to the else branch
+      on_change_cb(msg);  // forward the end_stream message before tearing down
+      stream_state = "closed";  // stream_state is declared just below; only read/written when the callback runs
+      stop(pc);  // NOTE(review): stop() is defined outside this diff; presumably closes the peer connection - confirm
+    } else {
+      console.debug("calling on_change_cb with msg", msg);
+      on_change_cb(msg);  // every other message is passed through unchanged
+    }
+  };
+
  let stream_state = "closed";

  $: if (value === "start_webrtc_stream") {
@@ -62,7 +73,7 @@
      server.offer,
      _webrtc_id,
      "video",
-      on_change_cb,
+      _on_change_cb,
    )
      .then((connection) => {
        clearTimeout(timeoutId);
--- a/frontend/shared/Webcam.svelte
+++ b/frontend/shared/Webcam.svelte
@@ -124,6 +124,18 @@
    }
  }

+  let _on_change_cb = (msg: "change" | "tick" | "stopword" | any) => {  // Wraps on_change_cb; an "end_stream" message additionally shuts the local stream down.
+    if (msg.type === "end_stream") {  // plain string messages have no .type, so they fall through to the else branch
+      on_change_cb(msg);  // forward the end_stream message before tearing down
+      stream_state = "closed";
+      stop(pc);  // NOTE(review): stop() is defined outside this diff; presumably closes the peer connection - confirm
+      access_webcam();  // NOTE(review): presumably re-acquires the camera after the stream ends - confirm against access_webcam
+    } else {
+      console.debug("calling on_change_cb with msg", msg);
+      on_change_cb(msg);  // every other message is passed through unchanged
+    }
+  };
+
  let recording = false;
  let stream: MediaStream;

@@ -171,7 +183,7 @@
        server.offer,
        webrtc_id,
        "video",
-        on_change_cb,
+        _on_change_cb,
        rtp_params,
        undefined,
        reject_cb,
--- a/frontend/shared/webrtc_utils.ts
+++ b/frontend/shared/webrtc_utils.ts
@@ -80,7 +80,8 @@ export async function start(
      event_json?.type === "error" ||
      event_json?.type === "send_input" ||
      event_json?.type === "fetch_output" ||
-      event_json?.type === "stopword"
+      event_json?.type === "stopword" ||
+      event_json?.type === "end_stream"
    ) {
      on_change_cb(event_json ?? event.data);
    }