Add API Reference and llms.txt (#256)

* stream api reference * docs * Add code * Add code * code
2026-02-05 18:09:23 +08:00 · 2025-04-04 15:32:06 -04:00
parent 948a479601
commit 3147b5979c
10 changed files with 1682 additions and 10 deletions
--- a/backend/fastrtc/reply_on_stopwords.py
+++ b/backend/fastrtc/reply_on_stopwords.py
@@ -20,15 +20,33 @@ logger = logging.getLogger(__name__)


 class ReplyOnStopWordsState(AppState):
+    """Extends AppState to include state specific to stop word detection."""
+
    stop_word_detected: bool = False
    post_stop_word_buffer: np.ndarray | None = None
    started_talking_pre_stop_word: bool = False

    def new(self):
+        """Returns a fresh ReplyOnStopWordsState with every field at its default value."""
        return ReplyOnStopWordsState()


 class ReplyOnStopWords(ReplyOnPause):
+    """
+    A stream handler that extends ReplyOnPause to trigger based on stop words
+    followed by a pause.
+
+    This handler listens to the incoming audio stream, performs Speech-to-Text (STT)
+    to detect predefined stop words. Once a stop word is detected, it waits for a
+    subsequent pause in speech (using the VAD model) before triggering the reply
+    function (`fn`) with the audio recorded *after* the stop word.
+
+    Attributes:
+        stop_words (list[str]): A list of words or phrases that trigger the pause detection.
+        state (ReplyOnStopWordsState): The current state of the stop word and pause detection logic.
+        stt_model: The Speech-to-Text model instance used for detecting stop words.
+    """
+
    def __init__(
        self,
        fn: ReplyFnGenerator,
@@ -43,6 +61,25 @@ class ReplyOnStopWords(ReplyOnPause):
        input_sample_rate: int = 48000,
        model: PauseDetectionModel | None = None,
    ):
+        """
+        Initializes the ReplyOnStopWords handler.
+
+        Args:
+            fn: The generator function to execute upon stop word and pause detection.
+                It receives `(sample_rate, audio_array)` and optionally `*args`.
+            stop_words: A list of strings (words or phrases) to listen for.
+                Detection is case-insensitive and ignores punctuation.
+            startup_fn: An optional function to run once at the beginning.
+            algo_options: Options for the pause detection algorithm (used after stop word).
+            model_options: Options for the VAD model.
+            can_interrupt: If True, incoming audio during `fn` execution
+                will stop the generator and process the new audio.
+            expected_layout: Expected input audio layout ('mono' or 'stereo').
+            output_sample_rate: The sample rate expected for audio yielded by `fn`.
+            output_frame_size: Deprecated.
+            input_sample_rate: The expected sample rate of incoming audio.
+            model: An optional pre-initialized VAD model instance.
+        """
        super().__init__(
            fn,
            algo_options=algo_options,
@@ -60,6 +97,18 @@ class ReplyOnStopWords(ReplyOnPause):
        self.stt_model = get_stt_model("moonshine/base")

    def stop_word_detected(self, text: str) -> bool:
+        """
+        Checks if any of the configured stop words are present in the text.
+
+        Performs a case-insensitive search, treating multi-word stop phrases
+        correctly and ignoring basic punctuation.
+
+        Args:
+            text: The text transcribed from the audio.
+
+        Returns:
+            True if a stop word is found, False otherwise.
+        """
        for stop_word in self.stop_words:
            stop_word = stop_word.lower().strip().split(" ")
            if bool(
@@ -75,17 +124,36 @@ class ReplyOnStopWords(ReplyOnPause):
    async def _send_stopword(
        self,
    ):
+        """Sends a 'stopword' message over the channel; does nothing if no channel is set."""
        if self.channel:
            self.channel.send(create_message("stopword", ""))
            logger.debug("Sent stopword")

    def send_stopword(self):
+        """Schedules the 'stopword' message on `self.loop` thread-safely, without waiting for it."""
        asyncio.run_coroutine_threadsafe(self._send_stopword(), self.loop)

    def determine_pause(  # type: ignore
        self, audio: np.ndarray, sampling_rate: int, state: ReplyOnStopWordsState
    ) -> bool:
-        """Take in the stream, determine if a pause happened"""
+        """
+        Analyzes an audio chunk to detect stop words and subsequent pauses.
+
+        Overrides the `ReplyOnPause.determine_pause` method.
+        First, it performs STT on the audio buffer to detect stop words.
+        Once a stop word is detected (`state.stop_word_detected` is True), it then
+        uses the VAD model (similar to `ReplyOnPause`) to detect a pause in the
+        audio *following* the stop word.
+
+        Args:
+            audio: The numpy array containing the audio chunk.
+            sampling_rate: The sample rate of the audio chunk.
+            state: The current application state (ReplyOnStopWordsState).
+
+        Returns:
+            True if a stop word has been detected and a subsequent pause
+            satisfying the configured thresholds is detected, False otherwise.
+        """
        import librosa

        duration = len(audio) / sampling_rate
@@ -142,12 +210,19 @@ class ReplyOnStopWords(ReplyOnPause):
        return False

    def reset(self):
+        """
+        Resets the handler state to its initial condition.
+
+        Runs the parent reset, drops the reference to any active generator, clears
+        the event flag, and replaces the state (stop word flags included) with a fresh one.
+        """
        super().reset()
        self.generator = None
        self.event.clear()
        self.state = ReplyOnStopWordsState()

    def copy(self):
+        """Creates a new instance of ReplyOnStopWords with the same configuration."""
        return ReplyOnStopWords(
            self.fn,
            self.stop_words,