Raise errors automatically (#69)

* Add automatic error raising

* Update the demos to drop manual try/except and WebRTCError wrapping

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
Author: Freddy Boulton
Date: 2025-02-24 20:21:25 -05:00
Committed by: GitHub
Parent: c36fb8da50
Commit: 5a4693ee0b

20 changed files with 297 additions and 344 deletions
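
Taken together, the diffs below move every demo from wrapping its handler body in try/except and re-raising WebRTCError to writing the logic directly, since fastrtc now raises handler errors automatically. A minimal before/after sketch of that pattern (the handler body is an illustrative stand-in, not code from any single file in this commit):

```python
import traceback

from fastrtc import AdditionalOutputs, WebRTCError, get_stt_model

stt_model = get_stt_model()  # as in the demos touched by this commit


def response_before(audio, chatbot=None):
    # Old pattern: catch everything and re-raise as WebRTCError by hand.
    try:
        chatbot = chatbot or []
        text = stt_model.stt(audio)
        chatbot.append({"role": "user", "content": text})
        yield AdditionalOutputs(chatbot)
    except Exception:
        traceback.print_exc()
        raise WebRTCError(traceback.format_exc())


def response_after(audio, chatbot=None):
    # New pattern: plain handler code; exceptions now surface automatically.
    chatbot = chatbot or []
    text = stt_model.stt(audio)
    chatbot.append({"role": "user", "content": text})
    yield AdditionalOutputs(chatbot)
```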

View File

@@ -62,44 +62,28 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
)
config = {"response_modalities": ["AUDIO"]}
try:
async with client.aio.live.connect(
model="gemini-2.0-flash-exp", config=config
) as session:
self.session = session
print("set session")
while not self.quit.is_set():
turn = self.session.receive()
async for response in turn:
if data := response.data:
audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
self.audio_queue.put_nowait(audio)
except Exception as e:
import traceback
traceback.print_exc()
async with client.aio.live.connect(
model="gemini-2.0-flash-exp", config=config
) as session:
self.session = session
print("set session")
while not self.quit.is_set():
turn = self.session.receive()
async for response in turn:
if data := response.data:
audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
self.audio_queue.put_nowait(audio)
async def video_receive(self, frame: np.ndarray):
try:
print("out")
if self.session:
print("here")
# send image every 1 second
print(time.time() - self.last_frame_time)
if time.time() - self.last_frame_time > 1:
self.last_frame_time = time.time()
print("sending image")
await self.session.send(input=encode_image(frame))
print("sent image")
if self.latest_args[1] is not None:
print("sending image2")
await self.session.send(input=encode_image(self.latest_args[1]))
print("sent image2")
except Exception as e:
print(e)
import traceback
traceback.print_exc()
if self.session:
# send image every 1 second
print(time.time() - self.last_frame_time)
if time.time() - self.last_frame_time > 1:
self.last_frame_time = time.time()
await self.session.send(input=encode_image(frame))
if self.latest_args[1] is not None:
await self.session.send(input=encode_image(self.latest_args[1]))
self.video_queue.put_nowait(frame)
async def video_emit(self):
@@ -110,13 +94,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
array = array.squeeze()
audio_message = encode_audio(array)
if self.session:
try:
await self.session.send(input=audio_message)
except Exception as e:
print(e)
import traceback
traceback.print_exc()
await self.session.send(input=audio_message)
async def emit(self):
array = await self.audio_queue.get()

View File

@@ -13,7 +13,6 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnStopWords,
Stream,
WebRTCError,
get_stt_model,
get_twilio_turn_credentials,
)
@@ -39,30 +38,23 @@ def response(
):
gradio_chatbot = gradio_chatbot or []
conversation_state = conversation_state or []
try:
text = model.stt(audio)
print("STT in handler", text)
sample_rate, array = audio
gradio_chatbot.append(
{"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
)
yield AdditionalOutputs(gradio_chatbot, conversation_state)
text = model.stt(audio)
print("STT in handler", text)
sample_rate, array = audio
gradio_chatbot.append(
{"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
)
yield AdditionalOutputs(gradio_chatbot, conversation_state)
conversation_state.append({"role": "user", "content": text})
conversation_state.append({"role": "user", "content": text})
request = client.chat.completions.create(
model="Meta-Llama-3.2-3B-Instruct",
messages=conversation_state, # type: ignore
temperature=0.1,
top_p=0.1,
)
response = {"role": "assistant", "content": request.choices[0].message.content}
except Exception as e:
import traceback
traceback.print_exc()
raise WebRTCError(str(e) + "\n" + traceback.format_exc())
request = client.chat.completions.create(
model="Meta-Llama-3.2-3B-Instruct",
messages=conversation_state, # type: ignore
temperature=0.1,
top_p=0.1,
)
response = {"role": "assistant", "content": request.choices[0].message.content}
conversation_state.append(response)
gradio_chatbot.append(response)

View File

@@ -10,7 +10,6 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
WebRTCError,
get_stt_model,
get_twilio_turn_credentials,
)
@@ -30,42 +29,36 @@ def response(
audio: tuple[int, NDArray[np.int16 | np.float32]],
chatbot: list[dict] | None = None,
):
try:
chatbot = chatbot or []
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
start = time.time()
text = stt_model.stt(audio)
print("transcription", time.time() - start)
print("prompt", text)
chatbot.append({"role": "user", "content": text})
yield AdditionalOutputs(chatbot)
messages.append({"role": "user", "content": text})
response_text = (
groq_client.chat.completions.create(
model="llama-3.1-8b-instant",
max_tokens=512,
messages=messages, # type: ignore
)
.choices[0]
.message.content
chatbot = chatbot or []
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
start = time.time()
text = stt_model.stt(audio)
print("transcription", time.time() - start)
print("prompt", text)
chatbot.append({"role": "user", "content": text})
yield AdditionalOutputs(chatbot)
messages.append({"role": "user", "content": text})
response_text = (
groq_client.chat.completions.create(
model="llama-3.1-8b-instant",
max_tokens=512,
messages=messages, # type: ignore
)
.choices[0]
.message.content
)
chatbot.append({"role": "assistant", "content": response_text})
chatbot.append({"role": "assistant", "content": response_text})
for chunk in tts_client.text_to_speech.convert_as_stream(
text=response_text, # type: ignore
voice_id="JBFqnCBsd6RMkjVDRZzb",
model_id="eleven_multilingual_v2",
output_format="pcm_24000",
):
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
yield (24000, audio_array)
yield AdditionalOutputs(chatbot)
except Exception:
import traceback
traceback.print_exc()
raise WebRTCError(traceback.format_exc())
for chunk in tts_client.text_to_speech.convert_as_stream(
text=response_text, # type: ignore
voice_id="JBFqnCBsd6RMkjVDRZzb",
model_id="eleven_multilingual_v2",
output_format="pcm_24000",
):
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
yield (24000, audio_array)
yield AdditionalOutputs(chatbot)
chatbot = gr.Chatbot(type="messages")
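
A small aside on the TTS loop kept in the hunk above: each ElevenLabs pcm_24000 chunk is raw 16-bit PCM that gets reshaped to a (1, n) array and yielded together with its sample rate. A standalone sketch of just that conversion, with a synthesized chunk standing in for real ElevenLabs output:

```python
import numpy as np

# Fake stand-in for one pcm_24000 chunk from the TTS stream (real chunks are raw bytes).
chunk = (np.sin(np.linspace(0, 2 * np.pi, 2400)) * 32767).astype(np.int16).tobytes()

audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
frame = (24000, audio_array)  # the (sample_rate, array) tuple the handler yields
```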

View File

@@ -5,7 +5,7 @@ import cv2
import gradio as gr
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastrtc import Stream, WebRTCError, get_twilio_turn_credentials
from fastrtc import Stream, get_twilio_turn_credentials
from gradio.utils import get_space
from huggingface_hub import hf_hub_download
from pydantic import BaseModel, Field
@@ -26,16 +26,10 @@ model = YOLOv10(model_file)
def detection(image, conf_threshold=0.3):
try:
image = cv2.resize(image, (model.input_width, model.input_height))
print("conf_threshold", conf_threshold)
new_image = model.detect_objects(image, conf_threshold)
return cv2.resize(new_image, (500, 500))
except Exception as e:
import traceback
traceback.print_exc()
raise WebRTCError(str(e))
image = cv2.resize(image, (model.input_width, model.input_height))
print("conf_threshold", conf_threshold)
new_image = model.detect_objects(image, conf_threshold)
return cv2.resize(new_image, (500, 500))
stream = Stream(

View File

@@ -1,6 +1,6 @@
import subprocess
subprocess.run(["pip", "install", "fastrtc==0.0.3.post7"])
subprocess.run(["pip", "install", "fastrtc==0.0.4.post1"])
import asyncio
import base64
@@ -15,10 +15,9 @@ from fastrtc import (
AsyncStreamHandler,
Stream,
get_twilio_turn_credentials,
WebRTCError,
audio_to_float32,
wait_for_item,
)
from fastapi import FastAPI
from phonic.client import PhonicSTSClient, get_voices
load_dotenv()
@@ -42,47 +41,38 @@ class PhonicHandler(AsyncStreamHandler):
async def start_up(self):
await self.wait_for_args()
voice_id = self.latest_args[1]
try:
async with PhonicSTSClient(STS_URI, API_KEY) as client:
self.client = client
sts_stream = client.sts( # type: ignore
input_format="pcm_44100",
output_format="pcm_44100",
system_prompt="You are a helpful voice assistant. Respond conversationally.",
# welcome_message="Hello! I'm your voice assistant. How can I help you today?",
voice_id=voice_id,
)
async for message in sts_stream:
message_type = message.get("type")
if message_type == "audio_chunk":
audio_b64 = message["audio"]
audio_bytes = base64.b64decode(audio_b64)
await self.output_queue.put(
(SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
)
if text := message.get("text"):
msg = {"role": "assistant", "content": text}
await self.output_queue.put(AdditionalOutputs(msg))
elif message_type == "input_text":
msg = {"role": "user", "content": message["text"]}
except Exception as e:
raise WebRTCError(f"Error starting up: {e}")
async with PhonicSTSClient(STS_URI, API_KEY) as client:
self.client = client
sts_stream = client.sts( # type: ignore
input_format="pcm_44100",
output_format="pcm_44100",
system_prompt="You are a helpful voice assistant. Respond conversationally.",
# welcome_message="Hello! I'm your voice assistant. How can I help you today?",
voice_id=voice_id,
)
async for message in sts_stream:
message_type = message.get("type")
if message_type == "audio_chunk":
audio_b64 = message["audio"]
audio_bytes = base64.b64decode(audio_b64)
await self.output_queue.put(
(SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
)
if text := message.get("text"):
msg = {"role": "assistant", "content": text}
await self.output_queue.put(AdditionalOutputs(msg))
elif message_type == "input_text":
msg = {"role": "user", "content": message["text"]}
await self.output_queue.put(AdditionalOutputs(msg))
async def emit(self):
try:
return await self.output_queue.get()
except Exception as e:
raise WebRTCError(f"Error emitting: {e}")
return await wait_for_item(self.output_queue)
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
try:
if not self.client:
return
audio_float32 = audio_to_float32(frame)
await self.client.send_audio(audio_float32) # type: ignore
except Exception as e:
raise WebRTCError(f"Error sending audio: {e}")
if not self.client:
return
audio_float32 = audio_to_float32(frame)
await self.client.send_audio(audio_float32) # type: ignore
async def shutdown(self):
if self.client:
@@ -122,9 +112,6 @@ stream = Stream(
with stream.ui:
state.change(lambda s: s, inputs=state, outputs=chatbot)
app = FastAPI()
stream.mount(app)
if __name__ == "__main__":
if (mode := os.getenv("MODE")) == "UI":
stream.ui.launch(server_port=7860)
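
One non-error change worth noting in the handler above (and repeated in the Gemini and OpenAI handlers below): emit now returns `await wait_for_item(self.output_queue)` instead of blocking on `self.output_queue.get()`. A minimal sketch of that shape, under the assumption that wait_for_item returns the next queued item or None when nothing arrives within its internal timeout (its exact signature is not shown in this diff):

```python
import asyncio

import numpy as np
from fastrtc import wait_for_item


class QueueBackedHandler:
    """Illustrative only; mirrors the emit shape used by the updated handlers."""

    def __init__(self) -> None:
        self.output_queue: asyncio.Queue = asyncio.Queue()

    async def emit(self) -> tuple[int, np.ndarray] | None:
        # Assumed behavior: returns a queued item if one is available, otherwise None,
        # so the caller is never blocked indefinitely on an empty queue.
        return await wait_for_item(self.output_queue)
```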

View File

@@ -14,7 +14,6 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
WebRTCError,
get_tts_model,
get_twilio_turn_credentials,
)
@@ -38,41 +37,36 @@ def response(
audio: tuple[int, np.ndarray],
chatbot: list[dict] | None = None,
):
try:
chatbot = chatbot or []
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
prompt = groq_client.audio.transcriptions.create(
file=("audio-file.mp3", audio_to_bytes(audio)),
model="whisper-large-v3-turbo",
response_format="verbose_json",
).text
chatbot = chatbot or []
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
prompt = groq_client.audio.transcriptions.create(
file=("audio-file.mp3", audio_to_bytes(audio)),
model="whisper-large-v3-turbo",
response_format="verbose_json",
).text
chatbot.append({"role": "user", "content": prompt})
yield AdditionalOutputs(chatbot)
messages.append({"role": "user", "content": prompt})
response = claude_client.messages.create(
model="claude-3-5-haiku-20241022",
max_tokens=512,
messages=messages, # type: ignore
)
response_text = " ".join(
block.text # type: ignore
for block in response.content
if getattr(block, "type", None) == "text"
)
chatbot.append({"role": "assistant", "content": response_text})
print("prompt", prompt)
chatbot.append({"role": "user", "content": prompt})
start = time.time()
print("starting tts", start)
for i, chunk in enumerate(tts_model.stream_tts_sync(response_text)):
print("chunk", i, time.time() - start)
yield chunk
print("finished tts", time.time() - start)
yield AdditionalOutputs(chatbot)
messages.append({"role": "user", "content": prompt})
response = claude_client.messages.create(
model="claude-3-5-haiku-20241022",
max_tokens=512,
messages=messages, # type: ignore
)
response_text = " ".join(
block.text # type: ignore
for block in response.content
if getattr(block, "type", None) == "text"
)
chatbot.append({"role": "assistant", "content": response_text})
start = time.time()
print("starting tts", start)
for i, chunk in enumerate(tts_model.stream_tts_sync(response_text)):
print("chunk", i, time.time() - start)
yield chunk
print("finished tts", time.time() - start)
yield AdditionalOutputs(chatbot)
except Exception as e:
raise WebRTCError(str(e))
chatbot = gr.Chatbot(type="messages")

View File

@@ -13,8 +13,8 @@ from fastapi.responses import HTMLResponse
from fastrtc import (
AsyncStreamHandler,
Stream,
WebRTCError,
get_twilio_turn_credentials,
wait_for_item,
)
from google import genai
from google.genai.types import (
@@ -68,13 +68,12 @@ class GeminiHandler(AsyncStreamHandler):
api_key, voice_name = self.latest_args[1:]
else:
api_key, voice_name = None, "Puck"
try:
client = genai.Client(
api_key=api_key or os.getenv("GEMINI_API_KEY"),
http_options={"api_version": "v1alpha"},
)
except Exception as e:
raise WebRTCError(str(e))
client = genai.Client(
api_key=api_key or os.getenv("GEMINI_API_KEY"),
http_options={"api_version": "v1alpha"},
)
config = LiveConnectConfig(
response_modalities=["AUDIO"], # type: ignore
speech_config=SpeechConfig(
@@ -85,18 +84,15 @@ class GeminiHandler(AsyncStreamHandler):
)
),
)
try:
async with client.aio.live.connect(
model="gemini-2.0-flash-exp", config=config
) as session:
async for audio in session.start_stream(
stream=self.stream(), mime_type="audio/pcm"
):
if audio.data:
array = np.frombuffer(audio.data, dtype=np.int16)
self.output_queue.put_nowait(array)
except Exception as e:
raise WebRTCError(str(e))
async with client.aio.live.connect(
model="gemini-2.0-flash-exp", config=config
) as session:
async for audio in session.start_stream(
stream=self.stream(), mime_type="audio/pcm"
):
if audio.data:
array = np.frombuffer(audio.data, dtype=np.int16)
self.output_queue.put_nowait((self.output_sample_rate, array))
async def stream(self) -> AsyncGenerator[bytes, None]:
while not self.quit.is_set():
@@ -112,13 +108,11 @@ class GeminiHandler(AsyncStreamHandler):
audio_message = encode_audio(array)
self.input_queue.put_nowait(audio_message)
async def emit(self) -> tuple[int, np.ndarray]:
array = await self.output_queue.get()
return (self.output_sample_rate, array)
async def emit(self) -> tuple[int, np.ndarray] | None:
return await wait_for_item(self.output_queue)
def shutdown(self) -> None:
self.quit.set()
self.args_set.clear()
stream = Stream(

View File

@@ -13,8 +13,8 @@ from fastrtc import (
AdditionalOutputs,
AsyncStreamHandler,
Stream,
WebRTCError,
get_twilio_turn_credentials,
wait_for_item,
)
from gradio.utils import get_space
from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
@@ -47,60 +47,41 @@ class OpenAIHandler(AsyncStreamHandler):
):
"""Connect to realtime API. Run forever in separate thread to keep connection open."""
self.client = openai.AsyncOpenAI()
try:
async with self.client.beta.realtime.connect(
model="gpt-4o-mini-realtime-preview-2024-12-17"
) as conn:
await conn.session.update(
session={"turn_detection": {"type": "server_vad"}}
)
self.connection = conn
async for event in self.connection:
if event.type == "response.audio_transcript.done":
await self.output_queue.put(AdditionalOutputs(event))
if event.type == "response.audio.delta":
await self.output_queue.put(
(
self.output_sample_rate,
np.frombuffer(
base64.b64decode(event.delta), dtype=np.int16
).reshape(1, -1),
),
)
except Exception:
import traceback
traceback.print_exc()
raise WebRTCError(str(traceback.format_exc()))
async with self.client.beta.realtime.connect(
model="gpt-4o-mini-realtime-preview-2024-12-17"
) as conn:
await conn.session.update(
session={"turn_detection": {"type": "server_vad"}}
)
self.connection = conn
async for event in self.connection:
if event.type == "response.audio_transcript.done":
await self.output_queue.put(AdditionalOutputs(event))
if event.type == "response.audio.delta":
await self.output_queue.put(
(
self.output_sample_rate,
np.frombuffer(
base64.b64decode(event.delta), dtype=np.int16
).reshape(1, -1),
),
)
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
if not self.connection:
return
try:
_, array = frame
array = array.squeeze()
audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
await self.connection.input_audio_buffer.append(audio=audio_message) # type: ignore
except Exception as e:
# print traceback
print(f"Error in receive: {str(e)}")
import traceback
traceback.print_exc()
raise WebRTCError(str(traceback.format_exc()))
_, array = frame
array = array.squeeze()
audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
await self.connection.input_audio_buffer.append(audio=audio_message) # type: ignore
async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
return await self.output_queue.get()
def reset_state(self):
"""Reset connection state for new recording session"""
self.connection = None
self.args_set.clear()
return await wait_for_item(self.output_queue)
async def shutdown(self) -> None:
if self.connection:
await self.connection.close()
self.reset_state()
self.connection = None
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):

View File

@@ -49,20 +49,15 @@ def response(
conversation_state.append({"role": "user", "content": text})
try:
request = client.chat.completions.create(
model="Meta-Llama-3.2-3B-Instruct",
messages=conversation_state, # type: ignore
temperature=0.1,
top_p=0.1,
)
response = {"role": "assistant", "content": request.choices[0].message.content}
raise WebRTCError("test")
except Exception:
import traceback
traceback.print_exc()
raise WebRTCError(traceback.format_exc())
request = client.chat.completions.create(
model="Meta-Llama-3.2-3B-Instruct",
messages=conversation_state, # type: ignore
temperature=0.1,
top_p=0.1,
)
response = {"role": "assistant", "content": request.choices[0].message.content}
conversation_state.append(response)
gradio_chatbot.append(response)

View File

@@ -10,7 +10,6 @@ from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
WebRTCError,
audio_to_bytes,
get_twilio_turn_credentials,
)
@@ -26,15 +25,12 @@ groq_client = AsyncClient()
async def transcribe(audio: tuple[int, np.ndarray]):
try:
transcript = await groq_client.audio.transcriptions.create(
file=("audio-file.mp3", audio_to_bytes(audio)),
model="whisper-large-v3-turbo",
response_format="verbose_json",
)
yield AdditionalOutputs(transcript.text)
except Exception as e:
raise WebRTCError(str(e))
transcript = await groq_client.audio.transcriptions.create(
file=("audio-file.mp3", audio_to_bytes(audio)),
model="whisper-large-v3-turbo",
response_format="verbose_json",
)
yield AdditionalOutputs(transcript.text)
stream = Stream(
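
The diff cuts off before the Stream construction in this last file. For orientation, a hypothetical wiring of the transcribe generator above, using only names imported in this file; the keyword arguments are assumptions based on common fastrtc usage, not taken from this commit:

```python
stream = Stream(
    handler=ReplyOnPause(transcribe),  # transcribe is the async generator defined above
    modality="audio",
    mode="send-receive",
    rtc_configuration=get_twilio_turn_credentials(),  # assumed, as in the other demos
)
```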