Rebrand to FastRTC (#60)

* Add code * add code * add code * Rename messages * rename * add code * Add demo * docs + demos + bug fixes * add code * styles * user guide * Styles * Add code * misc docs updates * print nit * whisper + pr * url for images * whsiper update * Fix bugs * remove demo files * version number * Fix pypi readme * Fix * demos * Add llama code editor * Update llama code editor and object detection cookbook * Add more cookbook demos * add code * Fix links for PR deploys * add code * Fix the install * add tts * TTS docs * Typo * Pending bubbles for reply on pause * Stream redesign (#63) * better error handling * Websocket error handling * add code --------- Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local> * remove docs from dist * Some docs typos * more typos * upload changes + docs * docs * better phone * update docs * add code * Make demos better * fix docs + websocket start_up * remove mention of FastAPI app * fastphone tweaks * add code * ReplyOnStopWord fixes * Fix cookbook * Fix pypi readme * add code * bump versions * sambanova cookbook * Fix tags * Llm voice chat * kyutai tag * Add error message to all index.html * STT module uses Moonshine * Not required from typing extensions * fix llm voice chat * Add vpn warning * demo fixes * demos * Add more ui args and gemini audio-video * update cookbook * version 9 --------- Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
2026-02-05 18:09:23 +08:00 · 2025-02-24 01:13:42 -05:00
parent 36190066ec
commit 853d6a06b5
131 changed files with 12349 additions and 4741 deletions
--- a/backend/fastrtc/text_to_speech/__init__.py
+++ b/backend/fastrtc/text_to_speech/__init__.py
@@ -0,0 +1,3 @@
+from .tts import KokoroTTSOptions, get_tts_model
+
+# Public names of this package, re-exported from the sibling ``tts`` module.
+__all__ = ["get_tts_model", "KokoroTTSOptions"]
--- a/backend/fastrtc/text_to_speech/tts.py
+++ b/backend/fastrtc/text_to_speech/tts.py
@@ -0,0 +1,90 @@
+import asyncio
+import re
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import AsyncGenerator, Generator, Literal, Protocol
+
+import numpy as np
+from huggingface_hub import hf_hub_download
+from numpy.typing import NDArray
+
+
+class TTSOptions:
+    pass
+
+
+class TTSModel(Protocol):
+    def tts(self, text: str) -> tuple[int, NDArray[np.float32]]: ...
+
+    async def stream_tts(
+        self, text: str, options: TTSOptions | None = None
+    ) -> AsyncGenerator[tuple[int, NDArray[np.float32]], None]: ...
+
+    def stream_tts_sync(
+        self, text: str, options: TTSOptions | None = None
+    ) -> Generator[tuple[int, NDArray[np.float32]], None, None]: ...
+
+
+@dataclass
+class KokoroTTSOptions(TTSOptions):
+    """Synthesis settings for the Kokoro text-to-speech backend.
+
+    Each field is forwarded unchanged, as the keyword argument of the same
+    name, to the Kokoro model's ``create`` and ``create_stream`` calls.
+    """
+
+    # Voice identifier passed as ``voice=``.
+    voice: str = "af_heart"
+    # Passed as ``speed=``; presumably a speaking-rate multiplier where 1.0
+    # is normal speed -- confirm against the kokoro_onnx documentation.
+    speed: float = 1.0
+    # Language code passed as ``lang=``.
+    lang: str = "en-us"
+
+@lru_cache
+def get_tts_model(model: Literal["kokoro"] = "kokoro") -> TTSModel:
+    m = KokoroTTSModel()
+    m.tts("Hello, world!")
+    return m
+
+
+class KokoroTTSModel(TTSModel):
+    def __init__(self):
+        from kokoro_onnx import Kokoro
+
+        self.model = Kokoro(
+            model_path=hf_hub_download("fastrtc/kokoro-onnx", "kokoro-v1.0.onnx"),
+            voices_path=hf_hub_download("fastrtc/kokoro-onnx", "voices-v1.0.bin"),
+        )
+
+    def tts(
+        self, text: str, options: KokoroTTSOptions | None = None
+    ) -> tuple[int, NDArray[np.float32]]:
+        options = options or KokoroTTSOptions()
+        a, b = self.model.create(
+            text, voice=options.voice, speed=options.speed, lang=options.lang
+        )
+        return b, a
+
+    async def stream_tts(
+        self, text: str, options: KokoroTTSOptions | None = None
+    ) -> AsyncGenerator[tuple[int, NDArray[np.float32]], None]:
+        options = options or KokoroTTSOptions()
+
+        sentences = re.split(r"(?<=[.!?])\s+", text.strip())
+
+        for s_idx, sentence in enumerate(sentences):
+            if not sentence.strip():
+                continue
+
+            chunk_idx = 0
+            async for chunk in self.model.create_stream(
+                sentence, voice=options.voice, speed=options.speed, lang=options.lang
+            ):
+                if s_idx != 0 and chunk_idx == 0:
+                    yield chunk[1], np.zeros(chunk[1] // 7, dtype=np.float32)
+                yield chunk[1], chunk[0]
+
+    def stream_tts_sync(
+        self, text: str, options: KokoroTTSOptions | None = None
+    ) -> Generator[tuple[int, NDArray[np.float32]], None, None]:
+        loop = asyncio.new_event_loop()
+
+        # Use the new loop to run the async generator
+        iterator = self.stream_tts(text, options).__aiter__()
+        while True:
+            try:
+                yield loop.run_until_complete(iterator.__anext__())
+            except StopAsyncIteration:
+                break