Files
gradio-webrtc/backend/fastrtc/text_to_speech/tts.py
Freddy Boulton 853d6a06b5 Rebrand to FastRTC (#60)
* Add code

* add code

* add code

* Rename messages

* rename

* add code

* Add demo

* docs + demos + bug fixes

* add code

* styles

* user guide

* Styles

* Add code

* misc docs updates

* print nit

* whisper + pr

* url for images

* whisper update

* Fix bugs

* remove demo files

* version number

* Fix pypi readme

* Fix

* demos

* Add llama code editor

* Update llama code editor and object detection cookbook

* Add more cookbook demos

* add code

* Fix links for PR deploys

* add code

* Fix the install

* add tts

* TTS docs

* Typo

* Pending bubbles for reply on pause

* Stream redesign (#63)

* better error handling

* Websocket error handling

* add code

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>

* remove docs from dist

* Some docs typos

* more typos

* upload changes + docs

* docs

* better phone

* update docs

* add code

* Make demos better

* fix docs + websocket start_up

* remove mention of FastAPI app

* fastphone tweaks

* add code

* ReplyOnStopWord fixes

* Fix cookbook

* Fix pypi readme

* add code

* bump versions

* sambanova cookbook

* Fix tags

* Llm voice chat

* kyutai tag

* Add error message to all index.html

* STT module uses Moonshine

* Not required from typing extensions

* fix llm voice chat

* Add vpn warning

* demo fixes

* demos

* Add more ui args and gemini audio-video

* update cookbook

* version 9

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
2025-02-24 01:13:42 -05:00

91 lines
2.7 KiB
Python

import asyncio
import re
from dataclasses import dataclass
from functools import lru_cache
from typing import AsyncGenerator, Generator, Literal, Protocol
import numpy as np
from huggingface_hub import hf_hub_download
from numpy.typing import NDArray
class TTSOptions:
    """Base marker type for model-specific TTS option containers."""
class TTSModel(Protocol):
    """Structural (duck-typed) interface every TTS backend must satisfy."""

    def tts(self, text: str) -> tuple[int, NDArray[np.float32]]:
        """Synthesize *text* in one shot; return (sample_rate, audio)."""
        ...

    async def stream_tts(
        self, text: str, options: TTSOptions | None = None
    ) -> AsyncGenerator[tuple[int, NDArray[np.float32]], None]:
        """Asynchronously yield (sample_rate, audio) chunks for *text*."""
        ...

    def stream_tts_sync(
        self, text: str, options: TTSOptions | None = None
    ) -> Generator[tuple[int, NDArray[np.float32]], None, None]:
        """Synchronous counterpart of stream_tts, yielding the same chunks."""
        ...
@dataclass
class KokoroTTSOptions(TTSOptions):
    """Synthesis options for the Kokoro ONNX backend."""

    voice: str = "af_heart"  # Kokoro voice identifier
    speed: float = 1.0  # playback-rate multiplier (1.0 = normal speed)
    lang: str = "en-us"  # language / accent code passed to the model
@lru_cache
def get_tts_model(model: Literal["kokoro"] = "kokoro") -> TTSModel:
    """Return a process-wide cached TTS model, warmed up and ready to use.

    The lru_cache decorator guarantees the (expensive) model construction
    happens at most once per distinct *model* argument.
    """
    instance = KokoroTTSModel()
    # Run one short synthesis so weights are loaded before the first real call.
    instance.tts("Hello, world!")
    return instance
class KokoroTTSModel(TTSModel):
    """Kokoro ONNX text-to-speech backend implementing the TTSModel protocol."""

    def __init__(self):
        # Imported lazily so merely importing this module does not require
        # kokoro_onnx to be installed.
        from kokoro_onnx import Kokoro

        self.model = Kokoro(
            model_path=hf_hub_download("fastrtc/kokoro-onnx", "kokoro-v1.0.onnx"),
            voices_path=hf_hub_download("fastrtc/kokoro-onnx", "voices-v1.0.bin"),
        )

    def tts(
        self, text: str, options: KokoroTTSOptions | None = None
    ) -> tuple[int, NDArray[np.float32]]:
        """Synthesize *text* in one shot.

        Returns:
            (sample_rate, audio). kokoro_onnx's ``create`` returns
            (audio, sample_rate), so the pair is swapped here to match the
            TTSModel protocol.
        """
        options = options or KokoroTTSOptions()
        audio, sample_rate = self.model.create(
            text, voice=options.voice, speed=options.speed, lang=options.lang
        )
        return sample_rate, audio

    async def stream_tts(
        self, text: str, options: KokoroTTSOptions | None = None
    ) -> AsyncGenerator[tuple[int, NDArray[np.float32]], None]:
        """Yield (sample_rate, audio) chunks, sentence by sentence.

        A short block of silence (sample_rate // 7 samples, ~0.14 s) is
        inserted before the first chunk of every sentence after the first,
        so sentences do not run together.
        """
        options = options or KokoroTTSOptions()

        # Split on sentence-final punctuation so each sentence streams separately.
        sentences = re.split(r"(?<=[.!?])\s+", text.strip())

        for s_idx, sentence in enumerate(sentences):
            if not sentence.strip():
                continue

            chunk_idx = 0
            async for chunk in self.model.create_stream(
                sentence, voice=options.voice, speed=options.speed, lang=options.lang
            ):
                # chunk is (audio, sample_rate); chunk[1] is the sample rate.
                if s_idx != 0 and chunk_idx == 0:
                    yield chunk[1], np.zeros(chunk[1] // 7, dtype=np.float32)
                # BUG FIX: chunk_idx was never incremented in the original,
                # so silence was emitted before *every* chunk of later
                # sentences rather than only before their first chunk.
                chunk_idx += 1
                yield chunk[1], chunk[0]

    def stream_tts_sync(
        self, text: str, options: KokoroTTSOptions | None = None
    ) -> Generator[tuple[int, NDArray[np.float32]], None, None]:
        """Drive the async generator from synchronous code.

        Creates a private event loop to run stream_tts and closes it when
        iteration finishes or the caller abandons the generator (the
        original leaked the loop).
        """
        loop = asyncio.new_event_loop()
        try:
            iterator = self.stream_tts(text, options).__aiter__()
            while True:
                try:
                    yield loop.run_until_complete(iterator.__anext__())
                except StopAsyncIteration:
                    break
        finally:
            loop.close()