Mirror of https://github.com/HumanAIGC-Engineering/gradio-webrtc.git (synced 2026-02-04 09:29:23 +08:00)
Clean up cancelled generators (#124)
* fix links
* fix upload
* add code
* Add code

---------

Co-authored-by: Freddy Boulton <freddyboulton@hf-freddy.local>
````diff
@@ -116,7 +116,9 @@ class ReplyOnPause(StreamHandler):
         self.is_async = inspect.isasyncgenfunction(fn)
         self.event = Event()
         self.state = AppState()
-        self.generator: Generator[EmitType, None, None] | None = None
+        self.generator: (
+            Generator[EmitType, None, None] | AsyncGenerator[EmitType, None] | None
+        ) = None
         self.model_options = model_options
         self.algo_options = algo_options or AlgoOptions()
````
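The widened annotation reflects that the wrapped `fn` may be either a plain generator function or an async generator function, which is what the `is_async` flag above detects. A minimal, self-contained sketch of that dispatch (illustrative names, not FastRTC internals):

```python
import inspect


def sync_chunks():
    yield "sync chunk"


async def async_chunks():
    yield "async chunk"


def start_generator(fn):
    # Calling either kind of function just creates its generator object,
    # so a single attribute has to accept both types, as in the diff above.
    is_async = inspect.isasyncgenfunction(fn)
    return is_async, fn()


print(start_generator(sync_chunks))   # (False, <generator ...>)
print(start_generator(async_chunks))  # (True, <async_generator ...>)
```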
````diff
@@ -184,8 +186,29 @@ class ReplyOnPause(StreamHandler):
         self.event.set()
         if self.can_interrupt:
             self.clear_queue()
+            self._close_generator()
             self.generator = None

+    def _close_generator(self):
+        """Properly close the generator to ensure resources are released."""
+        if self.generator is None:
+            return
+
+        try:
+            if self.is_async:
+                # For async generators, we need to call aclose()
+                if hasattr(self.generator, "aclose"):
+                    asyncio.run_coroutine_threadsafe(
+                        cast(AsyncGenerator[EmitType, None], self.generator).aclose(),
+                        self.loop,
+                    ).result(timeout=1.0)  # Add timeout to prevent blocking
+            else:
+                # For sync generators, we can just exhaust it or close it
+                if hasattr(self.generator, "close"):
+                    cast(Generator[EmitType, None, None], self.generator).close()
+        except Exception as e:
+            logger.debug(f"Error closing generator: {e}")
+
     def reset(self):
         super().reset()
         if self.phone_mode:
````
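The notable detail in `_close_generator` is that it runs on a synchronous worker thread, so the async generator's `aclose()` coroutine has to be scheduled onto the event loop with `asyncio.run_coroutine_threadsafe` and waited on with a timeout. A standalone sketch of that pattern, assuming nothing about FastRTC beyond what the diff shows:

```python
import asyncio


async def audio_chunks():
    try:
        while True:
            yield b"\x00" * 480
    finally:
        # Reached when aclose() is awaited, so held resources get released.
        print("async generator closed")


def close_from_worker_thread(gen, loop):
    # A plain thread cannot `await`, so hand aclose() to the loop and block
    # (briefly, with a timeout) until the cleanup has actually run.
    asyncio.run_coroutine_threadsafe(gen.aclose(), loop).result(timeout=1.0)


async def main():
    loop = asyncio.get_running_loop()
    gen = audio_chunks()
    await gen.__anext__()  # generator is now suspended at its yield
    # to_thread keeps the loop free to execute the scheduled aclose().
    await asyncio.to_thread(close_from_worker_thread, gen, loop)


asyncio.run(main())
```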
````diff
@@ -49,14 +49,17 @@ def response(
     )

     chatbot.append({"role": "assistant", "content": response_text})
-    yield AdditionalOutputs(chatbot)

-    for chunk in tts_client.text_to_speech.convert_as_stream(
-        text=response_text,  # type: ignore
-        voice_id="JBFqnCBsd6RMkjVDRZzb",
-        model_id="eleven_multilingual_v2",
-        output_format="pcm_24000",
+    for i, chunk in enumerate(
+        tts_client.text_to_speech.convert_as_stream(
+            text=response_text,  # type: ignore
+            voice_id="JBFqnCBsd6RMkjVDRZzb",
+            model_id="eleven_multilingual_v2",
+            output_format="pcm_24000",
+        )
     ):
+        if i == 0:
+            yield AdditionalOutputs(chatbot)
         audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
         yield (24000, audio_array)
````
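The demo change above waits for the first TTS chunk before emitting the chat update, so the assistant text and the audio start together, and each raw PCM chunk is reinterpreted as 16-bit samples. A self-contained sketch of that framing step (the fake byte stream stands in for the ElevenLabs response):

```python
import numpy as np


def fake_pcm_stream(n_chunks=3, samples_per_chunk=4800):
    # Stand-in for tts_client.text_to_speech.convert_as_stream(...): raw
    # little-endian 16-bit PCM at 24 kHz, delivered as byte chunks.
    for _ in range(n_chunks):
        yield np.zeros(samples_per_chunk, dtype=np.int16).tobytes()


def stream_audio():
    for i, chunk in enumerate(fake_pcm_stream()):
        if i == 0:
            print("first chunk arrived -> emit the chat UI update here")
        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
        yield (24000, audio_array)  # (sample_rate, mono frame), as the handler yields


for rate, frame in stream_audio():
    print(rate, frame.shape)
```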
````diff
@@ -9,7 +9,7 @@ app_file: app.py
 pinned: false
 license: mit
 short_description: FastRTC Voice Agent with smolagents
-tags: [webrtc, websocket, gradio, secret|HF_TOKEN]
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN]
 ---

 # Voice LLM Agent with Image Generation
````
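The two extra secrets are the Twilio credentials a Space typically uses to obtain TURN servers for WebRTC. Roughly, the app can read them from the environment and hand the resulting configuration to `Stream`; a sketch of that pattern (not necessarily this demo's exact code, and `get_twilio_turn_credentials` is assumed here to pick the secrets up from the environment):

```python
import os

import numpy as np
from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials


def response(audio):
    # Placeholder handler: reply with one second of silence at 24 kHz.
    yield (24000, np.zeros((1, 24000), dtype=np.int16))


# Only request Twilio TURN credentials when the secrets are actually set.
rtc_configuration = None
if os.environ.get("TWILIO_ACCOUNT_SID") and os.environ.get("TWILIO_AUTH_TOKEN"):
    rtc_configuration = get_twilio_turn_credentials()

stream = Stream(
    handler=ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
    rtc_configuration=rtc_configuration,
)
```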
````diff
@@ -140,10 +140,8 @@ document.querySelectorAll('.tag-button').forEach(button => {

     <video width=98% src="https://github.com/user-attachments/assets/ddf39ef7-fa7b-417e-8342-de3b9e311891" controls style="text-align: center"></video>

-    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/fastrtc/talk-to-claude)
+    [:octicons-arrow-right-24: Demo](https://huggingface.co/spaces/burtenshaw/coworking_agent/)

-    [:octicons-arrow-right-24: Gradio UI](https://huggingface.co/spaces/fastrtc/talk-to-claude-gradio)

+    [:octicons-code-16: Code](https://huggingface.co/spaces/burtenshaw/coworking_agent/blob/main/app.py)

 - :speaking_head:{ .lg .middle } __Talk to Claude__
````
````diff
@@ -3,7 +3,6 @@

 Typically, you want to run a python function whenever a user has stopped speaking. This can be done by wrapping a python generator with the `ReplyOnPause` class and passing it to the `handler` argument of the `Stream` object. The `ReplyOnPause` class will handle the voice detection and turn taking logic automatically!

-By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`.

 === "Code"
     ```python
````
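The paragraph in this hunk describes the core pattern: write a generator that receives the user's audio and yields audio back, wrap it in `ReplyOnPause`, and pass it as the `handler`. A minimal echo-style sketch of that shape (the handler body is a placeholder, not the docs' example):

```python
import numpy as np
from fastrtc import ReplyOnPause, Stream


def response(audio: tuple[int, np.ndarray]):
    # Runs each time the caller pauses; `audio` is (sample_rate, samples).
    sample_rate, samples = audio
    # A real handler would do STT -> LLM -> TTS here; this sketch just echoes.
    yield (sample_rate, samples)


stream = Stream(
    handler=ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
)
```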
````diff
@@ -35,14 +34,13 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
 You can also use an async generator with `ReplyOnPause`.

 !!! tip "Parameters"
-    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class. Also, you can set the `can_interrupt` parameter to `False` to prevent the user from interrupting the response. By default, `can_interrupt` is `True`.
+    You can customize the voice detection parameters by passing in `algo_options` and `model_options` to the `ReplyOnPause` class.

     ```python
     from fastrtc import AlgoOptions, SileroVadOptions

     stream = Stream(
         handler=ReplyOnPause(
             response,
-            can_interrupt=True,
             algo_options=AlgoOptions(
                 audio_chunk_duration=0.6,
                 started_talking_threshold=0.2,
````
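For completeness, a fuller version of the truncated example above, showing both option objects together (the specific values and the `SileroVadOptions` fields are illustrative; the API reference has the full set):

```python
from fastrtc import AlgoOptions, ReplyOnPause, SileroVadOptions, Stream


def response(audio):
    yield audio  # placeholder handler


stream = Stream(
    handler=ReplyOnPause(
        response,
        algo_options=AlgoOptions(
            audio_chunk_duration=0.6,       # seconds of audio analysed per VAD pass
            started_talking_threshold=0.2,  # fraction of speech to count as "started talking"
            speech_threshold=0.1,           # fraction of speech to count as "still talking"
        ),
        model_options=SileroVadOptions(
            threshold=0.5,                  # Silero speech-probability cutoff
            min_speech_duration_ms=250,
            min_silence_duration_ms=100,
        ),
    ),
    modality="audio",
    mode="send-receive",
)
```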
````diff
@@ -57,6 +55,27 @@ By default, the `ReplyOnPause` handler will allow you to interrupt the response
     )
     ```

+### Interruptions
+
+By default, the `ReplyOnPause` handler will allow you to interrupt the response at any time by speaking again. If you do not want to allow interruption, you can set the `can_interrupt` parameter to `False`.
+
+```python
+from fastrtc import Stream, ReplyOnPause
+
+stream = Stream(
+    handler=ReplyOnPause(
+        response,
+        can_interrupt=True,
+    )
+)
+```
+
+<video width=98% src="https://github.com/user-attachments/assets/dba68dd7-7444-439b-b948-59171067e850" controls style="text-align: center"></video>
+
 !!! tip "Muting Response Audio"
     You can directly talk over the output audio and the interruption will still work. However, in these cases, the audio transcription may be incorrect. To prevent this, it's best practice to mute the output audio before talking over it.

 ## Reply On Stopwords

 You can configure your AI model to run whenever a set of "stop words" are detected, like "Hey Siri" or "computer", with the `ReplyOnStopWords` class.
````
````diff
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"

 [project]
 name = "fastrtc"
-version = "0.0.11"
+version = "0.0.12"
 description = "The realtime communication library for Python"
 readme = "README.md"
 license = "apache-2.0"
````
````diff
@@ -95,7 +95,7 @@ def upload_space(dir_path: str):

     readme_path = path / "README.md"

-    if path.name not in NO_GRADIO_SPACE:
+    if path.name not in NO_GRADIO_SPACE and (path / "README_gradio.md").exists():
         try:
             # Upload Gradio version with modified README
             api.upload_folder(
````