From 6f02a2f2a90a3f884a44735a6b58aa75a4fea4c2 Mon Sep 17 00:00:00 2001 From: Sofia Casadei <69089170+sofi444@users.noreply.github.com> Date: Tue, 27 May 2025 20:54:33 +0200 Subject: [PATCH] chunk speech after max_continuous_speech_s seconds if no pause detected by VAD (#328) * chunk speech after max_continuous_speech_s seconds if no pause detected by VAD * add attr descriptions in AlgoOptions * Fix --------- Co-authored-by: Freddy Boulton <41651716+freddyaboulton@users.noreply.github.com> --- backend/fastrtc/reply_on_pause.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/fastrtc/reply_on_pause.py b/backend/fastrtc/reply_on_pause.py index c4c6ba7..cd47a98 100644 --- a/backend/fastrtc/reply_on_pause.py +++ b/backend/fastrtc/reply_on_pause.py @@ -18,11 +18,20 @@ logger = getLogger(__name__) @dataclass class AlgoOptions: - """Algorithm options.""" + """ + Algorithm options. + + Attributes: + - audio_chunk_duration: Duration in seconds of audio chunks passed to the VAD model. + - started_talking_threshold: If the chunk has more than started_talking_threshold seconds of speech, the user started talking. + - speech_threshold: If, after the user started speaking, there is a chunk with less than speech_threshold seconds of speech, the user stopped speaking. + - max_continuous_speech_s: Max duration of speech chunks before the handler is triggered, even if a pause is not detected by the VAD model. 
+ """ audio_chunk_duration: float = 0.6 started_talking_threshold: float = 0.2 speech_threshold: float = 0.1 + max_continuous_speech_s: float = float("inf") @dataclass @@ -216,7 +225,14 @@ class ReplyOnPause(StreamHandler): state.stream = audio else: state.stream = np.concatenate((state.stream, audio)) + + # Check if continuous speech limit has been reached + current_duration = len(state.stream) / sampling_rate + if current_duration >= self.algo_options.max_continuous_speech_s: + return True state.buffer = None + + # Check if a pause has been detected by the VAD model if dur_vad < self.algo_options.speech_threshold and state.started_talking: return True return False