From 782e30d28f04422af2ef85cf02c0b3795aeee725 Mon Sep 17 00:00:00 2001 From: adamnsandle Date: Fri, 22 Nov 2024 08:17:25 +0000 Subject: [PATCH] fix https://github.com/snakers4/silero-vad/issues/576 --- src/silero_vad/utils_vad.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/silero_vad/utils_vad.py b/src/silero_vad/utils_vad.py index 9867c0b..75d725f 100644 --- a/src/silero_vad/utils_vad.py +++ b/src/silero_vad/utils_vad.py @@ -376,9 +376,10 @@ def get_speech_timestamps(audio: torch.Tensor, speech['end'] = int(min(audio_length_samples, speech['end'] + speech_pad_samples)) if return_seconds: + audio_length_seconds = audio_length_samples / sampling_rate for speech_dict in speeches: - speech_dict['start'] = round(speech_dict['start'] / sampling_rate, 1) - speech_dict['end'] = round(speech_dict['end'] / sampling_rate, 1) + speech_dict['start'] = max(round(speech_dict['start'] / sampling_rate, 1), 0) + speech_dict['end'] = min(round(speech_dict['end'] / sampling_rate, 1), audio_length_seconds) elif step > 1: for speech_dict in speeches: speech_dict['start'] *= step