From b78f45a947daeef250d2e71eb19c744b25dc98eb Mon Sep 17 00:00:00 2001 From: adamnsandle Date: Fri, 14 May 2021 10:38:06 +0000 Subject: [PATCH] fix issue https://github.com/snakers4/silero-vad/issues/67 --- utils_vad.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/utils_vad.py b/utils_vad.py index c4fd682..4c408f7 100644 --- a/utils_vad.py +++ b/utils_vad.py @@ -203,7 +203,7 @@ def get_speech_ts_adaptive(wav: torch.Tensor, speeches: list list containing ends and beginnings of speech chunks (in samples) """ - + num_samples = num_samples_per_window num_steps = int(num_samples / step) assert min_silence_samples >= step @@ -268,9 +268,18 @@ def get_speech_ts_adaptive(wav: torch.Tensor, if visualize_probs: pd.DataFrame({'probs': smoothed_probs}).plot(figsize=(16, 8)) - for ts in speeches: - ts['start'] = max(0, ts['start'] - speech_pad_samples) - ts['end'] += speech_pad_samples + for i, ts in enumerate(speeches): + if i == 0: + ts['start'] = max(0, ts['start'] - speech_pad_samples) + if i != len(speeches) - 1: + silence_duration = speeches[i+1]['start'] - ts['end'] + if silence_duration < 2 * speech_pad_samples: + ts['end'] += silence_duration // 2 + speeches[i+1]['start'] = max(0, speeches[i+1]['start'] - silence_duration // 2) + else: + ts['end'] += speech_pad_samples + else: + ts['end'] = min(len(wav), ts['end'] + speech_pad_samples) return speeches