Merge pull request #528 from snakers4/adamnsandle

add neg_threshold parameter explicitly
2026-02-05 18:09:22 +08:00 · 2024-08-22 16:39:33 +03:00
parent 36854a90db 4109b107c1
commit 199de226e5
1 changed files with 15 additions and 9 deletions
--- a/src/silero_vad/utils_vad.py
+++ b/src/silero_vad/utils_vad.py
@@ -195,6 +195,7 @@ def get_speech_timestamps(audio: torch.Tensor,
                          return_seconds: bool = False,
                          visualize_probs: bool = False,
                          progress_tracking_callback: Callable[[float], None] = None,
                          neg_threshold: float = None,
                          window_size_samples: int = 512,):
    """
@@ -237,6 +238,9 @@ def get_speech_timestamps(audio: torch.Tensor,
    progress_tracking_callback: Callable[[float], None] (default - None)
        callback function taking progress in percents as an argument
    neg_threshold: float (default = threshold - 0.15)
        Negative threshold (noise or exit threshold). If the model's current state is SPEECH, speech probabilities BELOW this value are considered NON-SPEECH.
    window_size_samples: int (default - 512 samples)
        !!! DEPRECATED, DOES NOTHING !!!
@@ -298,6 +302,8 @@ def get_speech_timestamps(audio: torch.Tensor,
    triggered = False
    speeches = []
    current_speech = {}
    if neg_threshold is None:
        neg_threshold = threshold - 0.15
    temp_end = 0  # to save potential segment end (and tolerate some silence)
    prev_end = next_start = 0  # to save potential segment limits in case of maximum segment size reached
@@ -334,7 +340,7 @@ def get_speech_timestamps(audio: torch.Tensor,
        if (speech_prob < neg_threshold) and triggered:
            if not temp_end:
                temp_end = window_size_samples * i
-            if ((window_size_samples * i) - temp_end) > min_silence_samples_at_max_speech : # condition to avoid cutting in very short silence
+            if ((window_size_samples * i) - temp_end) > min_silence_samples_at_max_speech:  # condition to avoid cutting in very short silence
                prev_end = temp_end
            if (window_size_samples * i) - temp_end < min_silence_samples:
                continue