add neg_threshold parameter explicitly

This commit is contained in:
adamnsandle
2024-08-20 08:53:15 +00:00
parent 36854a90db
commit 4109b107c1

View File

@@ -195,6 +195,7 @@ def get_speech_timestamps(audio: torch.Tensor,
return_seconds: bool = False,
visualize_probs: bool = False,
progress_tracking_callback: Callable[[float], None] = None,
neg_threshold: float = None,
window_size_samples: int = 512,):
"""
@@ -237,6 +238,9 @@ def get_speech_timestamps(audio: torch.Tensor,
progress_tracking_callback: Callable[[float], None] (default - None)
callback function taking progress in percents as an argument
neg_threshold: float (default = threshold - 0.15)
Negative threshold (noise or exit threshold). If the model's current state is SPEECH, values BELOW this threshold are considered NON-SPEECH.
window_size_samples: int (default - 512 samples)
!!! DEPRECATED, DOES NOTHING !!!
@@ -298,6 +302,8 @@ def get_speech_timestamps(audio: torch.Tensor,
triggered = False
speeches = []
current_speech = {}
if neg_threshold is None:
    neg_threshold = threshold - 0.15
temp_end = 0 # to save potential segment end (and tolerate some silence)
prev_end = next_start = 0 # to save potential segment limits in case of maximum segment size reached