mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-05 18:09:22 +08:00
get rid of hop_size_ratio
This commit is contained in:
@@ -202,7 +202,6 @@ def get_speech_timestamps(audio: torch.Tensor,
|
|||||||
progress_tracking_callback: Callable[[float], None] = None,
|
progress_tracking_callback: Callable[[float], None] = None,
|
||||||
neg_threshold: float = None,
|
neg_threshold: float = None,
|
||||||
window_size_samples: int = 512,
|
window_size_samples: int = 512,
|
||||||
hop_size_ratio: float = 1,
|
|
||||||
min_silence_at_max_speech: float = 98,
|
min_silence_at_max_speech: float = 98,
|
||||||
use_max_poss_sil_at_max_speech: bool = True):
|
use_max_poss_sil_at_max_speech: bool = True):
|
||||||
|
|
||||||
@@ -252,13 +251,15 @@ def get_speech_timestamps(audio: torch.Tensor,
|
|||||||
neg_threshold: float (default = threshold - 0.15)
|
neg_threshold: float (default = threshold - 0.15)
|
||||||
Negative threshold (noise or exit threshold). If model's current state is SPEECH, values BELOW this value are considered as NON-SPEECH.
|
Negative threshold (noise or exit threshold). If model's current state is SPEECH, values BELOW this value are considered as NON-SPEECH.
|
||||||
|
|
||||||
|
min_silence_at_max_speech: float (default - 98ms)
|
||||||
|
Minimum silence duration in ms which is used to avoid abrupt cuts when max_speech_duration_s is reached
|
||||||
|
|
||||||
|
use_max_poss_sil_at_max_speech: bool (default - True)
|
||||||
|
Whether to use the maximum possible silence at max_speech_duration_s or not. If not, the last silence is used.
|
||||||
|
|
||||||
window_size_samples: int (default - 512 samples)
|
window_size_samples: int (default - 512 samples)
|
||||||
!!! DEPRECATED, DOES NOTHING !!!
|
!!! DEPRECATED, DOES NOTHING !!!
|
||||||
|
|
||||||
hop_size_ratio: float (default - 1), number of samples by which the window is shifted, 1 means hop_size_samples = window_size_samples
|
|
||||||
min_silence_at_max_speech: float (default - 25ms), minimum silence duration in ms which is used to avoid abrupt cuts when max_speech_duration_s is reached
|
|
||||||
use_max_poss_sil_at_max_speech: bool (default - True), whether to use the maximum possible silence at max_speech_duration_s or not. If not, the last silence is used.
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
----------
|
----------
|
||||||
speeches: list of dicts
|
speeches: list of dicts
|
||||||
@@ -288,7 +289,7 @@ def get_speech_timestamps(audio: torch.Tensor,
|
|||||||
raise ValueError("Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates")
|
raise ValueError("Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates")
|
||||||
|
|
||||||
window_size_samples = 512 if sampling_rate == 16000 else 256
|
window_size_samples = 512 if sampling_rate == 16000 else 256
|
||||||
hop_size_samples = int(window_size_samples * hop_size_ratio)
|
hop_size_samples = int(window_size_samples)
|
||||||
|
|
||||||
model.reset_states()
|
model.reset_states()
|
||||||
min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
|
min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
|
||||||
|
|||||||
Reference in New Issue
Block a user