//Author : Nathan Lee //Created On : 2024-11-18 //Description : silero 5.1 system for torch-script(c++). //Version : 1.0 #ifndef SILERO_TORCH_H #define SILERO_TORCH_H #include #include #include #include #include #include #include #include #include #include namespace silero{ struct SpeechSegment{ int start; int end; }; class VadIterator{ public: VadIterator(const std::string &model_path, float threshold = 0.5, int sample_rate = 16000, int window_size_ms = 32, int speech_pad_ms = 30, int min_silence_duration_ms = 100, int min_speech_duration_ms = 250, int max_duration_merge_ms = 300, bool print_as_samples = false); ~VadIterator(); void SpeechProbs(std::vector& input_wav); std::vector GetSpeechTimestamps(); void SetVariables(); float threshold; int sample_rate; int window_size_ms; int min_speech_duration_ms; int max_duration_merge_ms; bool print_as_samples; private: torch::jit::script::Module model; std::vector outputs_prob; int min_silence_samples; int min_speech_samples; int speech_pad_samples; int window_size_samples; int duration_merge_samples; int current_sample = 0; int total_sample_size=0; int min_silence_duration_ms; int speech_pad_ms; bool triggered = false; int temp_end = 0; void init_engine(int window_size_ms); void init_torch_model(const std::string& model_path); void reset_states(); std::vector DoVad(); std::vector mergeSpeeches(const std::vector& speeches, int duration_merge_samples); }; } #endif // SILERO_TORCH_H