From 21ffe8576e85ceb40e213b88ccb7ec4cbd370acc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Thu, 25 Dec 2025 18:25:33 -0800 Subject: [PATCH 1/3] Fix model path in Rust example --- examples/rust-example/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/rust-example/src/main.rs b/examples/rust-example/src/main.rs index bf48d0c..4706bda 100644 --- a/examples/rust-example/src/main.rs +++ b/examples/rust-example/src/main.rs @@ -4,7 +4,7 @@ mod vad_iter; fn main() { let model_path = std::env::var("SILERO_MODEL_PATH") - .unwrap_or_else(|_| String::from("../../files/silero_vad.onnx")); + .unwrap_or_else(|_| String::from("../../src/silero_vad/data/silero_vad.onnx")); let audio_path = std::env::args() .nth(1) .unwrap_or_else(|| String::from("recorder.wav")); From 2a08f0b90d4ca0237c2fda7fd09130dff65ddd53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Sat, 27 Dec 2025 06:36:07 -0800 Subject: [PATCH 2/3] Remove 'load-dynamic' feature of 'ort' dependency It's unclear why we'd want this feature. It seems to make things even less isolated and self-contained than they already are, which certainly isn't a boon for an example. --- examples/rust-example/Cargo.lock | 13 +------------ examples/rust-example/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/examples/rust-example/Cargo.lock b/examples/rust-example/Cargo.lock index 6619841..3c41c04 100644 --- a/examples/rust-example/Cargo.lock +++ b/examples/rust-example/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing.
-version = 3 +version = 4 [[package]] name = "adler" @@ -206,16 +206,6 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - "cfg-if", - "windows-targets", -] - [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -301,7 +291,6 @@ checksum = "0bc80894094c6a875bfac64415ed456fa661081a278a035e22be661305c87e14" dependencies = [ "half", "js-sys", - "libloading", "ndarray", "ort-sys", "thiserror", diff --git a/examples/rust-example/Cargo.toml b/examples/rust-example/Cargo.toml index d3e72a3..c3d9797 100644 --- a/examples/rust-example/Cargo.toml +++ b/examples/rust-example/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -ort = { version = "2.0.0-rc.2", features = ["load-dynamic", "ndarray"] } +ort = { version = "2.0.0-rc.2", features = ["ndarray"] } ndarray = "0.15" hound = "3" From cfe63384f0283ef597fed7007997b71d98652e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Sun, 28 Dec 2025 07:15:01 -0800 Subject: [PATCH 3/3] Update model plumbing for Rust example The v6.2 models broke the Rust example. Update the logic for driving them to reflect what the reference Python code does. 
Fixes: #745 Co-Authored-By: Claude --- examples/rust-example/src/silero.rs | 34 ++++++++++++++++++++++++----- examples/rust-example/src/utils.rs | 2 +- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/examples/rust-example/src/silero.rs b/examples/rust-example/src/silero.rs index fce8808..a4d7103 100644 --- a/examples/rust-example/src/silero.rs +++ b/examples/rust-example/src/silero.rs @@ -1,5 +1,5 @@ use crate::utils; -use ndarray::{s, Array, Array2, ArrayBase, ArrayD, Dim, IxDynImpl, OwnedRepr}; +use ndarray::{Array, Array1, Array2, ArrayBase, ArrayD, Dim, IxDynImpl, OwnedRepr}; use std::path::Path; #[derive(Debug)] @@ -7,6 +7,8 @@ pub struct Silero { session: ort::Session, sample_rate: ArrayBase, Dim<[usize; 1]>>, state: ArrayBase, Dim>, + context: Array1, + context_size: usize, } impl Silero { @@ -16,16 +18,22 @@ impl Silero { ) -> Result { let session = ort::Session::builder()?.commit_from_file(model_path)?; let state = ArrayD::::zeros([2, 1, 128].as_slice()); - let sample_rate = Array::from_shape_vec([1], vec![sample_rate.into()]).unwrap(); + let sample_rate_val: i64 = sample_rate.into(); + let context_size = if sample_rate_val == 16000 { 64 } else { 32 }; + let context = Array1::::zeros(context_size); + let sample_rate = Array::from_shape_vec([1], vec![sample_rate_val]).unwrap(); Ok(Self { session, sample_rate, state, + context, + context_size, }) } pub fn reset(&mut self) { self.state = ArrayD::::zeros([2, 1, 128].as_slice()); + self.context = Array1::::zeros(self.context_size); } pub fn calc_level(&mut self, audio_frame: &[i16]) -> Result { @@ -33,8 +41,14 @@ impl Silero { .iter() .map(|x| (*x as f32) / (i16::MAX as f32)) .collect::>(); - let mut frame = Array2::::from_shape_vec([1, data.len()], data).unwrap(); - frame = frame.slice(s![.., ..480]).to_owned(); + + // Concatenate context with input + let mut input_with_context = Vec::with_capacity(self.context_size + data.len()); + 
input_with_context.extend_from_slice(self.context.as_slice().unwrap()); + input_with_context.extend_from_slice(&data); + + let frame = Array2::::from_shape_vec([1, input_with_context.len()], input_with_context).unwrap(); + let inps = ort::inputs![ frame, std::mem::take(&mut self.state), @@ -43,12 +57,20 @@ impl Silero { let res = self .session .run(ort::SessionInputs::ValueSlice::<3>(&inps))?; + self.state = res["stateN"].try_extract_tensor().unwrap().to_owned(); - Ok(*res["output"] + + // Update context with last context_size samples from the input + if data.len() >= self.context_size { + self.context = Array1::from_vec(data[data.len() - self.context_size..].to_vec()); + } + + let prob = *res["output"] .try_extract_raw_tensor::() .unwrap() .1 .first() - .unwrap()) + .unwrap(); + Ok(prob) } } diff --git a/examples/rust-example/src/utils.rs b/examples/rust-example/src/utils.rs index 8207920..b37c33a 100644 --- a/examples/rust-example/src/utils.rs +++ b/examples/rust-example/src/utils.rs @@ -36,7 +36,7 @@ pub struct VadParams { impl Default for VadParams { fn default() -> Self { Self { - frame_size: 64, + frame_size: 32, // 32ms for 512 samples at 16kHz threshold: 0.5, min_silence_duration_ms: 0, speech_pad_ms: 64,