diff --git a/examples/java-example/pom.xml b/examples/java-example/pom.xml
index 32ba720..88dc906 100644
--- a/examples/java-example/pom.xml
+++ b/examples/java-example/pom.xml
@@ -1,30 +1,31 @@
-    <modelVersion>4.0.0</modelVersion>
+          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
-    <groupId>org.example</groupId>
-    <artifactId>java-example</artifactId>
-    <version>1.0-SNAPSHOT</version>
-    <packaging>jar</packaging>
+    <groupId>org.example</groupId>
+    <artifactId>java-example</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
-    <name>sliero-vad-example</name>
-    <url>http://maven.apache.org</url>
+    <name>sliero-vad-example</name>
+    <url>http://maven.apache.org</url>
-    <properties>
-        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    </properties>
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
-    <dependencies>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <version>3.8.1</version>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>com.microsoft.onnxruntime</groupId>
-            <artifactId>onnxruntime</artifactId>
-            <version>1.16.0-rc1</version>
-        </dependency>
-    </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>3.8.1</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>com.microsoft.onnxruntime</groupId>
+            <artifactId>onnxruntime</artifactId>
+            <version>1.23.1</version>
+        </dependency>
+    </dependencies>
diff --git a/examples/java-example/src/main/java/org/example/App.java b/examples/java-example/src/main/java/org/example/App.java
index 7b58f17..56aca29 100644
--- a/examples/java-example/src/main/java/org/example/App.java
+++ b/examples/java-example/src/main/java/org/example/App.java
@@ -2,68 +2,263 @@ package org.example;
import ai.onnxruntime.OrtException;
import javax.sound.sampled.*;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+/**
+ * Silero VAD Java Example
+ * Voice Activity Detection using ONNX model
+ *
+ * @author VvvvvGH
+ */
public class App {
- private static final String MODEL_PATH = "src/main/resources/silero_vad.onnx";
+ // ONNX model path - using the model file from the project
+ private static final String MODEL_PATH = "../../src/silero_vad/data/silero_vad.onnx";
+ // Test audio file path
+ private static final String AUDIO_FILE_PATH = "../../en_example.wav";
+ // Sampling rate
private static final int SAMPLE_RATE = 16000;
- private static final float START_THRESHOLD = 0.6f;
- private static final float END_THRESHOLD = 0.45f;
- private static final int MIN_SILENCE_DURATION_MS = 600;
- private static final int SPEECH_PAD_MS = 500;
- private static final int WINDOW_SIZE_SAMPLES = 2048;
+ // Speech threshold (consistent with Python default)
+ private static final float THRESHOLD = 0.5f;
+ // Negative threshold (used to determine speech end)
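+ // A segment opens when the speech probability rises above THRESHOLD and ends only after it
+ // falls below NEG_THRESHOLD (hysteresis), mirroring Python's get_speech_timestamps behavior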
+ private static final float NEG_THRESHOLD = 0.35f; // threshold - 0.15
+ // Minimum speech duration (milliseconds)
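+ // (candidate segments shorter than this are discarded)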
+ private static final int MIN_SPEECH_DURATION_MS = 250;
+ // Minimum silence duration (milliseconds)
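+ // (a pause must last at least this long before a segment is closed)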
+ private static final int MIN_SILENCE_DURATION_MS = 100;
+ // Speech padding (milliseconds)
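+ // (each reported segment is widened by this amount on both sides)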
+ private static final int SPEECH_PAD_MS = 30;
+ // Window size (samples) - 512 samples for 16kHz
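+ // (512 samples is 32 ms at 16 kHz; the model expects fixed-size windows: 512 at 16 kHz, 256 at 8 kHz)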
+ private static final int WINDOW_SIZE_SAMPLES = 512;
public static void main(String[] args) {
- // Initialize the Voice Activity Detector
- SlieroVadDetector vadDetector;
+ System.out.println("=".repeat(60));
+ System.out.println("Silero VAD Java ONNX Example");
+ System.out.println("=".repeat(60));
+
+ // Load ONNX model
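+ // SlieroVadOnnxModel is a separate wrapper class in this example; it is expected to own the
+ // ONNX Runtime session and run per-window inference on float audio chunks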
+ SlieroVadOnnxModel model;
try {
- vadDetector = new SlieroVadDetector(MODEL_PATH, START_THRESHOLD, END_THRESHOLD, SAMPLE_RATE, MIN_SILENCE_DURATION_MS, SPEECH_PAD_MS);
+ System.out.println("Loading ONNX model: " + MODEL_PATH);
+ model = new SlieroVadOnnxModel(MODEL_PATH);
+ System.out.println("Model loaded successfully!");
} catch (OrtException e) {
- System.err.println("Error initializing the VAD detector: " + e.getMessage());
+ System.err.println("Failed to load model: " + e.getMessage());
+ e.printStackTrace();
return;
}
- // Set audio format
- AudioFormat format = new AudioFormat(SAMPLE_RATE, 16, 1, true, false);
- DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
-
- // Get the target data line and open it with the specified format
- TargetDataLine targetDataLine;
+ // Read WAV file
+ float[] audioData;
try {
- targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
- targetDataLine.open(format);
- targetDataLine.start();
- } catch (LineUnavailableException e) {
- System.err.println("Error opening target data line: " + e.getMessage());
+ System.out.println("\nReading audio file: " + AUDIO_FILE_PATH);
+ audioData = readWavFileAsFloatArray(AUDIO_FILE_PATH);
+ System.out.println("Audio file read successfully, samples: " + audioData.length);
+ System.out.println("Audio duration: " + String.format("%.2f", (audioData.length / (float) SAMPLE_RATE)) + " seconds");
+ } catch (Exception e) {
+ System.err.println("Failed to read audio file: " + e.getMessage());
+ e.printStackTrace();
return;
}
- // Main loop to continuously read data and apply Voice Activity Detection
- while (targetDataLine.isOpen()) {
- byte[] data = new byte[WINDOW_SIZE_SAMPLES];
-
- int numBytesRead = targetDataLine.read(data, 0, data.length);
- if (numBytesRead <= 0) {
- System.err.println("Error reading data from target data line.");
- continue;
- }
-
- // Apply the Voice Activity Detector to the data and get the result
- Map<String, Double> detectResult;
- try {
- detectResult = vadDetector.apply(data, true);
- } catch (Exception e) {
- System.err.println("Error applying VAD detector: " + e.getMessage());
- continue;
- }
-
- if (!detectResult.isEmpty()) {
- System.out.println(detectResult);
- }
+ // Get speech timestamps (batch mode, consistent with Python's get_speech_timestamps)
+ System.out.println("\nDetecting speech segments...");
+ List