From efb5effc8fc1fad9f46858e85b6ea8702ccc2708 Mon Sep 17 00:00:00 2001
From: bygreencn <bygreencn@gmail.com>
Date: Sun, 17 Dec 2023 22:47:37 +0800
Subject: [PATCH 1/2] Fix a bug in the C++ code sample when there is only one
 timestamp and it starts from 0.

---
 examples/cpp/silero-vad-onnx.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/examples/cpp/silero-vad-onnx.cpp b/examples/cpp/silero-vad-onnx.cpp
index 27fceb0..eb92296 100644
--- a/examples/cpp/silero-vad-onnx.cpp
+++ b/examples/cpp/silero-vad-onnx.cpp
@@ -26,7 +26,7 @@ public:
     int end;
 
     // default + parameterized constructor
-    timestamp_t(int start = 0, int end = 0)
+    timestamp_t(int start = -1, int end = -1)
         : start(start), end(end)
     {
     };
@@ -129,7 +129,7 @@ private:
         prev_end = next_start = 0;
 
         speeches.clear();
-        current_speech = timestamp_t(0, 0);
+        current_speech = timestamp_t();
     };
 
     void predict(const std::vector<float> &data)
@@ -198,7 +198,7 @@ private:
             if (prev_end > 0) {
                 current_speech.end = prev_end;
                 speeches.push_back(current_speech);
-                current_speech = timestamp_t(0, 0);
+                current_speech = timestamp_t();
                 
                 // previously reached silence(< neg_thres) and is still not speech(< thres)
                 if (next_start < prev_end)
@@ -214,7 +214,7 @@ private:
             else{ 
                 current_speech.end = current_sample;
                 speeches.push_back(current_speech);
-                current_speech = timestamp_t(0, 0);
+                current_speech = timestamp_t();
                 prev_end = 0;
                 next_start = 0;
                 temp_end = 0;
@@ -268,7 +268,7 @@ private:
                     if (current_speech.end - current_speech.start > min_speech_samples)
                     {
                         speeches.push_back(current_speech);
-                        current_speech = timestamp_t(0, 0);
+                        current_speech = timestamp_t();
                         prev_end = 0;
                         next_start = 0;
                         temp_end = 0;
@@ -297,10 +297,10 @@ public:
             predict(r);
         }
 
-        if (current_speech.start > 0) {
+        if (current_speech.start >= 0) {
             current_speech.end = audio_length_samples;
             speeches.push_back(current_speech);
-            current_speech = timestamp_t(0, 0);
+            current_speech = timestamp_t();
             prev_end = 0;
             next_start = 0;
             temp_end = 0;

From 0b7da6e74b4a76700a7b8fb0435664ea4ed133d3 Mon Sep 17 00:00:00 2001
From: bygreencn <bygreencn@gmail.com>
Date: Sun, 17 Dec 2023 22:54:44 +0800
Subject: [PATCH 2/2] Fix wav functions:

1. Fix data_size being incorrect when the header reports it as 0.
2. Detect the IEEE-float data format.
3. Add conversion of 8-bit, 16-bit and 32-bit signed PCM samples to 32-bit float in class WavReader.
---
 examples/cpp/wav.h | 67 +++++++++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 21 deletions(-)

diff --git a/examples/cpp/wav.h b/examples/cpp/wav.h
index a3566b6..249d7e3 100644
--- a/examples/cpp/wav.h
+++ b/examples/cpp/wav.h
@@ -79,6 +79,13 @@ class WavReader {
       fread(header.data, 8, sizeof(char), fp);
     }
 
+    if (header.data_size == 0) {
+        int offset = ftell(fp);
+        fseek(fp, 0, SEEK_END);
+        header.data_size = ftell(fp) - offset;
+        fseek(fp, offset, SEEK_SET);
+    }
+
     num_channel_ = header.channels;
     sample_rate_ = header.sample_rate;
     bits_per_sample_ = header.bit;
@@ -90,35 +97,53 @@ class WavReader {
     std::cout << "sample_rate_    :" << sample_rate_ << std::endl;
     std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl;
     std::cout << "num_samples     :" << num_data << std::endl;
-    std::cout << "num_data size   :" << header.data_size << std::endl;
+    std::cout << "num_data_size   :" << header.data_size << std::endl;
 
-    for (int i = 0; i < num_data; ++i) {
-      switch (bits_per_sample_) {
+    switch (bits_per_sample_) {
         case 8: {
-          char sample;
-          fread(&sample, 1, sizeof(char), fp);
-          data_[i] = static_cast<float>(sample);
-          break;
+            char sample;
+            for (int i = 0; i < num_data; ++i) {
+                fread(&sample, 1, sizeof(char), fp);
+                data_[i] = static_cast<float>(sample) / 32768;
+            }
+            break;
         }
         case 16: {
-          int16_t sample;
-          fread(&sample, 1, sizeof(int16_t), fp);
-          // std::cout << sample;
-          data_[i] = static_cast<float>(sample);
-          // std::cout << data_[i];
-          break;
+            int16_t sample;
+            for (int i = 0; i < num_data; ++i) {
+                fread(&sample, 1, sizeof(int16_t), fp);
+                data_[i] = static_cast<float>(sample) / 32768;
+            }
+            break;
         }
-        case 32: {
-          int sample;
-          fread(&sample, 1, sizeof(int), fp);
-          data_[i] = static_cast<float>(sample);
-          break;
+        case 32:
+        {
+            if (header.format == 1) //S32
+            {
+                int sample;
+                for (int i = 0; i < num_data; ++i) {
+                    fread(&sample, 1, sizeof(int), fp);
+                    data_[i] = static_cast<float>(sample) / 32768;
+                }
+            }
+            else if (header.format == 3) // IEEE-float
+            {
+                float sample;
+                for (int i = 0; i < num_data; ++i) {
+                    fread(&sample, 1, sizeof(float), fp);
+                    data_[i] = static_cast<float>(sample);
+                }
+            }
+            else {
+                printf("unsupported quantization bits\n");
+            }
+            break;
         }
         default:
-          printf("unsupported quantization bits\n");
-          exit(1);
-      }
+            printf("unsupported quantization bits\n");
+            break;
     }
+
     fclose(fp);
     return true;
   }