Merge pull request #407 from bygreencn/master

Fix a bug in the C++ sample code and some bugs in wav.h.
2026-02-05 18:09:22 +08:00 · 2023-12-17 20:26:40 +03:00
parent 03dc3fae5c 0b7da6e74b
commit 94504ece54
2 changed files with 53 additions and 28 deletions
--- a/examples/cpp/silero-vad-onnx.cpp
+++ b/examples/cpp/silero-vad-onnx.cpp
@@ -26,7 +26,7 @@ public:
    int end;
    // default + parameterized constructor
-    timestamp_t(int start = 0, int end = 0)
+    timestamp_t(int start = -1, int end = -1)
        : start(start), end(end)
    {
    };
@@ -129,7 +129,7 @@ private:
        prev_end = next_start = 0;
        speeches.clear();
-        current_speech = timestamp_t(0, 0);
+        current_speech = timestamp_t();
    };
    void predict(const std::vector<float> &data)
@@ -198,7 +198,7 @@ private:
            if (prev_end > 0) {
                current_speech.end = prev_end;
                speeches.push_back(current_speech);
-                current_speech = timestamp_t(0, 0);
+                current_speech = timestamp_t();
                // previously reached silence(< neg_thres) and is still not speech(< thres)
                if (next_start < prev_end)
@@ -214,7 +214,7 @@ private:
            else{ 
                current_speech.end = current_sample;
                speeches.push_back(current_speech);
-                current_speech = timestamp_t(0, 0);
+                current_speech = timestamp_t();
                prev_end = 0;
                next_start = 0;
                temp_end = 0;
@@ -268,7 +268,7 @@ private:
                    if (current_speech.end - current_speech.start > min_speech_samples)
                    {
                        speeches.push_back(current_speech);
-                        current_speech = timestamp_t(0, 0);
+                        current_speech = timestamp_t();
                        prev_end = 0;
                        next_start = 0;
                        temp_end = 0;
@@ -297,10 +297,10 @@ public:
            predict(r);
        }
-        if (current_speech.start > 0) {
+        if (current_speech.start >= 0) {
            current_speech.end = audio_length_samples;
            speeches.push_back(current_speech);
-            current_speech = timestamp_t(0, 0);
+            current_speech = timestamp_t();
            prev_end = 0;
            next_start = 0;
            temp_end = 0;
--- a/examples/cpp/wav.h
+++ b/examples/cpp/wav.h
@@ -79,6 +79,13 @@ class WavReader {
      fread(header.data, 8, sizeof(char), fp);
    }
    if (header.data_size == 0) {
        int offset = ftell(fp);
        fseek(fp, 0, SEEK_END);
        header.data_size = ftell(fp) - offset;
        fseek(fp, offset, SEEK_SET);
    }
    num_channel_ = header.channels;
    sample_rate_ = header.sample_rate;
    bits_per_sample_ = header.bit;
@@ -90,35 +97,53 @@ class WavReader {
    std::cout << "sample_rate_    :" << sample_rate_ << std::endl;
    std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl;
    std::cout << "num_samples     :" << num_data << std::endl;
-    std::cout << "num_data size   :" << header.data_size << std::endl;
+    std::cout << "num_data_size   :" << header.data_size << std::endl;
-    for (int i = 0; i < num_data; ++i) {
+    switch (bits_per_sample_) {
      switch (bits_per_sample_) {
        case 8: {
-          char sample;
+            char sample;
-          fread(&sample, 1, sizeof(char), fp);
+            for (int i = 0; i < num_data; ++i) {
-          data_[i] = static_cast<float>(sample);
+                fread(&sample, 1, sizeof(char), fp);
-          break;
+                data_[i] = static_cast<float>(sample) / 32768;
            }
            break;
        }
        case 16: {
-          int16_t sample;
+            int16_t sample;
-          fread(&sample, 1, sizeof(int16_t), fp);
+            for (int i = 0; i < num_data; ++i) {
-          // std::cout << sample;
+                fread(&sample, 1, sizeof(int16_t), fp);
-          data_[i] = static_cast<float>(sample);
+                data_[i] = static_cast<float>(sample) / 32768;
-          // std::cout << data_[i];
+            }
-          break;
+            break;
        }
-        case 32: {
+        case 32:
-          int sample;
+        {
-          fread(&sample, 1, sizeof(int), fp);
+            if (header.format == 1) //S32
-          data_[i] = static_cast<float>(sample);
+            {
-          break;
+                int sample;
                for (int i = 0; i < num_data; ++i) {
                    fread(&sample, 1, sizeof(int), fp);
                    data_[i] = static_cast<float>(sample) / 32768;
                }
            }
            else if (header.format == 3) // IEEE-float
            {
                float sample;
                for (int i = 0; i < num_data; ++i) {
                    fread(&sample, 1, sizeof(float), fp);
                    data_[i] = static_cast<float>(sample);
                }
            }
            else {
                printf("unsupported quantization bits\n");
            }
            break;
        }
        default:
-          printf("unsupported quantization bits\n");
+            printf("unsupported quantization bits\n");
-          exit(1);
+            break;
      }
    }
    fclose(fp);
    return true;
  }