From efb5effc8fc1fad9f46858e85b6ea8702ccc2708 Mon Sep 17 00:00:00 2001 From: bygreencn Date: Sun, 17 Dec 2023 22:47:37 +0800 Subject: [PATCH 1/2] Fix a bug in c code sample when only one timestamp and start from 0. --- examples/cpp/silero-vad-onnx.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/cpp/silero-vad-onnx.cpp b/examples/cpp/silero-vad-onnx.cpp index 27fceb0..eb92296 100644 --- a/examples/cpp/silero-vad-onnx.cpp +++ b/examples/cpp/silero-vad-onnx.cpp @@ -26,7 +26,7 @@ public: int end; // default + parameterized constructor - timestamp_t(int start = 0, int end = 0) + timestamp_t(int start = -1, int end = -1) : start(start), end(end) { }; @@ -129,7 +129,7 @@ private: prev_end = next_start = 0; speeches.clear(); - current_speech = timestamp_t(0, 0); + current_speech = timestamp_t(); }; void predict(const std::vector &data) @@ -198,7 +198,7 @@ private: if (prev_end > 0) { current_speech.end = prev_end; speeches.push_back(current_speech); - current_speech = timestamp_t(0, 0); + current_speech = timestamp_t(); // previously reached silence(< neg_thres) and is still not speech(< thres) if (next_start < prev_end) @@ -214,7 +214,7 @@ private: else{ current_speech.end = current_sample; speeches.push_back(current_speech); - current_speech = timestamp_t(0, 0); + current_speech = timestamp_t(); prev_end = 0; next_start = 0; temp_end = 0; @@ -268,7 +268,7 @@ private: if (current_speech.end - current_speech.start > min_speech_samples) { speeches.push_back(current_speech); - current_speech = timestamp_t(0, 0); + current_speech = timestamp_t(); prev_end = 0; next_start = 0; temp_end = 0; @@ -297,10 +297,10 @@ public: predict(r); } - if (current_speech.start > 0) { + if (current_speech.start >= 0) { current_speech.end = audio_length_samples; speeches.push_back(current_speech); - current_speech = timestamp_t(0, 0); + current_speech = timestamp_t(); prev_end = 0; next_start = 0; temp_end = 0; From 0b7da6e74b4a76700a7b8fb0435664ea4ed133d3 Mon Sep 17 00:00:00 2001 From: bygreencn Date: Sun, 17 Dec 2023 22:54:44 +0800 Subject: [PATCH 2/2] Fix wav functions: 1. fix data_size is not correct and be 0. 2. detect data format of IEEE-float. 3. add PCMS8bit, PCMS16bit and PCMS32 convert to float 32bit at class WavReader. --- examples/cpp/wav.h | 67 +++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/examples/cpp/wav.h b/examples/cpp/wav.h index a3566b6..249d7e3 100644 --- a/examples/cpp/wav.h +++ b/examples/cpp/wav.h @@ -79,6 +79,13 @@ class WavReader { fread(header.data, 8, sizeof(char), fp); } + if (header.data_size == 0) { + int offset = ftell(fp); + fseek(fp, 0, SEEK_END); + header.data_size = ftell(fp) - offset; + fseek(fp, offset, SEEK_SET); + } + num_channel_ = header.channels; sample_rate_ = header.sample_rate; bits_per_sample_ = header.bit; @@ -90,35 +97,53 @@ class WavReader { std::cout << "sample_rate_ :" << sample_rate_ << std::endl; std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl; std::cout << "num_samples :" << num_data << std::endl; - std::cout << "num_data size :" << header.data_size << std::endl; + std::cout << "num_data_size :" << header.data_size << std::endl; - for (int i = 0; i < num_data; ++i) { - switch (bits_per_sample_) { + switch (bits_per_sample_) { case 8: { - char sample; - fread(&sample, 1, sizeof(char), fp); - data_[i] = static_cast(sample); - break; + char sample; + for (int i = 0; i < num_data; ++i) { + fread(&sample, 1, sizeof(char), fp); + data_[i] = static_cast(sample) / 32768; + } + break; } case 16: { - int16_t sample; - fread(&sample, 1, sizeof(int16_t), fp); - // std::cout << sample; - data_[i] = static_cast(sample); - // std::cout << data_[i]; - break; + int16_t sample; + for (int i = 0; i < num_data; ++i) { + fread(&sample, 1, sizeof(int16_t), fp); + data_[i] = static_cast(sample) / 32768; + } + break; } - case 32: { - int sample; - fread(&sample, 1, sizeof(int), fp); - data_[i] = static_cast(sample); - break; + case 32: + { + if (header.format == 1) //S32 + { + int sample; + for (int i = 0; i < num_data; ++i) { + fread(&sample, 1, sizeof(int), fp); + data_[i] = static_cast(sample) / 32768; + } + } + else if (header.format == 3) // IEEE-float + { + float sample; + for (int i = 0; i < num_data; ++i) { + fread(&sample, 1, sizeof(float), fp); + data_[i] = static_cast(sample); + } + } + else { + printf("unsupported quantization bits\n"); + } + break; } default: - printf("unsupported quantization bits\n"); - exit(1); - } + printf("unsupported quantization bits\n"); + break; } + fclose(fp); return true; }