Merge pull request #407 from bygreencn/master

Fix a bug in the C sample code and some bugs in wav.h.
This commit is contained in:
Alexander Veysov
2023-12-17 20:26:40 +03:00
committed by GitHub
2 changed files with 53 additions and 28 deletions

View File

@@ -26,7 +26,7 @@ public:
int end; int end;
// default + parameterized constructor // default + parameterized constructor
timestamp_t(int start = 0, int end = 0) timestamp_t(int start = -1, int end = -1)
: start(start), end(end) : start(start), end(end)
{ {
}; };
@@ -129,7 +129,7 @@ private:
prev_end = next_start = 0; prev_end = next_start = 0;
speeches.clear(); speeches.clear();
current_speech = timestamp_t(0, 0); current_speech = timestamp_t();
}; };
void predict(const std::vector<float> &data) void predict(const std::vector<float> &data)
@@ -198,7 +198,7 @@ private:
if (prev_end > 0) { if (prev_end > 0) {
current_speech.end = prev_end; current_speech.end = prev_end;
speeches.push_back(current_speech); speeches.push_back(current_speech);
current_speech = timestamp_t(0, 0); current_speech = timestamp_t();
// previously reached silence(< neg_thres) and is still not speech(< thres) // previously reached silence(< neg_thres) and is still not speech(< thres)
if (next_start < prev_end) if (next_start < prev_end)
@@ -214,7 +214,7 @@ private:
else{ else{
current_speech.end = current_sample; current_speech.end = current_sample;
speeches.push_back(current_speech); speeches.push_back(current_speech);
current_speech = timestamp_t(0, 0); current_speech = timestamp_t();
prev_end = 0; prev_end = 0;
next_start = 0; next_start = 0;
temp_end = 0; temp_end = 0;
@@ -268,7 +268,7 @@ private:
if (current_speech.end - current_speech.start > min_speech_samples) if (current_speech.end - current_speech.start > min_speech_samples)
{ {
speeches.push_back(current_speech); speeches.push_back(current_speech);
current_speech = timestamp_t(0, 0); current_speech = timestamp_t();
prev_end = 0; prev_end = 0;
next_start = 0; next_start = 0;
temp_end = 0; temp_end = 0;
@@ -297,10 +297,10 @@ public:
predict(r); predict(r);
} }
if (current_speech.start > 0) { if (current_speech.start >= 0) {
current_speech.end = audio_length_samples; current_speech.end = audio_length_samples;
speeches.push_back(current_speech); speeches.push_back(current_speech);
current_speech = timestamp_t(0, 0); current_speech = timestamp_t();
prev_end = 0; prev_end = 0;
next_start = 0; next_start = 0;
temp_end = 0; temp_end = 0;

View File

@@ -79,6 +79,13 @@ class WavReader {
fread(header.data, 8, sizeof(char), fp); fread(header.data, 8, sizeof(char), fp);
} }
if (header.data_size == 0) {
int offset = ftell(fp);
fseek(fp, 0, SEEK_END);
header.data_size = ftell(fp) - offset;
fseek(fp, offset, SEEK_SET);
}
num_channel_ = header.channels; num_channel_ = header.channels;
sample_rate_ = header.sample_rate; sample_rate_ = header.sample_rate;
bits_per_sample_ = header.bit; bits_per_sample_ = header.bit;
@@ -90,35 +97,53 @@ class WavReader {
std::cout << "sample_rate_ :" << sample_rate_ << std::endl; std::cout << "sample_rate_ :" << sample_rate_ << std::endl;
std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl; std::cout << "bits_per_sample_:" << bits_per_sample_ << std::endl;
std::cout << "num_samples :" << num_data << std::endl; std::cout << "num_samples :" << num_data << std::endl;
std::cout << "num_data size :" << header.data_size << std::endl; std::cout << "num_data_size :" << header.data_size << std::endl;
for (int i = 0; i < num_data; ++i) { switch (bits_per_sample_) {
switch (bits_per_sample_) {
case 8: { case 8: {
char sample; char sample;
fread(&sample, 1, sizeof(char), fp); for (int i = 0; i < num_data; ++i) {
data_[i] = static_cast<float>(sample); fread(&sample, 1, sizeof(char), fp);
break; data_[i] = static_cast<float>(sample) / 32768;
}
break;
} }
case 16: { case 16: {
int16_t sample; int16_t sample;
fread(&sample, 1, sizeof(int16_t), fp); for (int i = 0; i < num_data; ++i) {
// std::cout << sample; fread(&sample, 1, sizeof(int16_t), fp);
data_[i] = static_cast<float>(sample); data_[i] = static_cast<float>(sample) / 32768;
// std::cout << data_[i]; }
break; break;
} }
case 32: { case 32:
int sample; {
fread(&sample, 1, sizeof(int), fp); if (header.format == 1) //S32
data_[i] = static_cast<float>(sample); {
break; int sample;
for (int i = 0; i < num_data; ++i) {
fread(&sample, 1, sizeof(int), fp);
data_[i] = static_cast<float>(sample) / 32768;
}
}
else if (header.format == 3) // IEEE-float
{
float sample;
for (int i = 0; i < num_data; ++i) {
fread(&sample, 1, sizeof(float), fp);
data_[i] = static_cast<float>(sample);
}
}
else {
printf("unsupported quantization bits\n");
}
break;
} }
default: default:
printf("unsupported quantization bits\n"); printf("unsupported quantization bits\n");
exit(1); break;
}
} }
fclose(fp); fclose(fp);
return true; return true;
} }