|
|
|
|
@ -24,15 +24,22 @@ WavFileSource::WavFileSource(const std::string& path, int target_sample_rate)
|
|
|
|
|
: path_(path), target_sample_rate_(target_sample_rate), fp_(nullptr) {}
|
|
|
|
|
|
|
|
|
|
// Destructor: releases the file handle if one is still open.
//
// All closing goes through close(), which both calls fclose() and resets
// fp_ to nullptr. Calling fclose() here directly and then close() would
// close the same FILE* twice, which is undefined behaviour.
WavFileSource::~WavFileSource() {
    close();
}
|
|
|
|
|
|
|
|
|
|
void WavFileSource::close() {
|
|
|
|
|
if (fp_) {
|
|
|
|
|
fclose(fp_);
|
|
|
|
|
fp_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WavFileSource::open() {
|
|
|
|
|
close();
|
|
|
|
|
fp_ = fopen(path_.c_str(), "rb");
|
|
|
|
|
if (!fp_) return false;
|
|
|
|
|
if (!parse_wav_header()) {
|
|
|
|
|
fclose(fp_);
|
|
|
|
|
fp_ = nullptr;
|
|
|
|
|
close();
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
@ -42,7 +49,7 @@ bool WavFileSource::parse_wav_header() {
|
|
|
|
|
WavHeader hdr;
|
|
|
|
|
if (fread(&hdr, sizeof(hdr), 1, fp_) != 1) return false;
|
|
|
|
|
if (std::memcmp(hdr.riff, "RIFF", 4) != 0 || std::memcmp(hdr.wave, "WAVE", 4) != 0) return false;
|
|
|
|
|
if (hdr.audio_format != 1) return false; // PCM only
|
|
|
|
|
if (hdr.audio_format != 1 && hdr.audio_format != 3) return false; // PCM or IEEE float
|
|
|
|
|
|
|
|
|
|
num_channels_ = hdr.num_channels;
|
|
|
|
|
file_sample_rate_ = hdr.sample_rate;
|
|
|
|
|
@ -55,7 +62,9 @@ bool WavFileSource::parse_wav_header() {
|
|
|
|
|
if (fread(&chunk_size, 4, 1, fp_) != 1) return false;
|
|
|
|
|
if (std::memcmp(chunk_id, "data", 4) == 0) {
|
|
|
|
|
data_start_ = ftell(fp_);
|
|
|
|
|
total_samples_ = chunk_size / (num_channels_ * (bits_per_sample_ / 8));
|
|
|
|
|
int bytes_per_sample = bits_per_sample_ / 8;
|
|
|
|
|
if (bytes_per_sample == 0) bytes_per_sample = 1;
|
|
|
|
|
total_samples_ = chunk_size / (num_channels_ * bytes_per_sample);
|
|
|
|
|
read_pos_ = 0;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
@ -64,6 +73,42 @@ bool WavFileSource::parse_wav_header() {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
namespace {

// Assembles `count` bytes (1..4) starting at `p` into an unsigned value,
// treating p[0] as the least-significant byte. Building the value byte by
// byte keeps the result independent of the host's byte order.
uint32_t load_le_u32(const uint8_t* p, int count) {
    uint32_t value = 0;
    for (int i = count - 1; i >= 0; --i) {
        value = (value << 8) | p[i];
    }
    return value;
}

// Converts one raw little-endian sample into a float.
//
// raw      - pointer to the first byte of the sample; must reference at
//            least `bytes` readable bytes.
// bytes    - width of the sample in bytes.
// is_float - true when the sample is a floating-point value rather than
//            integer PCM.
//
// Returns the sample scaled to roughly [-1.0, 1.0] for integer PCM, or the
// stored value unchanged for 4-byte floats. Unsupported combinations
// (floats that are not 4 bytes wide, or a width of zero or less) yield 0.0f.
float convert_sample(const uint8_t* raw, int bytes, bool is_float) {
    static_assert(sizeof(float) == sizeof(uint32_t),
                  "convert_sample expects a 32-bit float");

    if (is_float) {
        if (bytes != 4) return 0.0f;  // only 32-bit floats are handled
        const uint32_t bits = load_le_u32(raw, 4);
        float val;
        std::memcpy(&val, &bits, sizeof(val));
        return val;
    }

    // Integer PCM
    if (bytes == 1) {
        // 8-bit samples are unsigned with a bias of 128.
        const int32_t val = static_cast<int32_t>(raw[0]) - 128;
        return val / 128.0f;
    }
    if (bytes == 2) {
        int32_t val = static_cast<int32_t>(load_le_u32(raw, 2));
        // Sign-extend 16-bit to 32-bit arithmetically.
        if (val & 0x8000) val -= 0x10000;
        return val / 32768.0f;
    }
    if (bytes == 3) {
        int32_t val = static_cast<int32_t>(load_le_u32(raw, 3));
        // Sign-extend 24-bit to 32-bit arithmetically.
        if (val & 0x800000) val -= 0x1000000;
        return val / 8388608.0f;
    }
    if (bytes >= 4) {
        // For samples wider than 32 bits keep the four most-significant
        // bytes, which sit at the end of a little-endian sample.
        const uint32_t bits = load_le_u32(raw + (bytes - 4), 4);
        int32_t val;
        std::memcpy(&val, &bits, sizeof(val));
        return std::max(-1.0f, std::min(1.0f, val / 2147483648.0f));
    }
    return 0.0f;
}

}  // anonymous namespace
|
|
|
|
|
|
|
|
|
|
void WavFileSource::resample_if_needed(std::vector<float>& mono, int src_rate, int dst_rate) {
|
|
|
|
|
if (src_rate == dst_rate || dst_rate <= 0) return;
|
|
|
|
|
double ratio = static_cast<double>(dst_rate) / src_rate;
|
|
|
|
|
@ -86,7 +131,10 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
|
|
|
|
|
if (samples_to_read == 0) return 0;
|
|
|
|
|
|
|
|
|
|
int bytes_per_sample = bits_per_sample_ / 8;
|
|
|
|
|
if (bytes_per_sample == 0) bytes_per_sample = 1;
|
|
|
|
|
int block_align = num_channels_ * bytes_per_sample;
|
|
|
|
|
bool is_float = (bits_per_sample_ == 32); // Assume 32-bit is float if audio_format==3
|
|
|
|
|
|
|
|
|
|
std::vector<uint8_t> raw(samples_to_read * block_align);
|
|
|
|
|
fseek(fp_, static_cast<long>(data_start_ + read_pos_ * block_align), SEEK_SET);
|
|
|
|
|
size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_);
|
|
|
|
|
@ -99,9 +147,8 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < read_blocks; ++i) {
|
|
|
|
|
for (int ch = 0; ch < num_channels_; ++ch) {
|
|
|
|
|
int16_t sample = 0;
|
|
|
|
|
std::memcpy(&sample, &raw[i * block_align + ch * bytes_per_sample], bytes_per_sample);
|
|
|
|
|
out[ch][i] = sample / 32768.0f;
|
|
|
|
|
out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample],
|
|
|
|
|
bytes_per_sample, is_float);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|