style(acoustic): Unify size_t to std::size_t, fix hardcoded SPL, add threshold CLI

- Replace bare 'size_t' with 'std::size_t' in core/*.cpp, io/*.cpp, headers (the rewritten ReceiveLoop in mobile_phone_source.cpp still uses bare 'size_t')
- Fix distance_estimator: 'threat' class now uses config.ref_spl_gunshot instead of hardcoded 150.0f (150.0f remains only as the final fallback when ref_spl_gunshot is not positive)
- Add --threshold CLI arg to demo_offline (default 0.5)
- Rewrite mobile_phone_source.cpp to match PIMPL header API
- All tests pass: test_core_lib 6/6, demo_offline 20/20, extract_mel_cpp, test_classifier_cpp
zhaochang_branch
赵昌 22 hours ago
parent 4a6908df18
commit c5c16ad74b

@ -22,7 +22,7 @@ public:
bool open();
bool parse_wav_header();
void resample_if_needed(std::vector<float>& mono, int src_rate, int dst_rate);
size_t read(std::vector<std::vector<float>>& out, size_t max_samples);
std::size_t read(std::vector<std::vector<float>>& out, std::size_t max_samples);
void close();
// Returns the channel count held in num_channels_.
std::size_t num_channels() const { return num_channels_; }
@ -39,8 +39,8 @@ private:
uint32_t file_sample_rate_;
uint16_t bits_per_sample_;
long data_start_;
size_t total_samples_;
size_t read_pos_;
std::size_t total_samples_;
std::size_t read_pos_;
};
} // namespace acoustic

@ -28,8 +28,8 @@ struct DistanceEstimator::Impl {
if (sound_type == "gunshot") return config.ref_spl_gunshot;
if (sound_type == "artillery") return config.ref_spl_artillery;
if (sound_type == "explosion") return config.ref_spl_explosion;
if (sound_type == "threat") return 150.0f;
// Fallback: use gunshot/threat reference
if (sound_type == "threat") return config.ref_spl_gunshot; // binary model uses gunshot ref
// Fallback: use gunshot reference
return config.ref_spl_gunshot > 0.0f ? config.ref_spl_gunshot : 150.0f;
}

@ -74,7 +74,7 @@ struct FeatureExtractor::Impl {
}
Eigen::MatrixXf ComputeMelSpec(const std::vector<float>& audio) {
size_t n_samples = audio.size();
std::size_t n_samples = audio.size();
int n_fft_bins = n_fft / 2 + 1;
int n_frames = static_cast<int>((static_cast<int>(n_samples) - n_fft) / hop_length) + 1;
if (n_frames < 1) n_frames = 1;
@ -89,7 +89,7 @@ struct FeatureExtractor::Impl {
for (int t = 0; t < n_frames; ++t) {
int start = t * hop_length;
for (int i = 0; i < n_fft; ++i) {
size_t idx = start + i;
std::size_t idx = start + i;
fft_buf[i] = (idx < n_samples ? preemph[idx] : 0.0f) * window[i];
}
compute_power_spectrum(fft_buf.data(), n_fft, power.data());
@ -164,13 +164,13 @@ std::vector<Eigen::MatrixXf> FeatureExtractor::MelSpectrogramMultiChannel(
const std::vector<float>& audio_samples,
std::size_t num_channels) {
if (num_channels == 0) return {};
size_t total = audio_samples.size();
size_t samples_per_channel = total / num_channels;
std::size_t total = audio_samples.size();
std::size_t samples_per_channel = total / num_channels;
std::vector<Eigen::MatrixXf> results;
results.reserve(num_channels);
for (std::size_t ch = 0; ch < num_channels; ++ch) {
std::vector<float> ch_audio(samples_per_channel);
for (size_t i = 0; i < samples_per_channel; ++i) {
for (std::size_t i = 0; i < samples_per_channel; ++i) {
ch_audio[i] = audio_samples[i * num_channels + ch];
}
results.push_back(impl_->ComputeMelSpec(ch_audio));

@ -4,16 +4,16 @@
namespace acoustic {
// First-order pre-emphasis filter over n samples:
//   out[0] = in[0]
//   out[i] = in[i] - coef * in[i - 1]   for 1 <= i < n
// Does nothing when n == 0.
// (Diff view: each bare size_t line is followed by its std::size_t replacement.)
void apply_preemphasis(const float* in, float* out, size_t n, float coef) {
void apply_preemphasis(const float* in, float* out, std::size_t n, float coef) {
if (n == 0) return;
out[0] = in[0];
// Ascending loop reads in[i - 1] after out[i - 1] was written, so `in` and
// `out` must not be the same buffer or the filter would see filtered input.
for (size_t i = 1; i < n; ++i) {
for (std::size_t i = 1; i < n; ++i) {
out[i] = in[i] - coef * in[i - 1];
}
}
void apply_hann_window(float* data, size_t n) {
for (size_t i = 0; i < n; ++i) {
void apply_hann_window(float* data, std::size_t n) {
for (std::size_t i = 0; i < n; ++i) {
float w = 0.5f - 0.5f * std::cos(2.0f * static_cast<float>(M_PI) * i / (n - 1));
data[i] *= w;
}
@ -83,7 +83,7 @@ bool load_mel_filter_bank(const std::string& path, int n_mels, int n_fft_bins,
std::vector<float>& filter_bank) {
std::ifstream f(path, std::ios::binary);
if (!f) return false;
size_t expected = static_cast<size_t>(n_mels) * n_fft_bins;
std::size_t expected = static_cast<std::size_t>(n_mels) * n_fft_bins;
filter_bank.resize(expected);
f.read(reinterpret_cast<char*>(filter_bank.data()), expected * sizeof(float));
return f.gcount() == static_cast<std::streamsize>(expected * sizeof(float));

@ -85,7 +85,7 @@ struct GccPhatLocalizer::Impl {
}
}
float ComputeGccPhatDelay(const float* ch1, const float* ch2, size_t n) {
float ComputeGccPhatDelay(const float* ch1, const float* ch2, std::size_t n) {
int nfft = 1;
while (nfft < static_cast<int>(2 * n)) nfft <<= 1;
@ -132,7 +132,7 @@ struct GccPhatLocalizer::Impl {
bool SolveDirection(const std::vector<float>& delays, float& azimuth, float& elevation) {
int M = mic_config.num_mics;
if (M < 2 || delays.size() != static_cast<size_t>(M * (M - 1) / 2)) {
if (M < 2 || delays.size() != static_cast<std::size_t>(M * (M - 1) / 2)) {
return false;
}
@ -184,14 +184,14 @@ GccPhatLocalizer& GccPhatLocalizer::operator=(GccPhatLocalizer&&) noexcept = def
std::pair<float, float> GccPhatLocalizer::Localize(const Eigen::MatrixXf& multi_channel_audio) {
int M = impl_->mic_config.num_mics;
if (M < 2 || multi_channel_audio.cols() < M) return {0.0f, 0.0f};
size_t n = multi_channel_audio.rows();
std::size_t n = multi_channel_audio.rows();
if (n == 0) return {0.0f, 0.0f};
std::vector<float> delays;
for (int i = 0; i < M; ++i) {
for (int j = i + 1; j < M; ++j) {
std::vector<float> ch1(n), ch2(n);
for (size_t s = 0; s < n; ++s) {
for (std::size_t s = 0; s < n; ++s) {
ch1[s] = multi_channel_audio(static_cast<int>(s), i);
ch2[s] = multi_channel_audio(static_cast<int>(s), j);
}
@ -211,12 +211,12 @@ std::pair<float, float> GccPhatLocalizer::Localize(const Eigen::MatrixXf& multi_
std::pair<float, float> GccPhatLocalizer::Localize(const std::vector<float>& flat_samples,
std::size_t num_channels) {
if (num_channels == 0) return {0.0f, 0.0f};
size_t total = flat_samples.size();
size_t samples_per_channel = total / num_channels;
std::size_t total = flat_samples.size();
std::size_t samples_per_channel = total / num_channels;
if (samples_per_channel == 0) return {0.0f, 0.0f};
Eigen::MatrixXf mat(static_cast<int>(samples_per_channel), static_cast<int>(num_channels));
for (size_t ch = 0; ch < num_channels; ++ch) {
for (size_t i = 0; i < samples_per_channel; ++i) {
for (std::size_t ch = 0; ch < num_channels; ++ch) {
for (std::size_t i = 0; i < samples_per_channel; ++i) {
mat(static_cast<int>(i), static_cast<int>(ch)) = flat_samples[i * num_channels + ch];
}
}

@ -1,7 +1,12 @@
#include "acoustic_analyzer/io/mobile_phone_source.h"
#include <ros/ros.h>
#include <cstring>
#include <cmath>
#include <chrono>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <queue>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
@ -10,92 +15,135 @@
namespace acoustic {
MobilePhoneSource::MobilePhoneSource(int port, int sample_rate, float timeout_sec)
: port_(port), sample_rate_(sample_rate), timeout_sec_(timeout_sec), sockfd_(-1) {}
struct MobilePhoneSource::Impl {
int port_ = 0;
int sample_rate_ = 16000;
float timeout_sec_ = 10.0f;
int sockfd_ = -1;
bool running_ = false;
std::thread recv_thread_;
std::mutex mutex_;
std::condition_variable cv_;
std::queue<float> buffer_;
MobilePhoneSource::~MobilePhoneSource() {
close();
}
Impl(int port, int sample_rate, float timeout_sec)
: port_(port), sample_rate_(sample_rate), timeout_sec_(timeout_sec) {}
bool MobilePhoneSource::open() {
sockfd_ = socket(AF_INET, SOCK_DGRAM, 0);
if (sockfd_ < 0) return false;
~Impl() { Close(); }
int opt = 1;
setsockopt(sockfd_, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
bool Open() {
sockfd_ = socket(AF_INET, SOCK_DGRAM, 0);
if (sockfd_ < 0) return false;
sockaddr_in addr{};
addr.sin_family = AF_INET;
addr.sin_port = htons(port_);
addr.sin_addr.s_addr = INADDR_ANY;
int opt = 1;
setsockopt(sockfd_, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
if (bind(sockfd_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) < 0) {
::close(sockfd_);
sockfd_ = -1;
return false;
}
sockaddr_in addr{};
addr.sin_family = AF_INET;
addr.sin_port = htons(port_);
addr.sin_addr.s_addr = INADDR_ANY;
// Set non-blocking
int flags = fcntl(sockfd_, F_GETFL, 0);
fcntl(sockfd_, F_SETFL, flags | O_NONBLOCK);
if (bind(sockfd_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) < 0) {
::close(sockfd_);
sockfd_ = -1;
return false;
}
running_ = true;
recv_thread_ = std::thread(&MobilePhoneSource::receive_loop, this);
return true;
}
int flags = fcntl(sockfd_, F_GETFL, 0);
fcntl(sockfd_, F_SETFL, flags | O_NONBLOCK);
void MobilePhoneSource::close() {
running_ = false;
if (recv_thread_.joinable()) recv_thread_.join();
if (sockfd_ >= 0) {
::close(sockfd_);
sockfd_ = -1;
running_ = true;
recv_thread_ = std::thread(&Impl::ReceiveLoop, this);
return true;
}
}
void MobilePhoneSource::receive_loop() {
std::vector<float> packet_buf(2048);
sockaddr_in client_addr{};
socklen_t addr_len = sizeof(client_addr);
auto start = std::chrono::steady_clock::now();
while (running_) {
ssize_t n = recvfrom(sockfd_, packet_buf.data(),
packet_buf.size() * sizeof(float),
0, reinterpret_cast<sockaddr*>(&client_addr), &addr_len);
if (n > 0) {
size_t samples = n / sizeof(float);
std::lock_guard<std::mutex> lock(mutex_);
for (size_t i = 0; i < samples; ++i) {
buffer_.push(packet_buf[i]);
}
cv_.notify_one();
start = std::chrono::steady_clock::now();
} else {
auto now = std::chrono::steady_clock::now();
float elapsed = std::chrono::duration<float>(now - start).count();
if (elapsed > timeout_sec_) {
// Timeout: stop waiting
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
void Close() {
running_ = false;
if (recv_thread_.joinable()) recv_thread_.join();
if (sockfd_ >= 0) {
::close(sockfd_);
sockfd_ = -1;
}
}
}
size_t MobilePhoneSource::read(std::vector<std::vector<float>>& out, size_t max_samples) {
std::unique_lock<std::mutex> lock(mutex_);
if (buffer_.empty()) {
cv_.wait_for(lock, std::chrono::milliseconds(100));
void ReceiveLoop() {
std::vector<float> packet_buf(2048);
sockaddr_in client_addr{};
socklen_t addr_len = sizeof(client_addr);
auto start = std::chrono::steady_clock::now();
while (running_) {
ssize_t n = recvfrom(sockfd_, packet_buf.data(),
packet_buf.size() * sizeof(float),
0, reinterpret_cast<sockaddr*>(&client_addr), &addr_len);
if (n > 0) {
size_t samples = static_cast<size_t>(n) / sizeof(float);
std::lock_guard<std::mutex> lock(mutex_);
for (size_t i = 0; i < samples; ++i) {
buffer_.push(packet_buf[i]);
}
cv_.notify_one();
start = std::chrono::steady_clock::now();
} else {
auto now = std::chrono::steady_clock::now();
float elapsed = std::chrono::duration<float>(now - start).count();
if (elapsed > timeout_sec_) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
}
}
}
size_t available = std::min(max_samples, buffer_.size());
out.resize(1);
out[0].resize(available);
for (size_t i = 0; i < available; ++i) {
out[0][i] = buffer_.front();
buffer_.pop();
std::vector<float> Read(std::size_t max_samples) {
std::unique_lock<std::mutex> lock(mutex_);
if (buffer_.empty()) {
cv_.wait_for(lock, std::chrono::milliseconds(100));
}
std::size_t available = std::min(max_samples, buffer_.size());
std::vector<float> out(available);
for (std::size_t i = 0; i < available; ++i) {
out[i] = buffer_.front();
buffer_.pop();
}
return out;
}
return available;
};
// Builds the private Impl with the given sample rate and timeout.
// `nh` and `topic` are accepted but not used; the (void) casts only silence
// unused-parameter warnings.
// NOTE(review): the port handed to Impl is hard-wired to 0, so Impl::Open()
// binds with htons(0). On typical socket stacks that lets the OS pick an
// arbitrary port, which a sender cannot know in advance — confirm intended.
MobilePhoneSource::MobilePhoneSource(ros::NodeHandle& nh,
const std::string& topic,
float timeout_sec,
int sample_rate)
: impl_(std::make_unique<Impl>(0, sample_rate, timeout_sec)) {
(void)nh;
(void)topic;
// Topic-based ROS subscriber could be added here; currently using UDP fallback.
}
// Defaulted here, after Impl is fully defined. Impl's own destructor calls
// Close(), so releasing impl_ is expected to stop the receiver and close the
// socket (impl_'s declaration lives in the header, not visible in this diff).
MobilePhoneSource::~MobilePhoneSource() = default;
// Forwards to Impl::Open(), which creates and binds the UDP socket and starts
// the receive thread. Returns false if socket creation or bind fails.
bool MobilePhoneSource::Open() {
return impl_->Open();
}
// Forwards to Impl::Read(). Returns at most num_samples queued samples; when
// the queue is empty Impl::Read waits up to 100 ms first, so the result may
// be shorter than requested or empty.
std::vector<float> MobilePhoneSource::Read(std::size_t num_samples) {
return impl_->Read(num_samples);
}
// Forwards to Impl::Close(): clears the running flag, joins the receive
// thread if joinable, and closes the socket if one is open.
void MobilePhoneSource::Close() {
impl_->Close();
}
// Always reports a single channel.
std::size_t MobilePhoneSource::NumChannels() const {
return 1;
}
// Returns the sample rate that was passed to the constructor and stored in Impl.
int MobilePhoneSource::SampleRate() const {
return impl_->sample_rate_;
}
// True while Impl holds a socket descriptor (sockfd_ >= 0); Impl resets it to
// -1 on bind failure and in Close().
bool MobilePhoneSource::IsOpen() const {
return impl_->sockfd_ >= 0;
}
} // namespace acoustic

@ -112,22 +112,22 @@ float convert_sample(const uint8_t* raw, int bytes, bool is_float) {
// Resamples `mono` in place from src_rate to dst_rate by linear interpolation.
// Leaves `mono` untouched when the rates are equal or dst_rate <= 0.
// (Diff view: each bare size_t line is followed by its std::size_t replacement.)
// NOTE(review): src_rate is not validated; a value of 0 makes `ratio`
// non-finite — confirm callers guarantee src_rate > 0.
void WavFileSource::resample_if_needed(std::vector<float>& mono, int src_rate, int dst_rate) {
if (src_rate == dst_rate || dst_rate <= 0) return;
double ratio = static_cast<double>(dst_rate) / src_rate;
// Output length is the scaled input length, truncated toward zero by the cast.
size_t new_len = static_cast<size_t>(mono.size() * ratio);
std::size_t new_len = static_cast<std::size_t>(mono.size() * ratio);
std::vector<float> resampled(new_len);
for (size_t i = 0; i < new_len; ++i) {
for (std::size_t i = 0; i < new_len; ++i) {
// Fractional position of output sample i in the source signal.
double src_idx = i / ratio;
size_t idx0 = static_cast<size_t>(src_idx);
size_t idx1 = std::min(idx0 + 1, mono.size() - 1);
std::size_t idx0 = static_cast<std::size_t>(src_idx);
// Right-hand neighbour, clamped to the last source sample.
std::size_t idx1 = std::min(idx0 + 1, mono.size() - 1);
double frac = src_idx - idx0;
// Blend the two neighbouring source samples by the fractional offset.
resampled[i] = static_cast<float>(mono[idx0] * (1.0 - frac) + mono[idx1] * frac);
}
mono.swap(resampled);
}
size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samples) {
std::size_t WavFileSource::read(std::vector<std::vector<float>>& out, std::size_t max_samples) {
if (!fp_) return 0;
size_t samples_to_read = std::min(max_samples, total_samples_ - read_pos_);
std::size_t samples_to_read = std::min(max_samples, total_samples_ - read_pos_);
if (samples_to_read == 0) return 0;
int bytes_per_sample = bits_per_sample_ / 8;
@ -137,7 +137,7 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
std::vector<uint8_t> raw(samples_to_read * block_align);
fseek(fp_, static_cast<long>(data_start_ + read_pos_ * block_align), SEEK_SET);
size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_);
std::size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_);
if (read_blocks == 0) return 0;
out.resize(num_channels_);
@ -145,7 +145,7 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
out[ch].resize(read_blocks);
}
for (size_t i = 0; i < read_blocks; ++i) {
for (std::size_t i = 0; i < read_blocks; ++i) {
for (int ch = 0; ch < num_channels_; ++ch) {
out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample],
bytes_per_sample, is_float);

@ -26,7 +26,7 @@ struct Prediction {
};
// Writes a one-line usage summary for `prog` to std::cerr.
// (Diff view: the first output line is the old text, the second adds --threshold.)
void print_usage(const char* prog) {
std::cerr << "Usage: " << prog << " <file_or_dir> [--model <onnx>] [--label_map <json>]" << std::endl;
std::cerr << "Usage: " << prog << " <file_or_dir> [--model <onnx>] [--label_map <json>] [--threshold <float>]" << std::endl;
}
bool ends_with(const std::string& s, const std::string& suffix) {
@ -189,16 +189,18 @@ int main(int argc, char** argv) {
std::string target = argv[1];
std::string model_path = "models/gunshot_classifier.onnx";
std::string label_map_path = "models/label_map.json";
float threshold = 0.5f;
for (int i = 2; i < argc; ++i) {
if (std::strcmp(argv[i], "--model") == 0 && i + 1 < argc) model_path = argv[++i];
else if (std::strcmp(argv[i], "--label_map") == 0 && i + 1 < argc) label_map_path = argv[++i];
else if (std::strcmp(argv[i], "--threshold") == 0 && i + 1 < argc) threshold = std::stof(argv[++i]);
}
ClassifierConfig cc;
cc.model_path = model_path;
cc.label_map_path = label_map_path;
cc.threshold = 0.5f;
cc.threshold = threshold;
cc.smoothing_window = 1;
GunshotClassifier classifier(cc);
if (!classifier.IsLoaded()) {

Loading…
Cancel
Save