diff --git a/src/drone-software/src/acoustic/build/demo_offline.exe b/src/drone-software/src/acoustic/build/demo_offline.exe index 312871c..1a4091a 100644 Binary files a/src/drone-software/src/acoustic/build/demo_offline.exe and b/src/drone-software/src/acoustic/build/demo_offline.exe differ diff --git a/src/drone-software/src/acoustic/build/extract_mel_cpp.exe b/src/drone-software/src/acoustic/build/extract_mel_cpp.exe index ce056fb..4a04e4e 100644 Binary files a/src/drone-software/src/acoustic/build/extract_mel_cpp.exe and b/src/drone-software/src/acoustic/build/extract_mel_cpp.exe differ diff --git a/src/drone-software/src/acoustic/build/test_classifier_cpp.exe b/src/drone-software/src/acoustic/build/test_classifier_cpp.exe index 8700e6b..3320b8a 100644 Binary files a/src/drone-software/src/acoustic/build/test_classifier_cpp.exe and b/src/drone-software/src/acoustic/build/test_classifier_cpp.exe differ diff --git a/src/drone-software/src/acoustic/build/test_core_lib.exe b/src/drone-software/src/acoustic/build/test_core_lib.exe index c751169..7e1fbd0 100644 Binary files a/src/drone-software/src/acoustic/build/test_core_lib.exe and b/src/drone-software/src/acoustic/build/test_core_lib.exe differ diff --git a/src/drone-software/src/acoustic/include/acoustic_analyzer/io/wav_file_source.h b/src/drone-software/src/acoustic/include/acoustic_analyzer/io/wav_file_source.h index d71ae0e..5898cb5 100644 --- a/src/drone-software/src/acoustic/include/acoustic_analyzer/io/wav_file_source.h +++ b/src/drone-software/src/acoustic/include/acoustic_analyzer/io/wav_file_source.h @@ -22,7 +22,7 @@ public: bool open(); bool parse_wav_header(); void resample_if_needed(std::vector& mono, int src_rate, int dst_rate); - size_t read(std::vector>& out, size_t max_samples); + std::size_t read(std::vector>& out, std::size_t max_samples); void close(); std::size_t num_channels() const { return num_channels_; } @@ -39,8 +39,8 @@ private: uint32_t file_sample_rate_; uint16_t bits_per_sample_; long data_start_; - size_t total_samples_; - size_t read_pos_; + std::size_t total_samples_; + std::size_t read_pos_; }; } // namespace acoustic diff --git a/src/drone-software/src/acoustic/src/core/distance_estimator.cpp b/src/drone-software/src/acoustic/src/core/distance_estimator.cpp index c2407d7..59a7a6e 100644 --- a/src/drone-software/src/acoustic/src/core/distance_estimator.cpp +++ b/src/drone-software/src/acoustic/src/core/distance_estimator.cpp @@ -28,8 +28,8 @@ struct DistanceEstimator::Impl { if (sound_type == "gunshot") return config.ref_spl_gunshot; if (sound_type == "artillery") return config.ref_spl_artillery; if (sound_type == "explosion") return config.ref_spl_explosion; - if (sound_type == "threat") return 150.0f; - // Fallback: use gunshot/threat reference + if (sound_type == "threat") return config.ref_spl_gunshot; // binary model uses gunshot ref + // Fallback: use gunshot reference return config.ref_spl_gunshot > 0.0f ? config.ref_spl_gunshot : 150.0f; } diff --git a/src/drone-software/src/acoustic/src/core/feature_extractor.cpp b/src/drone-software/src/acoustic/src/core/feature_extractor.cpp index 4c79e66..4cfa66b 100644 --- a/src/drone-software/src/acoustic/src/core/feature_extractor.cpp +++ b/src/drone-software/src/acoustic/src/core/feature_extractor.cpp @@ -74,7 +74,7 @@ struct FeatureExtractor::Impl { } Eigen::MatrixXf ComputeMelSpec(const std::vector& audio) { - size_t n_samples = audio.size(); + std::size_t n_samples = audio.size(); int n_fft_bins = n_fft / 2 + 1; int n_frames = static_cast((static_cast(n_samples) - n_fft) / hop_length) + 1; if (n_frames < 1) n_frames = 1; @@ -89,7 +89,7 @@ struct FeatureExtractor::Impl { for (int t = 0; t < n_frames; ++t) { int start = t * hop_length; for (int i = 0; i < n_fft; ++i) { - size_t idx = start + i; + std::size_t idx = start + i; fft_buf[i] = (idx < n_samples ? preemph[idx] : 0.0f) * window[i]; } compute_power_spectrum(fft_buf.data(), n_fft, power.data()); @@ -164,13 +164,13 @@ std::vector FeatureExtractor::MelSpectrogramMultiChannel( const std::vector& audio_samples, std::size_t num_channels) { if (num_channels == 0) return {}; - size_t total = audio_samples.size(); - size_t samples_per_channel = total / num_channels; + std::size_t total = audio_samples.size(); + std::size_t samples_per_channel = total / num_channels; std::vector results; results.reserve(num_channels); for (std::size_t ch = 0; ch < num_channels; ++ch) { std::vector ch_audio(samples_per_channel); - for (size_t i = 0; i < samples_per_channel; ++i) { + for (std::size_t i = 0; i < samples_per_channel; ++i) { ch_audio[i] = audio_samples[i * num_channels + ch]; } results.push_back(impl_->ComputeMelSpec(ch_audio)); diff --git a/src/drone-software/src/acoustic/src/core/fft_utils.cpp b/src/drone-software/src/acoustic/src/core/fft_utils.cpp index 6f790ed..7bdab56 100644 --- a/src/drone-software/src/acoustic/src/core/fft_utils.cpp +++ b/src/drone-software/src/acoustic/src/core/fft_utils.cpp @@ -4,16 +4,16 @@ namespace acoustic { -void apply_preemphasis(const float* in, float* out, size_t n, float coef) { +void apply_preemphasis(const float* in, float* out, std::size_t n, float coef) { if (n == 0) return; out[0] = in[0]; - for (size_t i = 1; i < n; ++i) { + for (std::size_t i = 1; i < n; ++i) { out[i] = in[i] - coef * in[i - 1]; } } -void apply_hann_window(float* data, size_t n) { - for (size_t i = 0; i < n; ++i) { +void apply_hann_window(float* data, std::size_t n) { + for (std::size_t i = 0; i < n; ++i) { float w = 0.5f - 0.5f * std::cos(2.0f * static_cast(M_PI) * i / (n - 1)); data[i] *= w; } @@ -83,7 +83,7 @@ bool load_mel_filter_bank(const std::string& path, int n_mels, int n_fft_bins, std::vector& filter_bank) { std::ifstream f(path, std::ios::binary); if (!f) return false; - size_t expected = static_cast(n_mels) * n_fft_bins; + std::size_t expected = static_cast(n_mels) * n_fft_bins; filter_bank.resize(expected); f.read(reinterpret_cast(filter_bank.data()), expected * sizeof(float)); return f.gcount() == static_cast(expected * sizeof(float)); diff --git a/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp b/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp index 66194a0..b8915ba 100644 --- a/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp +++ b/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp @@ -85,7 +85,7 @@ struct GccPhatLocalizer::Impl { } } - float ComputeGccPhatDelay(const float* ch1, const float* ch2, size_t n) { + float ComputeGccPhatDelay(const float* ch1, const float* ch2, std::size_t n) { int nfft = 1; while (nfft < static_cast(2 * n)) nfft <<= 1; @@ -132,7 +132,7 @@ struct GccPhatLocalizer::Impl { bool SolveDirection(const std::vector& delays, float& azimuth, float& elevation) { int M = mic_config.num_mics; - if (M < 2 || delays.size() != static_cast(M * (M - 1) / 2)) { + if (M < 2 || delays.size() != static_cast(M * (M - 1) / 2)) { return false; } @@ -184,14 +184,14 @@ GccPhatLocalizer& GccPhatLocalizer::operator=(GccPhatLocalizer&&) noexcept = def std::pair GccPhatLocalizer::Localize(const Eigen::MatrixXf& multi_channel_audio) { int M = impl_->mic_config.num_mics; if (M < 2 || multi_channel_audio.cols() < M) return {0.0f, 0.0f}; - size_t n = multi_channel_audio.rows(); + std::size_t n = multi_channel_audio.rows(); if (n == 0) return {0.0f, 0.0f}; std::vector delays; for (int i = 0; i < M; ++i) { for (int j = i + 1; j < M; ++j) { std::vector ch1(n), ch2(n); - for (size_t s = 0; s < n; ++s) { + for (std::size_t s = 0; s < n; ++s) { ch1[s] = multi_channel_audio(static_cast(s), i); ch2[s] = multi_channel_audio(static_cast(s), j); } @@ -211,12 +211,12 @@ std::pair GccPhatLocalizer::Localize(const Eigen::MatrixXf& multi_ std::pair GccPhatLocalizer::Localize(const std::vector& flat_samples, std::size_t num_channels) { if (num_channels == 0) return {0.0f, 0.0f}; - size_t total = flat_samples.size(); - size_t samples_per_channel = total / num_channels; + std::size_t total = flat_samples.size(); + std::size_t samples_per_channel = total / num_channels; if (samples_per_channel == 0) return {0.0f, 0.0f}; Eigen::MatrixXf mat(static_cast(samples_per_channel), static_cast(num_channels)); - for (size_t ch = 0; ch < num_channels; ++ch) { - for (size_t i = 0; i < samples_per_channel; ++i) { + for (std::size_t ch = 0; ch < num_channels; ++ch) { + for (std::size_t i = 0; i < samples_per_channel; ++i) { mat(static_cast(i), static_cast(ch)) = flat_samples[i * num_channels + ch]; } } diff --git a/src/drone-software/src/acoustic/src/io/mobile_phone_source.cpp b/src/drone-software/src/acoustic/src/io/mobile_phone_source.cpp index 4e8b8db..7b58a43 100644 --- a/src/drone-software/src/acoustic/src/io/mobile_phone_source.cpp +++ b/src/drone-software/src/acoustic/src/io/mobile_phone_source.cpp @@ -1,7 +1,12 @@ #include "acoustic_analyzer/io/mobile_phone_source.h" +#include #include #include #include +#include +#include +#include +#include #include #include #include @@ -10,92 +15,135 @@ namespace acoustic { -MobilePhoneSource::MobilePhoneSource(int port, int sample_rate, float timeout_sec) - : port_(port), sample_rate_(sample_rate), timeout_sec_(timeout_sec), sockfd_(-1) {} +struct MobilePhoneSource::Impl { + int port_ = 0; + int sample_rate_ = 16000; + float timeout_sec_ = 10.0f; + int sockfd_ = -1; + bool running_ = false; + std::thread recv_thread_; + std::mutex mutex_; + std::condition_variable cv_; + std::queue buffer_; -MobilePhoneSource::~MobilePhoneSource() { - close(); -} + Impl(int port, int sample_rate, float timeout_sec) + : port_(port), sample_rate_(sample_rate), timeout_sec_(timeout_sec) {} -bool MobilePhoneSource::open() { - sockfd_ = socket(AF_INET, SOCK_DGRAM, 0); - if (sockfd_ < 0) return false; + ~Impl() { Close(); } - int opt = 1; - setsockopt(sockfd_, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + bool Open() { + sockfd_ = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd_ < 0) return false; - sockaddr_in addr{}; - addr.sin_family = AF_INET; - addr.sin_port = htons(port_); - addr.sin_addr.s_addr = INADDR_ANY; + int opt = 1; + setsockopt(sockfd_, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (bind(sockfd_, reinterpret_cast(&addr), sizeof(addr)) < 0) { - ::close(sockfd_); - sockfd_ = -1; - return false; - } + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_port = htons(port_); + addr.sin_addr.s_addr = INADDR_ANY; - // Set non-blocking - int flags = fcntl(sockfd_, F_GETFL, 0); - fcntl(sockfd_, F_SETFL, flags | O_NONBLOCK); + if (bind(sockfd_, reinterpret_cast(&addr), sizeof(addr)) < 0) { + ::close(sockfd_); + sockfd_ = -1; + return false; + } - running_ = true; - recv_thread_ = std::thread(&MobilePhoneSource::receive_loop, this); - return true; -} + int flags = fcntl(sockfd_, F_GETFL, 0); + fcntl(sockfd_, F_SETFL, flags | O_NONBLOCK); -void MobilePhoneSource::close() { - running_ = false; - if (recv_thread_.joinable()) recv_thread_.join(); - if (sockfd_ >= 0) { - ::close(sockfd_); - sockfd_ = -1; + running_ = true; + recv_thread_ = std::thread(&Impl::ReceiveLoop, this); + return true; } -} -void MobilePhoneSource::receive_loop() { - std::vector packet_buf(2048); - sockaddr_in client_addr{}; - socklen_t addr_len = sizeof(client_addr); - auto start = std::chrono::steady_clock::now(); - - while (running_) { - ssize_t n = recvfrom(sockfd_, packet_buf.data(), - packet_buf.size() * sizeof(float), - 0, reinterpret_cast(&client_addr), &addr_len); - if (n > 0) { - size_t samples = n / sizeof(float); - std::lock_guard lock(mutex_); - for (size_t i = 0; i < samples; ++i) { - buffer_.push(packet_buf[i]); - } - cv_.notify_one(); - start = std::chrono::steady_clock::now(); - } else { - auto now = std::chrono::steady_clock::now(); - float elapsed = std::chrono::duration(now - start).count(); - if (elapsed > timeout_sec_) { - // Timeout: stop waiting - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } + void Close() { + running_ = false; + if (recv_thread_.joinable()) recv_thread_.join(); + if (sockfd_ >= 0) { + ::close(sockfd_); + sockfd_ = -1; } } -} -size_t MobilePhoneSource::read(std::vector>& out, size_t max_samples) { - std::unique_lock lock(mutex_); - if (buffer_.empty()) { - cv_.wait_for(lock, std::chrono::milliseconds(100)); + void ReceiveLoop() { + std::vector packet_buf(2048); + sockaddr_in client_addr{}; + socklen_t addr_len = sizeof(client_addr); + auto start = std::chrono::steady_clock::now(); + + while (running_) { + ssize_t n = recvfrom(sockfd_, packet_buf.data(), + packet_buf.size() * sizeof(float), + 0, reinterpret_cast(&client_addr), &addr_len); + if (n > 0) { + size_t samples = static_cast(n) / sizeof(float); + std::lock_guard lock(mutex_); + for (size_t i = 0; i < samples; ++i) { + buffer_.push(packet_buf[i]); + } + cv_.notify_one(); + start = std::chrono::steady_clock::now(); + } else { + auto now = std::chrono::steady_clock::now(); + float elapsed = std::chrono::duration(now - start).count(); + if (elapsed > timeout_sec_) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + } + } } - size_t available = std::min(max_samples, buffer_.size()); - out.resize(1); - out[0].resize(available); - for (size_t i = 0; i < available; ++i) { - out[0][i] = buffer_.front(); - buffer_.pop(); + std::vector Read(std::size_t max_samples) { + std::unique_lock lock(mutex_); + if (buffer_.empty()) { + cv_.wait_for(lock, std::chrono::milliseconds(100)); + } + + std::size_t available = std::min(max_samples, buffer_.size()); + std::vector out(available); + for (std::size_t i = 0; i < available; ++i) { + out[i] = buffer_.front(); + buffer_.pop(); + } + return out; } - return available; +}; + +MobilePhoneSource::MobilePhoneSource(ros::NodeHandle& nh, + const std::string& topic, + float timeout_sec, + int sample_rate) + : impl_(std::make_unique(0, sample_rate, timeout_sec)) { + (void)nh; + (void)topic; + // Topic-based ROS subscriber could be added here; currently using UDP fallback. +} + +MobilePhoneSource::~MobilePhoneSource() = default; + +bool MobilePhoneSource::Open() { + return impl_->Open(); +} + +std::vector MobilePhoneSource::Read(std::size_t num_samples) { + return impl_->Read(num_samples); +} + +void MobilePhoneSource::Close() { + impl_->Close(); +} + +std::size_t MobilePhoneSource::NumChannels() const { + return 1; +} + +int MobilePhoneSource::SampleRate() const { + return impl_->sample_rate_; +} + +bool MobilePhoneSource::IsOpen() const { + return impl_->sockfd_ >= 0; } } // namespace acoustic diff --git a/src/drone-software/src/acoustic/src/io/wav_file_source.cpp b/src/drone-software/src/acoustic/src/io/wav_file_source.cpp index f7177d4..7cb624a 100644 --- a/src/drone-software/src/acoustic/src/io/wav_file_source.cpp +++ b/src/drone-software/src/acoustic/src/io/wav_file_source.cpp @@ -112,22 +112,22 @@ float convert_sample(const uint8_t* raw, int bytes, bool is_float) { void WavFileSource::resample_if_needed(std::vector& mono, int src_rate, int dst_rate) { if (src_rate == dst_rate || dst_rate <= 0) return; double ratio = static_cast(dst_rate) / src_rate; - size_t new_len = static_cast(mono.size() * ratio); + std::size_t new_len = static_cast(mono.size() * ratio); std::vector resampled(new_len); - for (size_t i = 0; i < new_len; ++i) { + for (std::size_t i = 0; i < new_len; ++i) { double src_idx = i / ratio; - size_t idx0 = static_cast(src_idx); - size_t idx1 = std::min(idx0 + 1, mono.size() - 1); + std::size_t idx0 = static_cast(src_idx); + std::size_t idx1 = std::min(idx0 + 1, mono.size() - 1); double frac = src_idx - idx0; resampled[i] = static_cast(mono[idx0] * (1.0 - frac) + mono[idx1] * frac); } mono.swap(resampled); } -size_t WavFileSource::read(std::vector>& out, size_t max_samples) { +std::size_t WavFileSource::read(std::vector>& out, std::size_t max_samples) { if (!fp_) return 0; - size_t samples_to_read = std::min(max_samples, total_samples_ - read_pos_); + std::size_t samples_to_read = std::min(max_samples, total_samples_ - read_pos_); if (samples_to_read == 0) return 0; int bytes_per_sample = bits_per_sample_ / 8; @@ -137,7 +137,7 @@ size_t WavFileSource::read(std::vector>& out, size_t max_samp std::vector raw(samples_to_read * block_align); fseek(fp_, static_cast(data_start_ + read_pos_ * block_align), SEEK_SET); - size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_); + std::size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_); if (read_blocks == 0) return 0; out.resize(num_channels_); @@ -145,7 +145,7 @@ size_t WavFileSource::read(std::vector>& out, size_t max_samp out[ch].resize(read_blocks); } - for (size_t i = 0; i < read_blocks; ++i) { + for (std::size_t i = 0; i < read_blocks; ++i) { for (int ch = 0; ch < num_channels_; ++ch) { out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample], bytes_per_sample, is_float); diff --git a/src/drone-software/src/acoustic/tests/demo_offline.cpp b/src/drone-software/src/acoustic/tests/demo_offline.cpp index a027bdc..1e40dff 100644 --- a/src/drone-software/src/acoustic/tests/demo_offline.cpp +++ b/src/drone-software/src/acoustic/tests/demo_offline.cpp @@ -26,7 +26,7 @@ struct Prediction { }; void print_usage(const char* prog) { - std::cerr << "Usage: " << prog << " [--model ] [--label_map ]" << std::endl; + std::cerr << "Usage: " << prog << " [--model ] [--label_map ] [--threshold ]" << std::endl; } bool ends_with(const std::string& s, const std::string& suffix) { @@ -189,16 +189,18 @@ int main(int argc, char** argv) { std::string target = argv[1]; std::string model_path = "models/gunshot_classifier.onnx"; std::string label_map_path = "models/label_map.json"; + float threshold = 0.5f; for (int i = 2; i < argc; ++i) { if (std::strcmp(argv[i], "--model") == 0 && i + 1 < argc) model_path = argv[++i]; else if (std::strcmp(argv[i], "--label_map") == 0 && i + 1 < argc) label_map_path = argv[++i]; + else if (std::strcmp(argv[i], "--threshold") == 0 && i + 1 < argc) threshold = std::stof(argv[++i]); } ClassifierConfig cc; cc.model_path = model_path; cc.label_map_path = label_map_path; - cc.threshold = 0.5f; + cc.threshold = threshold; cc.smoothing_window = 1; GunshotClassifier classifier(cc); if (!classifier.IsLoaded()) {