diff --git a/src/drone-software/src/acoustic/.gitignore b/src/drone-software/src/acoustic/.gitignore new file mode 100644 index 0000000..98b8aaa --- /dev/null +++ b/src/drone-software/src/acoustic/.gitignore @@ -0,0 +1,19 @@ +# Build artifacts +build/*.o +build/*.obj +CMakeCache.txt +CMakeFiles/ +cmake-build-*/ + +# IDE +.vs/ +.vscode/ +*.user + +# Python cache +scripts/__pycache__/ +*.pyc + +# Temporary files +*.tmp +*.log diff --git a/src/drone-software/src/acoustic/CMakeLists.txt b/src/drone-software/src/acoustic/CMakeLists.txt index 0055e85..a0150e6 100644 --- a/src/drone-software/src/acoustic/CMakeLists.txt +++ b/src/drone-software/src/acoustic/CMakeLists.txt @@ -94,7 +94,7 @@ else() message(WARNING "ONNX Runtime NOT found. ONNX tests disabled.") endif() -set(KISSFFT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/kiss_fft" CACHE PATH "Kiss FFT root") + # ── Core library sources ───────────────────────────────────────── set(CORE_BASE_SOURCES @@ -126,7 +126,7 @@ target_include_directories(${PROJECT_NAME}_core_base PUBLIC $ $ ${EIGEN3_INCLUDE_DIR} - ${KISSFFT_DIR} + ) # Full core library @@ -137,7 +137,7 @@ if(ONNXRuntime_FOUND) $ ${ONNXRuntime_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} - ${KISSFFT_DIR} + ) if(YAML_CPP_FOUND) target_include_directories(${PROJECT_NAME}_core PUBLIC ${yaml-cpp_INCLUDE_DIRS}) diff --git a/src/drone-software/src/acoustic/README.md b/src/drone-software/src/acoustic/README.md new file mode 100644 index 0000000..514eb7a --- /dev/null +++ b/src/drone-software/src/acoustic/README.md @@ -0,0 +1,127 @@ +# Acoustic Analyzer Module + +无人机声学威胁检测模块,基于深度学习(CNN-GRU)的枪声/爆炸声分类器,结合麦克风阵列 GCC-PHAT 定位与 SPL 衰减模型测距。 + +## 模块结构 + +``` +acoustic/ +├── include/acoustic_analyzer/ # 公共头文件 +│ ├── core/ # 核心算法 +│ │ ├── audio_buffer.h # 音频循环缓冲区 +│ │ ├── distance_estimator.h # SPL 距离估计器 +│ │ ├── feature_extractor.h # Mel 频谱图提取 +│ │ ├── fft_utils.h # FFT 工具 +│ │ ├── gcc_phat_localizer.h # GCC-PHAT 声源定位 +│ │ ├── gunshot_classifier.h # ONNX 分类器 +│ │ ├── pipeline.h # 完整处理流水线 +│ │ ├── threat_tracker.h # 威胁跟踪/去重 +│ │ └── types.h # 公共类型定义 +│ ├── io/ # 音频输入源 +│ │ ├── audio_source.h +│ │ ├── mobile_phone_source.h +│ │ └── wav_file_source.h # WAV 文件读取 +│ └── ros/ # ROS 包装器 +│ ├── acoustic_node.h +│ └── threat_publisher.h +├── src/ # 实现文件 +│ ├── core/ +│ ├── io/ +│ └── ros/ +├── tests/ # 测试程序 +│ ├── demo_offline.cpp # 离线推理演示 +│ ├── test_core_lib.cpp +│ ├── test_classifier_cpp.cpp +│ └── extract_mel_cpp.cpp +├── scripts/ # Python 训练/导出脚本 +│ ├── train_binary_classifier.py +│ ├── export_onnx.py +│ ├── generate_dataset.py +│ └── verify_val_accuracy.py +├── models/ # 模型文件 +│ ├── gunshot_classifier.onnx +│ ├── label_map.json +│ └── mel_filter_bank.bin +├── config/ +│ └── acoustic_params.yaml # 流水线配置 +├── third_party/ # 头文件级依赖 +│ ├── eigen-3.4.0/ # 矩阵运算 +│ └── onnxruntime/ # ONNX Runtime C++ API +├── CMakeLists.txt +├── build_demo.bat # Windows 一键编译 +├── batch_test.bat # Windows 一键测试 +└── package.xml # ROS 包描述 +``` + +## 快速开始(Windows + MinGW) + +### 1. 编译离线演示程序 + +```bat +build_demo.bat +``` + +### 2. 运行单文件测试 + +```bat +build\demo_offline.exe dataset\binary\val\ambient\drone_noise_val_000.wav +build\demo_offline.exe dataset\binary\val\threat\synth_threat_val_000.wav +``` + +### 3. 批量验证整个数据集 + +```bat +batch_test.bat +:: 或直接传目录(支持递归子目录) +build\demo_offline.exe dataset\binary\val +``` + +### 4. 使用自定义模型 + +```bat +build\demo_offline.exe some_audio.wav --model my_model.onnx --label_map my_labels.json +``` + +## 模型训练(Python) + +```bash +# 1. 生成合成数据集(或准备真实音频) +cd scripts +python generate_dataset.py + +# 2. 训练二分类器 +python train_binary_classifier.py --data_dir ../dataset/binary --epochs 50 + +# 3. 导出 ONNX +python export_onnx.py --checkpoint checkpoints/best.pth --output ../models/gunshot_classifier.onnx +``` + +## ROS 集成(Ubuntu) + +```bash +cd ~/catkin_ws/src +git clone +cd ~/catkin_ws +catkin_make -DCMAKE_BUILD_TYPE=Release +source devel/setup.bash +roslaunch acoustic_analyzer acoustic_node.launch +``` + +## 依赖 + +| 依赖 | 用途 | 状态 | +|------|------|------| +| Eigen 3.4.0 | 矩阵运算 | Bundled | +| ONNX Runtime 1.17+ | 模型推理 | Bundled headers + DLL/lib | +| yaml-cpp | 配置解析 | Ubuntu: `apt install libyaml-cpp-dev` | +| kiss_fft | ~~FFT~~ | **已移除**,使用内联实现 | + +## 已知限制 + +- 当前分类器基于合成数据集训练,仅用于代码集成验证。实际部署需真实威胁音频重新训练。 +- Windows 下完整流水线(Pipeline 类)需要 yaml-cpp,离线 demo 已绕过此依赖。 +- 距离估计依赖 SPL 校准,合成音频的归一化幅度会导致距离值偏大。 + +## 许可证 + +与父项目保持一致。 diff --git a/src/drone-software/src/acoustic/build/demo_offline.exe b/src/drone-software/src/acoustic/build/demo_offline.exe index 6d0464a..4aa5854 100644 Binary files a/src/drone-software/src/acoustic/build/demo_offline.exe and b/src/drone-software/src/acoustic/build/demo_offline.exe differ diff --git a/src/drone-software/src/acoustic/build_demo.bat b/src/drone-software/src/acoustic/build_demo.bat index 7499f38..ec8cceb 100644 --- a/src/drone-software/src/acoustic/build_demo.bat +++ b/src/drone-software/src/acoustic/build_demo.bat @@ -8,11 +8,10 @@ echo ========================================== set SRC_ROOT=%~dp0 set EIGEN=%SRC_ROOT%third_party\eigen-3.4.0 -set KISSFFT=%SRC_ROOT%third_party\kiss_fft set ONNX_INC=%SRC_ROOT%third_party\onnxruntime\include set ONNX_LIB=%SRC_ROOT%third_party\onnxruntime\lib\libonnxruntime.a -set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%KISSFFT% -I%ONNX_INC% +set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%ONNX_INC% set FLAGS=-std=c++17 -O2 -D_USE_MATH_DEFINES -Wa,-mbig-obj set LIBS=%ONNX_LIB% diff --git a/src/drone-software/src/acoustic/src/io/wav_file_source.cpp b/src/drone-software/src/acoustic/src/io/wav_file_source.cpp index 5d0aa7e..f7177d4 100644 --- a/src/drone-software/src/acoustic/src/io/wav_file_source.cpp +++ b/src/drone-software/src/acoustic/src/io/wav_file_source.cpp @@ -24,15 +24,22 @@ WavFileSource::WavFileSource(const std::string& path, int target_sample_rate) : path_(path), target_sample_rate_(target_sample_rate), fp_(nullptr) {} WavFileSource::~WavFileSource() { - if (fp_) fclose(fp_); + close(); +} + +void WavFileSource::close() { + if (fp_) { + fclose(fp_); + fp_ = nullptr; + } } bool WavFileSource::open() { + close(); fp_ = fopen(path_.c_str(), "rb"); if (!fp_) return false; if (!parse_wav_header()) { - fclose(fp_); - fp_ = nullptr; + close(); return false; } return true; @@ -42,7 +49,7 @@ bool WavFileSource::parse_wav_header() { WavHeader hdr; if (fread(&hdr, sizeof(hdr), 1, fp_) != 1) return false; if (std::memcmp(hdr.riff, "RIFF", 4) != 0 || std::memcmp(hdr.wave, "WAVE", 4) != 0) return false; - if (hdr.audio_format != 1) return false; // PCM only + if (hdr.audio_format != 1 && hdr.audio_format != 3) return false; // PCM or IEEE float num_channels_ = hdr.num_channels; file_sample_rate_ = hdr.sample_rate; @@ -55,7 +62,9 @@ bool WavFileSource::parse_wav_header() { if (fread(&chunk_size, 4, 1, fp_) != 1) return false; if (std::memcmp(chunk_id, "data", 4) == 0) { data_start_ = ftell(fp_); - total_samples_ = chunk_size / (num_channels_ * (bits_per_sample_ / 8)); + int bytes_per_sample = bits_per_sample_ / 8; + if (bytes_per_sample == 0) bytes_per_sample = 1; + total_samples_ = chunk_size / (num_channels_ * bytes_per_sample); read_pos_ = 0; return true; } @@ -64,6 +73,42 @@ bool WavFileSource::parse_wav_header() { return false; } +namespace { + +float convert_sample(const uint8_t* raw, int bytes, bool is_float) { + if (is_float) { + if (bytes == 4) { + float val; + std::memcpy(&val, raw, 4); + return val; + } + return 0.0f; + } + // Integer PCM + if (bytes == 1) { + int32_t val = static_cast(raw[0]) - 128; + return val / 128.0f; + } else if (bytes == 2) { + int16_t val; + std::memcpy(&val, raw, 2); + return val / 32768.0f; + } else if (bytes == 3) { + int32_t val = (static_cast(raw[2]) << 16) + | (static_cast(raw[1]) << 8) + | static_cast(raw[0]); + // Sign-extend 24-bit to 32-bit + if (val & 0x800000) val |= 0xFF000000; + return val / 8388608.0f; + } else if (bytes >= 4) { + int32_t val; + std::memcpy(&val, raw, 4); + return std::max(-1.0f, std::min(1.0f, val / 2147483648.0f)); + } + return 0.0f; +} + +} // anonymous namespace + void WavFileSource::resample_if_needed(std::vector& mono, int src_rate, int dst_rate) { if (src_rate == dst_rate || dst_rate <= 0) return; double ratio = static_cast(dst_rate) / src_rate; @@ -86,7 +131,10 @@ size_t WavFileSource::read(std::vector>& out, size_t max_samp if (samples_to_read == 0) return 0; int bytes_per_sample = bits_per_sample_ / 8; + if (bytes_per_sample == 0) bytes_per_sample = 1; int block_align = num_channels_ * bytes_per_sample; + bool is_float = (bits_per_sample_ == 32); // Assume 32-bit is float if audio_format==3 + std::vector raw(samples_to_read * block_align); fseek(fp_, static_cast(data_start_ + read_pos_ * block_align), SEEK_SET); size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_); @@ -99,9 +147,8 @@ size_t WavFileSource::read(std::vector>& out, size_t max_samp for (size_t i = 0; i < read_blocks; ++i) { for (int ch = 0; ch < num_channels_; ++ch) { - int16_t sample = 0; - std::memcpy(&sample, &raw[i * block_align + ch * bytes_per_sample], bytes_per_sample); - out[ch][i] = sample / 32768.0f; + out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample], + bytes_per_sample, is_float); } }