refactor(acoustic): Add docs, gitignore, cleanup deps, enhance WAV reader

- Add README.md with module overview, build instructions, ROS integration - Add .gitignore for build artifacts, IDE files, Python cache - Remove kiss_fft references from build_demo.bat and CMakeLists.txt - Enhance wav_file_source: support 8/16/24/32-bit PCM and 32-bit float WAV - Verify 20/20 validation accuracy maintained after all changes
20 hours ago · 8dddc3fd48
parent 6928e85232
commit 8dddc3fd48
6 changed files with 205 additions and 13 deletions
--- a/src/drone-software/src/acoustic/.gitignore
+++ b/src/drone-software/src/acoustic/.gitignore
@ -0,0 +1,19 @@
+# Build artifacts
+build/*.o
+build/*.obj
+CMakeCache.txt
+CMakeFiles/
+cmake-build-*/
+
+# IDE
+.vs/
+.vscode/
+*.user
+
+# Python cache
+scripts/__pycache__/
+*.pyc
+
+# Temporary files
+*.tmp
+*.log
--- a/src/drone-software/src/acoustic/CMakeLists.txt
+++ b/src/drone-software/src/acoustic/CMakeLists.txt
@ -94,7 +94,7 @@ else()
    message(WARNING "ONNX Runtime NOT found. ONNX tests disabled.")
 endif()

-set(KISSFFT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/kiss_fft" CACHE PATH "Kiss FFT root")
+

 # ── Core library sources ─────────────────────────────────────────
 set(CORE_BASE_SOURCES
@ -126,7 +126,7 @@ target_include_directories(${PROJECT_NAME}_core_base PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include>
    ${EIGEN3_INCLUDE_DIR}
-    ${KISSFFT_DIR}
+
 )

 # Full core library
@ -137,7 +137,7 @@ if(ONNXRuntime_FOUND)
        $<INSTALL_INTERFACE:include>
        ${ONNXRuntime_INCLUDE_DIRS}
        ${EIGEN3_INCLUDE_DIR}
-        ${KISSFFT_DIR}
+    
    )
    if(YAML_CPP_FOUND)
        target_include_directories(${PROJECT_NAME}_core PUBLIC ${yaml-cpp_INCLUDE_DIRS})
--- a/src/drone-software/src/acoustic/README.md
+++ b/src/drone-software/src/acoustic/README.md
@ -0,0 +1,127 @@
+# Acoustic Analyzer Module
+
+无人机声学威胁检测模块，基于深度学习（CNN-GRU）的枪声/爆炸声分类器，结合麦克风阵列 GCC-PHAT 定位与 SPL 衰减模型测距。
+
+## 模块结构
+
+```
+acoustic/
+├── include/acoustic_analyzer/     # 公共头文件
+│   ├── core/                      # 核心算法
+│   │   ├── audio_buffer.h         # 音频循环缓冲区
+│   │   ├── distance_estimator.h   # SPL 距离估计器
+│   │   ├── feature_extractor.h    # Mel 频谱图提取
+│   │   ├── fft_utils.h            # FFT 工具
+│   │   ├── gcc_phat_localizer.h   # GCC-PHAT 声源定位
+│   │   ├── gunshot_classifier.h   # ONNX 分类器
+│   │   ├── pipeline.h             # 完整处理流水线
+│   │   ├── threat_tracker.h       # 威胁跟踪/去重
+│   │   └── types.h                # 公共类型定义
+│   ├── io/                        # 音频输入源
+│   │   ├── audio_source.h
+│   │   ├── mobile_phone_source.h
+│   │   └── wav_file_source.h      # WAV 文件读取
+│   └── ros/                       # ROS 包装器
+│       ├── acoustic_node.h
+│       └── threat_publisher.h
+├── src/                           # 实现文件
+│   ├── core/
+│   ├── io/
+│   └── ros/
+├── tests/                         # 测试程序
+│   ├── demo_offline.cpp           # 离线推理演示
+│   ├── test_core_lib.cpp
+│   ├── test_classifier_cpp.cpp
+│   └── extract_mel_cpp.cpp
+├── scripts/                       # Python 训练/导出脚本
+│   ├── train_binary_classifier.py
+│   ├── export_onnx.py
+│   ├── generate_dataset.py
+│   └── verify_val_accuracy.py
+├── models/                        # 模型文件
+│   ├── gunshot_classifier.onnx
+│   ├── label_map.json
+│   └── mel_filter_bank.bin
+├── config/
+│   └── acoustic_params.yaml       # 流水线配置
+├── third_party/                   # 头文件级依赖
+│   ├── eigen-3.4.0/               # 矩阵运算
+│   └── onnxruntime/               # ONNX Runtime C++ API
+├── CMakeLists.txt
+├── build_demo.bat                 # Windows 一键编译
+├── batch_test.bat                 # Windows 一键测试
+└── package.xml                    # ROS 包描述
+```
+
+## 快速开始（Windows + MinGW）
+
+### 1. 编译离线演示程序
+
+```bat
+build_demo.bat
+```
+
+### 2. 运行单文件测试
+
+```bat
+build\demo_offline.exe dataset\binary\val\ambient\drone_noise_val_000.wav
+build\demo_offline.exe dataset\binary\val\threat\synth_threat_val_000.wav
+```
+
+### 3. 批量验证整个数据集
+
+```bat
+batch_test.bat
+:: 或直接传目录（支持递归子目录）
+build\demo_offline.exe dataset\binary\val
+```
+
+### 4. 使用自定义模型
+
+```bat
+build\demo_offline.exe some_audio.wav --model my_model.onnx --label_map my_labels.json
+```
+
+## 模型训练（Python）
+
+```bash
+# 1. 生成合成数据集（或准备真实音频）
+cd scripts
+python generate_dataset.py
+
+# 2. 训练二分类器
+python train_binary_classifier.py --data_dir ../dataset/binary --epochs 50
+
+# 3. 导出 ONNX
+python export_onnx.py --checkpoint checkpoints/best.pth --output ../models/gunshot_classifier.onnx
+```
+
+## ROS 集成（Ubuntu）
+
+```bash
+cd ~/catkin_ws/src
+git clone <repo_url>
+cd ~/catkin_ws
+catkin_make -DCMAKE_BUILD_TYPE=Release
+source devel/setup.bash
+roslaunch acoustic_analyzer acoustic_node.launch
+```
+
+## 依赖
+
+| 依赖 | 用途 | 状态 |
+|------|------|------|
+| Eigen 3.4.0 | 矩阵运算 | Bundled |
+| ONNX Runtime 1.17+ | 模型推理 | Bundled headers + DLL/lib |
+| yaml-cpp | 配置解析 | Ubuntu: `apt install libyaml-cpp-dev` |
+| kiss_fft | ~~FFT~~ | **已移除**，使用内联实现 |
+
+## 已知限制
+
+- 当前分类器基于合成数据集训练，仅用于代码集成验证。实际部署需真实威胁音频重新训练。
+- Windows 下完整流水线（Pipeline 类）需要 yaml-cpp，离线 demo 已绕过此依赖。
+- 距离估计依赖 SPL 校准，合成音频的归一化幅度会导致距离值偏大。
+
+## 许可证
+
+与父项目保持一致。
--- a/src/drone-software/src/acoustic/build/demo_offline.exe
+++ b/src/drone-software/src/acoustic/build/demo_offline.exe
--- a/src/drone-software/src/acoustic/build_demo.bat
+++ b/src/drone-software/src/acoustic/build_demo.bat
@ -8,11 +8,10 @@ echo ==========================================

 set SRC_ROOT=%~dp0
 set EIGEN=%SRC_ROOT%third_party\eigen-3.4.0
-set KISSFFT=%SRC_ROOT%third_party\kiss_fft
 set ONNX_INC=%SRC_ROOT%third_party\onnxruntime\include
 set ONNX_LIB=%SRC_ROOT%third_party\onnxruntime\lib\libonnxruntime.a

-set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%KISSFFT% -I%ONNX_INC%
+set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%ONNX_INC%
 set FLAGS=-std=c++17 -O2 -D_USE_MATH_DEFINES -Wa,-mbig-obj
 set LIBS=%ONNX_LIB%

--- a/src/drone-software/src/acoustic/src/io/wav_file_source.cpp
+++ b/src/drone-software/src/acoustic/src/io/wav_file_source.cpp
@ -24,15 +24,22 @@ WavFileSource::WavFileSource(const std::string& path, int target_sample_rate)
    : path_(path), target_sample_rate_(target_sample_rate), fp_(nullptr) {}

 WavFileSource::~WavFileSource() {
-    if (fp_) fclose(fp_);
+    close();
+}
+
+void WavFileSource::close() {
+    if (fp_) {
+        fclose(fp_);
+        fp_ = nullptr;
+    }
 }

 bool WavFileSource::open() {
+    close();
    fp_ = fopen(path_.c_str(), "rb");
    if (!fp_) return false;
    if (!parse_wav_header()) {
-        fclose(fp_);
-        fp_ = nullptr;
+        close();
        return false;
    }
    return true;
@ -42,7 +49,7 @@ bool WavFileSource::parse_wav_header() {
    WavHeader hdr;
    if (fread(&hdr, sizeof(hdr), 1, fp_) != 1) return false;
    if (std::memcmp(hdr.riff, "RIFF", 4) != 0 || std::memcmp(hdr.wave, "WAVE", 4) != 0) return false;
-    if (hdr.audio_format != 1) return false; // PCM only
+    if (hdr.audio_format != 1 && hdr.audio_format != 3) return false; // PCM or IEEE float

    num_channels_ = hdr.num_channels;
    file_sample_rate_ = hdr.sample_rate;
@ -55,7 +62,9 @@ bool WavFileSource::parse_wav_header() {
        if (fread(&chunk_size, 4, 1, fp_) != 1) return false;
        if (std::memcmp(chunk_id, "data", 4) == 0) {
            data_start_ = ftell(fp_);
-            total_samples_ = chunk_size / (num_channels_ * (bits_per_sample_ / 8));
+            int bytes_per_sample = bits_per_sample_ / 8;
+            if (bytes_per_sample == 0) bytes_per_sample = 1;
+            total_samples_ = chunk_size / (num_channels_ * bytes_per_sample);
            read_pos_ = 0;
            return true;
        }
@ -64,6 +73,42 @@ bool WavFileSource::parse_wav_header() {
    return false;
 }

+namespace {
+
+float convert_sample(const uint8_t* raw, int bytes, bool is_float) {
+    if (is_float) {
+        if (bytes == 4) {
+            float val;
+            std::memcpy(&val, raw, 4);
+            return val;
+        }
+        return 0.0f;
+    }
+    // Integer PCM
+    if (bytes == 1) {
+        int32_t val = static_cast<int32_t>(raw[0]) - 128;
+        return val / 128.0f;
+    } else if (bytes == 2) {
+        int16_t val;
+        std::memcpy(&val, raw, 2);
+        return val / 32768.0f;
+    } else if (bytes == 3) {
+        int32_t val = (static_cast<int32_t>(raw[2]) << 16)
+                    | (static_cast<int32_t>(raw[1]) << 8)
+                    | static_cast<int32_t>(raw[0]);
+        // Sign-extend 24-bit to 32-bit
+        if (val & 0x800000) val |= 0xFF000000;
+        return val / 8388608.0f;
+    } else if (bytes >= 4) {
+        int32_t val;
+        std::memcpy(&val, raw, 4);
+        return std::max(-1.0f, std::min(1.0f, val / 2147483648.0f));
+    }
+    return 0.0f;
+}
+
+} // anonymous namespace
+
 void WavFileSource::resample_if_needed(std::vector<float>& mono, int src_rate, int dst_rate) {
    if (src_rate == dst_rate || dst_rate <= 0) return;
    double ratio = static_cast<double>(dst_rate) / src_rate;
@ -86,7 +131,10 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
    if (samples_to_read == 0) return 0;

    int bytes_per_sample = bits_per_sample_ / 8;
+    if (bytes_per_sample == 0) bytes_per_sample = 1;
    int block_align = num_channels_ * bytes_per_sample;
+    bool is_float = (bits_per_sample_ == 32); // Assume 32-bit is float if audio_format==3
+
    std::vector<uint8_t> raw(samples_to_read * block_align);
    fseek(fp_, static_cast<long>(data_start_ + read_pos_ * block_align), SEEK_SET);
    size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_);
@ -99,9 +147,8 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp

    for (size_t i = 0; i < read_blocks; ++i) {
        for (int ch = 0; ch < num_channels_; ++ch) {
-            int16_t sample = 0;
-            std::memcpy(&sample, &raw[i * block_align + ch * bytes_per_sample], bytes_per_sample);
-            out[ch][i] = sample / 32768.0f;
+            out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample],
+                                         bytes_per_sample, is_float);
        }
    }