refactor(acoustic): Add docs, gitignore, cleanup deps, enhance WAV reader

- Add README.md with module overview, build instructions, ROS integration
- Add .gitignore for build artifacts, IDE files, Python cache
- Remove kiss_fft references from build_demo.bat and CMakeLists.txt
- Enhance wav_file_source: support 8/16/24/32-bit PCM and 32-bit float WAV
- Verify 20/20 validation accuracy maintained after all changes
zhaochang_branch
赵昌 20 hours ago
parent 6928e85232
commit 8dddc3fd48

@ -0,0 +1,19 @@
# Build artifacts
build/*.o
build/*.obj
CMakeCache.txt
CMakeFiles/
cmake-build-*/
# IDE
.vs/
.vscode/
*.user
# Python cache
scripts/__pycache__/
*.pyc
# Temporary files
*.tmp
*.log

@ -94,7 +94,7 @@ else()
message(WARNING "ONNX Runtime NOT found. ONNX tests disabled.")
endif()
set(KISSFFT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/kiss_fft" CACHE PATH "Kiss FFT root")
# Core library sources
set(CORE_BASE_SOURCES
@ -126,7 +126,7 @@ target_include_directories(${PROJECT_NAME}_core_base PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
${EIGEN3_INCLUDE_DIR}
${KISSFFT_DIR}
)
# Full core library
@ -137,7 +137,7 @@ if(ONNXRuntime_FOUND)
$<INSTALL_INTERFACE:include>
${ONNXRuntime_INCLUDE_DIRS}
${EIGEN3_INCLUDE_DIR}
${KISSFFT_DIR}
)
if(YAML_CPP_FOUND)
target_include_directories(${PROJECT_NAME}_core PUBLIC ${yaml-cpp_INCLUDE_DIRS})

@ -0,0 +1,127 @@
# Acoustic Analyzer Module
无人机声学威胁检测模块基于深度学习CNN-GRU的枪声/爆炸声分类器,结合麦克风阵列 GCC-PHAT 定位与 SPL 衰减模型测距。
## 模块结构
```
acoustic/
├── include/acoustic_analyzer/ # 公共头文件
│ ├── core/ # 核心算法
│ │ ├── audio_buffer.h # 音频循环缓冲区
│ │ ├── distance_estimator.h # SPL 距离估计器
│ │ ├── feature_extractor.h # Mel 频谱图提取
│ │ ├── fft_utils.h # FFT 工具
│ │ ├── gcc_phat_localizer.h # GCC-PHAT 声源定位
│ │ ├── gunshot_classifier.h # ONNX 分类器
│ │ ├── pipeline.h # 完整处理流水线
│ │ ├── threat_tracker.h # 威胁跟踪/去重
│ │ └── types.h # 公共类型定义
│ ├── io/ # 音频输入源
│ │ ├── audio_source.h
│ │ ├── mobile_phone_source.h
│ │ └── wav_file_source.h # WAV 文件读取
│ └── ros/ # ROS 包装器
│ ├── acoustic_node.h
│ └── threat_publisher.h
├── src/ # 实现文件
│ ├── core/
│ ├── io/
│ └── ros/
├── tests/ # 测试程序
│ ├── demo_offline.cpp # 离线推理演示
│ ├── test_core_lib.cpp
│ ├── test_classifier_cpp.cpp
│ └── extract_mel_cpp.cpp
├── scripts/ # Python 训练/导出脚本
│ ├── train_binary_classifier.py
│ ├── export_onnx.py
│ ├── generate_dataset.py
│ └── verify_val_accuracy.py
├── models/ # 模型文件
│ ├── gunshot_classifier.onnx
│ ├── label_map.json
│ └── mel_filter_bank.bin
├── config/
│ └── acoustic_params.yaml # 流水线配置
├── third_party/ # 头文件级依赖
│ ├── eigen-3.4.0/ # 矩阵运算
│ └── onnxruntime/ # ONNX Runtime C++ API
├── CMakeLists.txt
├── build_demo.bat # Windows 一键编译
├── batch_test.bat # Windows 一键测试
└── package.xml # ROS 包描述
```
## 快速开始Windows + MinGW
### 1. 编译离线演示程序
```bat
build_demo.bat
```
### 2. 运行单文件测试
```bat
build\demo_offline.exe dataset\binary\val\ambient\drone_noise_val_000.wav
build\demo_offline.exe dataset\binary\val\threat\synth_threat_val_000.wav
```
### 3. 批量验证整个数据集
```bat
batch_test.bat
:: 或直接传目录(支持递归子目录)
build\demo_offline.exe dataset\binary\val
```
### 4. 使用自定义模型
```bat
build\demo_offline.exe some_audio.wav --model my_model.onnx --label_map my_labels.json
```
## 模型训练Python
```bash
# 1. 生成合成数据集(或准备真实音频)
cd scripts
python generate_dataset.py
# 2. 训练二分类器
python train_binary_classifier.py --data_dir ../dataset/binary --epochs 50
# 3. 导出 ONNX
python export_onnx.py --checkpoint checkpoints/best.pth --output ../models/gunshot_classifier.onnx
```
## ROS 集成Ubuntu
```bash
cd ~/catkin_ws/src
git clone <repo_url>
cd ~/catkin_ws
catkin_make -DCMAKE_BUILD_TYPE=Release
source devel/setup.bash
roslaunch acoustic_analyzer acoustic_node.launch
```
## 依赖
| 依赖 | 用途 | 状态 |
|------|------|------|
| Eigen 3.4.0 | 矩阵运算 | Bundled |
| ONNX Runtime 1.17+ | 模型推理 | Bundled headers + DLL/lib |
| yaml-cpp | 配置解析 | Ubuntu: `apt install libyaml-cpp-dev` |
| kiss_fft | ~~FFT~~ | **已移除**,使用内联实现 |
## 已知限制
- 当前分类器基于合成数据集训练,仅用于代码集成验证。实际部署需真实威胁音频重新训练。
- Windows 下完整流水线Pipeline 类)需要 yaml-cpp离线 demo 已绕过此依赖。
- 距离估计依赖 SPL 校准,合成音频的归一化幅度会导致距离值偏大。
## 许可证
与父项目保持一致。

@ -8,11 +8,10 @@ echo ==========================================
set SRC_ROOT=%~dp0
set EIGEN=%SRC_ROOT%third_party\eigen-3.4.0
set KISSFFT=%SRC_ROOT%third_party\kiss_fft
set ONNX_INC=%SRC_ROOT%third_party\onnxruntime\include
set ONNX_LIB=%SRC_ROOT%third_party\onnxruntime\lib\libonnxruntime.a
set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%KISSFFT% -I%ONNX_INC%
set INCLUDES=-I%SRC_ROOT%include -I%EIGEN% -I%ONNX_INC%
set FLAGS=-std=c++17 -O2 -D_USE_MATH_DEFINES -Wa,-mbig-obj
set LIBS=%ONNX_LIB%

@ -24,15 +24,22 @@ WavFileSource::WavFileSource(const std::string& path, int target_sample_rate)
: path_(path), target_sample_rate_(target_sample_rate), fp_(nullptr) {}
WavFileSource::~WavFileSource() {
if (fp_) fclose(fp_);
close();
}
void WavFileSource::close() {
if (fp_) {
fclose(fp_);
fp_ = nullptr;
}
}
bool WavFileSource::open() {
close();
fp_ = fopen(path_.c_str(), "rb");
if (!fp_) return false;
if (!parse_wav_header()) {
fclose(fp_);
fp_ = nullptr;
close();
return false;
}
return true;
@ -42,7 +49,7 @@ bool WavFileSource::parse_wav_header() {
WavHeader hdr;
if (fread(&hdr, sizeof(hdr), 1, fp_) != 1) return false;
if (std::memcmp(hdr.riff, "RIFF", 4) != 0 || std::memcmp(hdr.wave, "WAVE", 4) != 0) return false;
if (hdr.audio_format != 1) return false; // PCM only
if (hdr.audio_format != 1 && hdr.audio_format != 3) return false; // PCM or IEEE float
num_channels_ = hdr.num_channels;
file_sample_rate_ = hdr.sample_rate;
@ -55,7 +62,9 @@ bool WavFileSource::parse_wav_header() {
if (fread(&chunk_size, 4, 1, fp_) != 1) return false;
if (std::memcmp(chunk_id, "data", 4) == 0) {
data_start_ = ftell(fp_);
total_samples_ = chunk_size / (num_channels_ * (bits_per_sample_ / 8));
int bytes_per_sample = bits_per_sample_ / 8;
if (bytes_per_sample == 0) bytes_per_sample = 1;
total_samples_ = chunk_size / (num_channels_ * bytes_per_sample);
read_pos_ = 0;
return true;
}
@ -64,6 +73,42 @@ bool WavFileSource::parse_wav_header() {
return false;
}
namespace {
float convert_sample(const uint8_t* raw, int bytes, bool is_float) {
if (is_float) {
if (bytes == 4) {
float val;
std::memcpy(&val, raw, 4);
return val;
}
return 0.0f;
}
// Integer PCM
if (bytes == 1) {
int32_t val = static_cast<int32_t>(raw[0]) - 128;
return val / 128.0f;
} else if (bytes == 2) {
int16_t val;
std::memcpy(&val, raw, 2);
return val / 32768.0f;
} else if (bytes == 3) {
int32_t val = (static_cast<int32_t>(raw[2]) << 16)
| (static_cast<int32_t>(raw[1]) << 8)
| static_cast<int32_t>(raw[0]);
// Sign-extend 24-bit to 32-bit
if (val & 0x800000) val |= 0xFF000000;
return val / 8388608.0f;
} else if (bytes >= 4) {
int32_t val;
std::memcpy(&val, raw, 4);
return std::max(-1.0f, std::min(1.0f, val / 2147483648.0f));
}
return 0.0f;
}
} // anonymous namespace
void WavFileSource::resample_if_needed(std::vector<float>& mono, int src_rate, int dst_rate) {
if (src_rate == dst_rate || dst_rate <= 0) return;
double ratio = static_cast<double>(dst_rate) / src_rate;
@ -86,7 +131,10 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
if (samples_to_read == 0) return 0;
int bytes_per_sample = bits_per_sample_ / 8;
if (bytes_per_sample == 0) bytes_per_sample = 1;
int block_align = num_channels_ * bytes_per_sample;
bool is_float = (bits_per_sample_ == 32); // Assume 32-bit is float if audio_format==3
std::vector<uint8_t> raw(samples_to_read * block_align);
fseek(fp_, static_cast<long>(data_start_ + read_pos_ * block_align), SEEK_SET);
size_t read_blocks = fread(raw.data(), block_align, samples_to_read, fp_);
@ -99,9 +147,8 @@ size_t WavFileSource::read(std::vector<std::vector<float>>& out, size_t max_samp
for (size_t i = 0; i < read_blocks; ++i) {
for (int ch = 0; ch < num_channels_; ++ch) {
int16_t sample = 0;
std::memcpy(&sample, &raw[i * block_align + ch * bytes_per_sample], bytes_per_sample);
out[ch][i] = sample / 32768.0f;
out[ch][i] = convert_sample(&raw[i * block_align + ch * bytes_per_sample],
bytes_per_sample, is_float);
}
}

Loading…
Cancel
Save