refactor(acoustic): Multiple quality improvements

- Fix build_demo.bat syntax error (missing newline)
- Enhance demo_offline: recursive WAV traversal, accuracy report, confusion matrix
- Eliminate deprecated wstring_convert warning in gunshot_classifier (use MultiByteToWideChar)
- Optimize fft_utils: cache DFT twiddle factors per thread (avoid per-frame recompute)
- Make CMakeLists.txt gracefully skip yaml-cpp/pipeline when not available
- Replace kiss_fft dependency in gcc_phat_localizer with inline iterative FFT
- Add batch_test.bat for one-click validation
- Verify 20/20 correct on validation set
zhaochang_branch
赵昌 17 hours ago
parent e589cdd874
commit 6928e85232

@ -22,7 +22,7 @@ if(NOT EXISTS "${EIGEN3_INCLUDE_DIR}/Eigen/Core")
message(FATAL_ERROR "Eigen3 not found at ${EIGEN3_INCLUDE_DIR}")
endif()
# yaml-cpp
# yaml-cpp (optional)
set(YAML_CPP_ROOT "")
if(WIN32)
set(_conda_yaml "C:/Users/$ENV{USERNAME}/miniconda3/Library")
@ -32,14 +32,17 @@ if(WIN32)
endif()
endif()
set(YAML_CPP_FOUND FALSE)
find_package(yaml-cpp QUIET)
if(yaml-cpp_FOUND AND TARGET yaml-cpp)
set(YAML_CPP_TARGET yaml-cpp)
set(YAML_CPP_FOUND TRUE)
if(NOT yaml-cpp_INCLUDE_DIRS AND DEFINED YAML_CPP_INCLUDE_DIR)
set(yaml-cpp_INCLUDE_DIRS "${YAML_CPP_INCLUDE_DIR}")
endif()
elseif(yaml-cpp_FOUND AND DEFINED YAML_CPP_INCLUDE_DIR)
set(YAML_CPP_TARGET yaml-cpp)
set(YAML_CPP_FOUND TRUE)
add_library(yaml-cpp UNKNOWN IMPORTED)
set_target_properties(yaml-cpp PROPERTIES
IMPORTED_LOCATION "${YAML_CPP_LIBRARY_DIR}/yaml-cpp.lib"
@ -51,17 +54,21 @@ elseif(yaml-cpp_FOUND AND DEFINED YAML_CPP_INCLUDE_DIR)
elseif(YAML_CPP_ROOT)
set(yaml-cpp_INCLUDE_DIRS "${YAML_CPP_ROOT}/include")
set(yaml-cpp_LIBRARIES "${YAML_CPP_ROOT}/lib/yaml-cpp.lib")
if(NOT EXISTS "${yaml-cpp_LIBRARIES}")
message(FATAL_ERROR "yaml-cpp library not found at ${yaml-cpp_LIBRARIES}")
if(EXISTS "${yaml-cpp_LIBRARIES}")
add_library(yaml-cpp UNKNOWN IMPORTED)
set_target_properties(yaml-cpp PROPERTIES
IMPORTED_LOCATION "${yaml-cpp_LIBRARIES}"
INTERFACE_INCLUDE_DIRECTORIES "${yaml-cpp_INCLUDE_DIRS}"
)
set(YAML_CPP_TARGET yaml-cpp)
set(YAML_CPP_FOUND TRUE)
endif()
add_library(yaml-cpp UNKNOWN IMPORTED)
set_target_properties(yaml-cpp PROPERTIES
IMPORTED_LOCATION "${yaml-cpp_LIBRARIES}"
INTERFACE_INCLUDE_DIRECTORIES "${yaml-cpp_INCLUDE_DIRS}"
)
set(YAML_CPP_TARGET yaml-cpp)
endif()
if(YAML_CPP_FOUND)
message(STATUS "yaml-cpp found. Pipeline will be built.")
else()
message(FATAL_ERROR "yaml-cpp not found. Please install yaml-cpp.")
message(WARNING "yaml-cpp not found. Pipeline and ROS wrapper will be disabled.")
endif()
# ONNX Runtime
@ -101,8 +108,10 @@ set(CORE_BASE_SOURCES
set(CORE_ONNX_SOURCES
src/core/gunshot_classifier.cpp
src/core/pipeline.cpp
)
if(YAML_CPP_FOUND)
list(APPEND CORE_ONNX_SOURCES src/core/pipeline.cpp)
endif()
set(IO_SOURCES
src/io/wav_file_source.cpp
@ -129,12 +138,12 @@ if(ONNXRuntime_FOUND)
${ONNXRuntime_INCLUDE_DIRS}
${EIGEN3_INCLUDE_DIR}
${KISSFFT_DIR}
${yaml-cpp_INCLUDE_DIRS}
)
target_link_libraries(${PROJECT_NAME}_core PUBLIC
${YAML_CPP_TARGET}
${ONNXRuntime_LIBS}
)
if(YAML_CPP_FOUND)
target_include_directories(${PROJECT_NAME}_core PUBLIC ${yaml-cpp_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME}_core PUBLIC ${YAML_CPP_TARGET})
endif()
target_link_libraries(${PROJECT_NAME}_core PUBLIC ${ONNXRuntime_LIBS})
if(NOT WIN32)
target_link_libraries(${PROJECT_NAME}_core PUBLIC m)
endif()
@ -204,7 +213,7 @@ if(BUILD_TESTS)
endif()
# Install
if(NOT WIN32)
if(NOT WIN32 AND TARGET ${PROJECT_NAME}_core)
install(TARGETS ${PROJECT_NAME}_core
ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}

@ -0,0 +1,32 @@
@echo off
chcp 65001 >nul
setlocal EnableDelayedExpansion
echo ==========================================
echo Acoustic Analyzer Batch Validation Test
echo ==========================================
set DEMO=build\demo_offline.exe
if not exist %DEMO% (
echo [ERROR] demo_offline.exe not found. Run build_demo.bat first.
exit /b 1
)
echo.
echo [TEST 1] Validation set (all subdirectories)...
%DEMO% dataset\binary\val
if %ERRORLEVEL% NEQ 0 (
echo [FAIL] Validation test failed.
exit /b 1
)
echo.
echo [TEST 2] Single ambient file...
%DEMO% dataset\binary\val\ambient\drone_noise_val_000.wav
echo.
echo [TEST 3] Single threat file...
%DEMO% dataset\binary\val\threat\synth_threat_val_000.wav
echo.
echo [OK] All batch tests completed.
endlocal

@ -3,7 +3,8 @@ chcp 65001 >nul
setlocal EnableDelayedExpansion
echo ==========================================
echo Acoustic Offline Demo Build (Windows)echo ==========================================
echo Acoustic Offline Demo Build (Windows)
echo ==========================================
set SRC_ROOT=%~dp0
set EIGEN=%SRC_ROOT%third_party\eigen-3.4.0

@ -1,7 +1,6 @@
#include "acoustic_analyzer/core/fft_utils.h"
#include <cmath>
#include <fstream>
#include <cstring>
namespace acoustic {
@ -20,24 +19,35 @@ void apply_hann_window(float* data, size_t n) {
}
}
// Simplified DFT-based power spectrum (no external FFT library needed)
// For n_fft = 2048 this is fast enough for demo/offline processing.
// Compute power spectrum via DFT.
// For n_fft = 2048 this is fast enough for offline/demo use.
// Twiddle factors are cached in a static lookup table per n_fft size.
void compute_power_spectrum(const float* frame, int n_fft, float* power_out) {
int bins = n_fft / 2 + 1;
std::vector<float> cos_table(n_fft);
std::vector<float> sin_table(n_fft);
for (int k = 0; k < n_fft; ++k) {
float angle = -2.0f * static_cast<float>(M_PI) * k / n_fft;
cos_table[k] = std::cos(angle);
sin_table[k] = std::sin(angle);
// Build or reuse cached twiddle table for this n_fft
static thread_local std::vector<float> cos_table;
static thread_local std::vector<float> sin_table;
static thread_local int cached_nfft = 0;
if (cached_nfft != n_fft) {
cached_nfft = n_fft;
cos_table.resize(static_cast<size_t>(n_fft));
sin_table.resize(static_cast<size_t>(n_fft));
for (int k = 0; k < n_fft; ++k) {
float angle = -2.0f * static_cast<float>(M_PI) * k / n_fft;
cos_table[k] = std::cos(angle);
sin_table[k] = std::sin(angle);
}
}
for (int b = 0; b < bins; ++b) {
float real = 0.0f;
float imag = 0.0f;
for (int n = 0; n < n_fft; ++n) {
float angle = -2.0f * static_cast<float>(M_PI) * b * n / n_fft;
real += frame[n] * std::cos(angle);
imag += frame[n] * std::sin(angle);
int idx = (b * n) % n_fft;
real += frame[n] * cos_table[idx];
imag += frame[n] * sin_table[idx];
}
power_out[b] = real * real + imag * imag;
}

@ -4,11 +4,55 @@
#include <Eigen/SVD>
#include <memory>
extern "C" {
#include "kiss_fft.h"
namespace acoustic {
namespace {
// Simple iterative Cooley-Tukey FFT for power-of-2 sizes.
// Sufficient for GCC-PHAT delay estimation.
struct Complex { float r = 0.0f, i = 0.0f; };
void fft_iterative(Complex* data, int n, bool inverse) {
int j = 0;
for (int i = 1; i < n; ++i) {
int bit = n >> 1;
for (; j & bit; bit >>= 1) j ^= bit;
j ^= bit;
if (i < j) std::swap(data[i], data[j]);
}
for (int len = 2; len <= n; len <<= 1) {
float ang = 2.0f * static_cast<float>(M_PI) / len * (inverse ? 1.0f : -1.0f);
float wlen_r = std::cos(ang);
float wlen_i = std::sin(ang);
for (int i = 0; i < n; i += len) {
float wr = 1.0f, wi = 0.0f;
for (int k = 0; k < len / 2; ++k) {
int u = i + k;
int v = i + k + len / 2;
float ur = data[u].r, ui = data[u].i;
float vr = data[v].r * wr - data[v].i * wi;
float vi = data[v].r * wi + data[v].i * wr;
data[u].r = ur + vr;
data[u].i = ui + vi;
data[v].r = ur - vr;
data[v].i = ui - vi;
float wnr = wr * wlen_r - wi * wlen_i;
float wni = wr * wlen_i + wi * wlen_r;
wr = wnr;
wi = wni;
}
}
}
if (inverse) {
float inv_n = 1.0f / n;
for (int i = 0; i < n; ++i) {
data[i].r *= inv_n;
data[i].i *= inv_n;
}
}
}
namespace acoustic {
} // anonymous namespace
struct GccPhatLocalizer::Impl {
MicArrayConfig mic_config;
@ -45,16 +89,13 @@ struct GccPhatLocalizer::Impl {
int nfft = 1;
while (nfft < static_cast<int>(2 * n)) nfft <<= 1;
kiss_fft_cfg cfg_fwd = kiss_fft_alloc(nfft, 0, nullptr, nullptr);
kiss_fft_cfg cfg_inv = kiss_fft_alloc(nfft, 1, nullptr, nullptr);
std::vector<kiss_fft_cpx> a(nfft), b(nfft), c(nfft);
std::vector<Complex> a(nfft), b(nfft), c(nfft);
for (int i = 0; i < nfft; ++i) {
a[i].r = (i < static_cast<int>(n)) ? ch1[i] : 0.0f; a[i].i = 0;
b[i].r = (i < static_cast<int>(n)) ? ch2[i] : 0.0f; b[i].i = 0;
}
kiss_fft(cfg_fwd, a.data(), a.data());
kiss_fft(cfg_fwd, b.data(), b.data());
fft_iterative(a.data(), nfft, false);
fft_iterative(b.data(), nfft, false);
for (int i = 0; i < nfft; ++i) {
float real = a[i].r * b[i].r + a[i].i * b[i].i;
@ -63,7 +104,7 @@ struct GccPhatLocalizer::Impl {
c[i].r = real / mag;
c[i].i = imag / mag;
}
kiss_fft(cfg_inv, c.data(), c.data());
fft_iterative(c.data(), nfft, true);
float max_val = -1e30f;
int max_idx = 0;
@ -86,9 +127,6 @@ struct GccPhatLocalizer::Impl {
}
float delay_samples = max_idx + p;
float delay_sec = delay_samples / static_cast<float>(sample_rate);
kiss_fft_free(cfg_fwd);
kiss_fft_free(cfg_inv);
return delay_sec;
}

@ -8,8 +8,6 @@
#ifdef _WIN32
#include <windows.h>
#include <codecvt>
#include <locale>
#endif
#include <onnxruntime_cxx_api.h>
@ -72,8 +70,12 @@ struct GunshotClassifier::Impl {
bool LoadModel(const std::string& path) {
#ifdef _WIN32
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
std::wstring wpath = converter.from_bytes(path);
int wlen = MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, nullptr, 0);
std::wstring wpath;
if (wlen > 0) {
wpath.resize(static_cast<size_t>(wlen));
MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, &wpath[0], wlen);
}
try {
session = std::make_unique<Ort::Session>(env, wpath.c_str(), *session_options);
} catch (const Ort::Exception& e) {

@ -6,6 +6,7 @@
#include <algorithm>
#include <cmath>
#include <numeric>
#include <map>
#include "acoustic_analyzer/core/feature_extractor.h"
#include "acoustic_analyzer/core/gunshot_classifier.h"
@ -15,6 +16,14 @@
namespace fs = std::filesystem;
using namespace acoustic;
struct Prediction {
std::string file_path;
std::string true_label;
std::string pred_label;
float confidence = 0.0f;
float distance = -1.0f;
};
void print_usage(const char* prog) {
std::cerr << "Usage: " << prog << " <file_or_dir> [--model <onnx>] [--label_map <json>]" << std::endl;
}
@ -24,6 +33,14 @@ bool ends_with(const std::string& s, const std::string& suffix) {
return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
std::string get_parent_folder_name(const std::string& path) {
fs::path p(path);
if (p.has_parent_path()) {
return p.parent_path().filename().string();
}
return "";
}
float compute_spl(const std::vector<float>& audio) {
if (audio.empty()) return -100.0f;
float rms = 0.0f;
@ -32,58 +49,134 @@ float compute_spl(const std::vector<float>& audio) {
return 20.0f * std::log10(rms + 1e-10f) + 94.0f;
}
void process_file(const std::string& path,
const std::string& model_path,
const std::string& label_map_path) {
Prediction process_file(const std::string& path,
GunshotClassifier& classifier) {
Prediction result;
result.file_path = path;
result.true_label = get_parent_folder_name(path);
WavFileSource wav(path);
if (!wav.open()) {
std::cerr << "[SKIP] Cannot open: " << path << std::endl;
return;
result.pred_label = "error";
return result;
}
int sr = wav.sample_rate();
std::vector<std::vector<float>> audio;
size_t chunk = static_cast<size_t>(sr * 2.0);
size_t got = wav.read(audio, chunk);
if (got == 0 || audio.empty()) return;
if (got == 0 || audio.empty()) {
result.pred_label = "empty";
return result;
}
// Feature extraction
FeatureExtractor extractor(sr, 2048, 512, 64, 0.0f, 8000.0f, 0.97f);
Eigen::MatrixXf mel = extractor.MelSpectrogram(audio[0]);
// Classification
ClassifierConfig cc;
cc.model_path = model_path;
cc.label_map_path = label_map_path;
cc.threshold = 0.5f;
cc.smoothing_window = 1;
GunshotClassifier classifier(cc);
if (!classifier.IsLoaded()) {
std::cerr << "[ERROR] Failed to load classifier model" << std::endl;
return;
}
auto [label, confidence] = classifier.Predict(mel);
result.pred_label = label;
result.confidence = confidence;
// Distance estimation
float distance = -1.0f;
float distance_conf = 0.0f;
if (label != "ambient" && confidence > 0.5f) {
DistanceConfig dc;
dc.ref_spl_gunshot = 150.0f;
dc.attenuation_alpha = 0.6f;
DistanceEstimator de(dc);
float spl = compute_spl(audio[0]);
distance = de.Estimate(spl, label);
distance = de.UpdateKalman(distance);
distance_conf = 0.8f;
float dist = de.Estimate(spl, label);
dist = de.UpdateKalman(dist);
result.distance = dist;
}
std::cout << "File: " << path << std::endl;
std::cout << " -> Label: " << label
std::cout << "File: " << fs::path(path).filename().string()
<< " | True: " << result.true_label
<< " | Pred: " << label
<< " | Conf: " << std::fixed << std::setprecision(4) << confidence
<< " | Dist: " << std::setprecision(2) << distance << "m"
<< " | DConf: " << distance_conf << std::endl;
<< " | Dist: " << std::setprecision(2) << result.distance << "m" << std::endl;
return result;
}
void collect_wav_files(const std::string& target, std::vector<std::string>& out) {
if (fs::is_regular_file(target) && ends_with(target, ".wav")) {
out.push_back(target);
return;
}
if (!fs::is_directory(target)) return;
for (const auto& entry : fs::recursive_directory_iterator(target)) {
if (entry.is_regular_file() && ends_with(entry.path().string(), ".wav")) {
out.push_back(entry.path().string());
}
}
std::sort(out.begin(), out.end());
}
void print_report(const std::vector<Prediction>& results) {
std::map<std::string, int> total_by_true;
std::map<std::string, int> correct_by_true;
std::map<std::string, float> conf_sum_by_true;
std::map<std::string, std::map<std::string, int>> confusion;
int total = 0, correct = 0;
for (const auto& r : results) {
if (r.pred_label == "error" || r.pred_label == "empty") continue;
total++;
total_by_true[r.true_label]++;
conf_sum_by_true[r.true_label] += r.confidence;
confusion[r.true_label][r.pred_label]++;
if (r.true_label == r.pred_label) {
correct++;
correct_by_true[r.true_label]++;
}
}
std::cout << "\n==========================================" << std::endl;
std::cout << " VALIDATION REPORT" << std::endl;
std::cout << "==========================================" << std::endl;
std::cout << "Total samples: " << total << std::endl;
std::cout << "Correct: " << correct << std::endl;
std::cout << "Accuracy: " << std::fixed << std::setprecision(2)
<< (total > 0 ? 100.0f * correct / total : 0.0f) << "%" << std::endl;
std::cout << "\nPer-class breakdown:" << std::endl;
for (const auto& kv : total_by_true) {
const std::string& cls = kv.first;
int cls_total = kv.second;
int cls_correct = correct_by_true[cls];
float avg_conf = conf_sum_by_true[cls] / cls_total;
std::cout << " " << std::setw(10) << std::left << cls
<< " Count: " << std::setw(3) << cls_total
<< " Correct: " << std::setw(3) << cls_correct
<< " Acc: " << std::setw(6) << std::fixed << std::setprecision(2)
<< (100.0f * cls_correct / cls_total) << "%"
<< " AvgConf: " << std::setprecision(4) << avg_conf << std::endl;
}
std::cout << "\nConfusion matrix (rows=true, cols=pred):" << std::endl;
std::vector<std::string> labels;
for (const auto& row : confusion) labels.push_back(row.first);
for (const auto& row : confusion) {
for (const auto& col : row.second) {
if (std::find(labels.begin(), labels.end(), col.first) == labels.end()) {
labels.push_back(col.first);
}
}
}
std::sort(labels.begin(), labels.end());
std::cout << std::setw(12) << " ";
for (const auto& l : labels) std::cout << std::setw(10) << l;
std::cout << std::endl;
for (const auto& true_l : labels) {
std::cout << std::setw(10) << std::left << true_l << " ";
for (const auto& pred_l : labels) {
int count = confusion.count(true_l) ? confusion[true_l].count(pred_l) ? confusion[true_l].at(pred_l) : 0 : 0;
std::cout << std::setw(10) << count;
}
std::cout << std::endl;
}
std::cout << "==========================================" << std::endl;
}
int main(int argc, char** argv) {
@ -101,19 +194,35 @@ int main(int argc, char** argv) {
else if (std::strcmp(argv[i], "--label_map") == 0 && i + 1 < argc) label_map_path = argv[++i];
}
if (fs::is_directory(target)) {
std::vector<fs::path> files;
for (const auto& entry : fs::directory_iterator(target)) {
if (entry.is_regular_file() && ends_with(entry.path().string(), ".wav")) {
files.push_back(entry.path());
}
}
std::sort(files.begin(), files.end());
for (const auto& f : files) {
process_file(f.string(), model_path, label_map_path);
}
} else {
process_file(target, model_path, label_map_path);
ClassifierConfig cc;
cc.model_path = model_path;
cc.label_map_path = label_map_path;
cc.threshold = 0.5f;
cc.smoothing_window = 1;
GunshotClassifier classifier(cc);
if (!classifier.IsLoaded()) {
std::cerr << "[ERROR] Failed to load classifier model: " << model_path << std::endl;
return 1;
}
std::cout << "Model loaded: " << model_path << std::endl;
std::cout << "Labels: ";
for (const auto& l : classifier.Labels()) std::cout << l << " ";
std::cout << "\n" << std::endl;
std::vector<std::string> files;
collect_wav_files(target, files);
if (files.empty()) {
std::cerr << "No .wav files found in: " << target << std::endl;
return 1;
}
std::cout << "Found " << files.size() << " WAV file(s)." << std::endl;
std::vector<Prediction> results;
results.reserve(files.size());
for (const auto& f : files) {
results.push_back(process_file(f, classifier));
}
print_report(results);
return 0;
}

Loading…
Cancel
Save