diff --git a/src/drone-software/src/acoustic/CMakeLists.txt b/src/drone-software/src/acoustic/CMakeLists.txt index 90f4b7c..0055e85 100644 --- a/src/drone-software/src/acoustic/CMakeLists.txt +++ b/src/drone-software/src/acoustic/CMakeLists.txt @@ -22,7 +22,7 @@ if(NOT EXISTS "${EIGEN3_INCLUDE_DIR}/Eigen/Core") message(FATAL_ERROR "Eigen3 not found at ${EIGEN3_INCLUDE_DIR}") endif() -# ── yaml-cpp ───────────────────────────────────────────────────── +# ── yaml-cpp (optional) ────────────────────────────────────────── set(YAML_CPP_ROOT "") if(WIN32) set(_conda_yaml "C:/Users/$ENV{USERNAME}/miniconda3/Library") @@ -32,14 +32,17 @@ if(WIN32) endif() endif() +set(YAML_CPP_FOUND FALSE) find_package(yaml-cpp QUIET) if(yaml-cpp_FOUND AND TARGET yaml-cpp) set(YAML_CPP_TARGET yaml-cpp) + set(YAML_CPP_FOUND TRUE) if(NOT yaml-cpp_INCLUDE_DIRS AND DEFINED YAML_CPP_INCLUDE_DIR) set(yaml-cpp_INCLUDE_DIRS "${YAML_CPP_INCLUDE_DIR}") endif() elseif(yaml-cpp_FOUND AND DEFINED YAML_CPP_INCLUDE_DIR) set(YAML_CPP_TARGET yaml-cpp) + set(YAML_CPP_FOUND TRUE) add_library(yaml-cpp UNKNOWN IMPORTED) set_target_properties(yaml-cpp PROPERTIES IMPORTED_LOCATION "${YAML_CPP_LIBRARY_DIR}/yaml-cpp.lib" @@ -51,17 +54,21 @@ elseif(yaml-cpp_FOUND AND DEFINED YAML_CPP_INCLUDE_DIR) elseif(YAML_CPP_ROOT) set(yaml-cpp_INCLUDE_DIRS "${YAML_CPP_ROOT}/include") set(yaml-cpp_LIBRARIES "${YAML_CPP_ROOT}/lib/yaml-cpp.lib") - if(NOT EXISTS "${yaml-cpp_LIBRARIES}") - message(FATAL_ERROR "yaml-cpp library not found at ${yaml-cpp_LIBRARIES}") + if(EXISTS "${yaml-cpp_LIBRARIES}") + add_library(yaml-cpp UNKNOWN IMPORTED) + set_target_properties(yaml-cpp PROPERTIES + IMPORTED_LOCATION "${yaml-cpp_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${yaml-cpp_INCLUDE_DIRS}" + ) + set(YAML_CPP_TARGET yaml-cpp) + set(YAML_CPP_FOUND TRUE) endif() - add_library(yaml-cpp UNKNOWN IMPORTED) - set_target_properties(yaml-cpp PROPERTIES - IMPORTED_LOCATION "${yaml-cpp_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${yaml-cpp_INCLUDE_DIRS}" - ) - set(YAML_CPP_TARGET yaml-cpp) +endif() + +if(YAML_CPP_FOUND) + message(STATUS "yaml-cpp found. Pipeline will be built.") else() - message(FATAL_ERROR "yaml-cpp not found. Please install yaml-cpp.") + message(WARNING "yaml-cpp not found. Pipeline and ROS wrapper will be disabled.") endif() # ── ONNX Runtime ───────────────────────────────────────────────── @@ -101,8 +108,10 @@ set(CORE_BASE_SOURCES set(CORE_ONNX_SOURCES src/core/gunshot_classifier.cpp - src/core/pipeline.cpp ) +if(YAML_CPP_FOUND) + list(APPEND CORE_ONNX_SOURCES src/core/pipeline.cpp) +endif() set(IO_SOURCES src/io/wav_file_source.cpp @@ -129,12 +138,12 @@ if(ONNXRuntime_FOUND) ${ONNXRuntime_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} ${KISSFFT_DIR} - ${yaml-cpp_INCLUDE_DIRS} - ) - target_link_libraries(${PROJECT_NAME}_core PUBLIC - ${YAML_CPP_TARGET} - ${ONNXRuntime_LIBS} ) + if(YAML_CPP_FOUND) + target_include_directories(${PROJECT_NAME}_core PUBLIC ${yaml-cpp_INCLUDE_DIRS}) + target_link_libraries(${PROJECT_NAME}_core PUBLIC ${YAML_CPP_TARGET}) + endif() + target_link_libraries(${PROJECT_NAME}_core PUBLIC ${ONNXRuntime_LIBS}) if(NOT WIN32) target_link_libraries(${PROJECT_NAME}_core PUBLIC m) endif() @@ -204,7 +213,7 @@ if(BUILD_TESTS) endif() # ── Install ────────────────────────────────────────────────────── -if(NOT WIN32) +if(NOT WIN32 AND TARGET ${PROJECT_NAME}_core) install(TARGETS ${PROJECT_NAME}_core ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} diff --git a/src/drone-software/src/acoustic/batch_test.bat b/src/drone-software/src/acoustic/batch_test.bat new file mode 100644 index 0000000..fc6f89b --- /dev/null +++ b/src/drone-software/src/acoustic/batch_test.bat @@ -0,0 +1,32 @@ +@echo off +chcp 65001 >nul +setlocal EnableDelayedExpansion + +echo ========================================== +echo Acoustic Analyzer Batch Validation Test +echo ========================================== +set DEMO=build\demo_offline.exe +if not exist %DEMO% ( + echo [ERROR] demo_offline.exe not found. Run build_demo.bat first. + exit /b 1 +) + +echo. +echo [TEST 1] Validation set (all subdirectories)... +%DEMO% dataset\binary\val +if %ERRORLEVEL% NEQ 0 ( + echo [FAIL] Validation test failed. + exit /b 1 +) + +echo. +echo [TEST 2] Single ambient file... +%DEMO% dataset\binary\val\ambient\drone_noise_val_000.wav + +echo. +echo [TEST 3] Single threat file... +%DEMO% dataset\binary\val\threat\synth_threat_val_000.wav + +echo. +echo [OK] All batch tests completed. +endlocal diff --git a/src/drone-software/src/acoustic/build/demo_offline.exe b/src/drone-software/src/acoustic/build/demo_offline.exe index 9af1e0a..6d0464a 100644 Binary files a/src/drone-software/src/acoustic/build/demo_offline.exe and b/src/drone-software/src/acoustic/build/demo_offline.exe differ diff --git a/src/drone-software/src/acoustic/build_demo.bat b/src/drone-software/src/acoustic/build_demo.bat index 78549b0..7499f38 100644 --- a/src/drone-software/src/acoustic/build_demo.bat +++ b/src/drone-software/src/acoustic/build_demo.bat @@ -3,7 +3,8 @@ chcp 65001 >nul setlocal EnableDelayedExpansion echo ========================================== -echo Acoustic Offline Demo Build (Windows)echo ========================================== +echo Acoustic Offline Demo Build (Windows) +echo ========================================== set SRC_ROOT=%~dp0 set EIGEN=%SRC_ROOT%third_party\eigen-3.4.0 diff --git a/src/drone-software/src/acoustic/src/core/fft_utils.cpp b/src/drone-software/src/acoustic/src/core/fft_utils.cpp index 0c9598c..347df83 100644 --- a/src/drone-software/src/acoustic/src/core/fft_utils.cpp +++ b/src/drone-software/src/acoustic/src/core/fft_utils.cpp @@ -1,7 +1,6 @@ #include "acoustic_analyzer/core/fft_utils.h" #include #include -#include namespace acoustic { @@ -20,24 +19,35 @@ void apply_hann_window(float* data, size_t n) { } } -// Simplified DFT-based power spectrum (no external FFT library needed) -// For n_fft = 2048 this is fast enough for demo/offline processing. +// Compute power spectrum via DFT. +// For n_fft = 2048 this is fast enough for offline/demo use. +// Twiddle factors are cached in a static lookup table per n_fft size. void compute_power_spectrum(const float* frame, int n_fft, float* power_out) { int bins = n_fft / 2 + 1; - std::vector cos_table(n_fft); - std::vector sin_table(n_fft); - for (int k = 0; k < n_fft; ++k) { - float angle = -2.0f * static_cast(M_PI) * k / n_fft; - cos_table[k] = std::cos(angle); - sin_table[k] = std::sin(angle); + + // Build or reuse cached twiddle table for this n_fft + static thread_local std::vector cos_table; + static thread_local std::vector sin_table; + static thread_local int cached_nfft = 0; + + if (cached_nfft != n_fft) { + cached_nfft = n_fft; + cos_table.resize(static_cast(n_fft)); + sin_table.resize(static_cast(n_fft)); + for (int k = 0; k < n_fft; ++k) { + float angle = -2.0f * static_cast(M_PI) * k / n_fft; + cos_table[k] = std::cos(angle); + sin_table[k] = std::sin(angle); + } } + for (int b = 0; b < bins; ++b) { float real = 0.0f; float imag = 0.0f; for (int n = 0; n < n_fft; ++n) { - float angle = -2.0f * static_cast(M_PI) * b * n / n_fft; - real += frame[n] * std::cos(angle); - imag += frame[n] * std::sin(angle); + int idx = (b * n) % n_fft; + real += frame[n] * cos_table[idx]; + imag += frame[n] * sin_table[idx]; } power_out[b] = real * real + imag * imag; } diff --git a/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp b/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp index 9a3ca11..66194a0 100644 --- a/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp +++ b/src/drone-software/src/acoustic/src/core/gcc_phat_localizer.cpp @@ -4,11 +4,55 @@ #include #include -extern "C" { -#include "kiss_fft.h" +namespace acoustic { + +namespace { + +// Simple iterative Cooley-Tukey FFT for power-of-2 sizes. +// Sufficient for GCC-PHAT delay estimation. +struct Complex { float r = 0.0f, i = 0.0f; }; + +void fft_iterative(Complex* data, int n, bool inverse) { + int j = 0; + for (int i = 1; i < n; ++i) { + int bit = n >> 1; + for (; j & bit; bit >>= 1) j ^= bit; + j ^= bit; + if (i < j) std::swap(data[i], data[j]); + } + for (int len = 2; len <= n; len <<= 1) { + float ang = 2.0f * static_cast(M_PI) / len * (inverse ? 1.0f : -1.0f); + float wlen_r = std::cos(ang); + float wlen_i = std::sin(ang); + for (int i = 0; i < n; i += len) { + float wr = 1.0f, wi = 0.0f; + for (int k = 0; k < len / 2; ++k) { + int u = i + k; + int v = i + k + len / 2; + float ur = data[u].r, ui = data[u].i; + float vr = data[v].r * wr - data[v].i * wi; + float vi = data[v].r * wi + data[v].i * wr; + data[u].r = ur + vr; + data[u].i = ui + vi; + data[v].r = ur - vr; + data[v].i = ui - vi; + float wnr = wr * wlen_r - wi * wlen_i; + float wni = wr * wlen_i + wi * wlen_r; + wr = wnr; + wi = wni; + } + } + } + if (inverse) { + float inv_n = 1.0f / n; + for (int i = 0; i < n; ++i) { + data[i].r *= inv_n; + data[i].i *= inv_n; + } + } } -namespace acoustic { +} // anonymous namespace struct GccPhatLocalizer::Impl { MicArrayConfig mic_config; @@ -45,16 +89,13 @@ struct GccPhatLocalizer::Impl { int nfft = 1; while (nfft < static_cast(2 * n)) nfft <<= 1; - kiss_fft_cfg cfg_fwd = kiss_fft_alloc(nfft, 0, nullptr, nullptr); - kiss_fft_cfg cfg_inv = kiss_fft_alloc(nfft, 1, nullptr, nullptr); - std::vector a(nfft), b(nfft), c(nfft); - + std::vector a(nfft), b(nfft), c(nfft); for (int i = 0; i < nfft; ++i) { a[i].r = (i < static_cast(n)) ? ch1[i] : 0.0f; a[i].i = 0; b[i].r = (i < static_cast(n)) ? ch2[i] : 0.0f; b[i].i = 0; } - kiss_fft(cfg_fwd, a.data(), a.data()); - kiss_fft(cfg_fwd, b.data(), b.data()); + fft_iterative(a.data(), nfft, false); + fft_iterative(b.data(), nfft, false); for (int i = 0; i < nfft; ++i) { float real = a[i].r * b[i].r + a[i].i * b[i].i; @@ -63,7 +104,7 @@ struct GccPhatLocalizer::Impl { c[i].r = real / mag; c[i].i = imag / mag; } - kiss_fft(cfg_inv, c.data(), c.data()); + fft_iterative(c.data(), nfft, true); float max_val = -1e30f; int max_idx = 0; @@ -86,9 +127,6 @@ struct GccPhatLocalizer::Impl { } float delay_samples = max_idx + p; float delay_sec = delay_samples / static_cast(sample_rate); - - kiss_fft_free(cfg_fwd); - kiss_fft_free(cfg_inv); return delay_sec; } diff --git a/src/drone-software/src/acoustic/src/core/gunshot_classifier.cpp b/src/drone-software/src/acoustic/src/core/gunshot_classifier.cpp index fdefd3f..494a83e 100644 --- a/src/drone-software/src/acoustic/src/core/gunshot_classifier.cpp +++ b/src/drone-software/src/acoustic/src/core/gunshot_classifier.cpp @@ -8,8 +8,6 @@ #ifdef _WIN32 #include -#include -#include #endif #include @@ -72,8 +70,12 @@ struct GunshotClassifier::Impl { bool LoadModel(const std::string& path) { #ifdef _WIN32 - std::wstring_convert> converter; - std::wstring wpath = converter.from_bytes(path); + int wlen = MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, nullptr, 0); + std::wstring wpath; + if (wlen > 0) { + wpath.resize(static_cast(wlen)); + MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, &wpath[0], wlen); + } try { session = std::make_unique(env, wpath.c_str(), *session_options); } catch (const Ort::Exception& e) { diff --git a/src/drone-software/src/acoustic/tests/demo_offline.cpp b/src/drone-software/src/acoustic/tests/demo_offline.cpp index a863f6d..f090700 100644 --- a/src/drone-software/src/acoustic/tests/demo_offline.cpp +++ b/src/drone-software/src/acoustic/tests/demo_offline.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "acoustic_analyzer/core/feature_extractor.h" #include "acoustic_analyzer/core/gunshot_classifier.h" @@ -15,6 +16,14 @@ namespace fs = std::filesystem; using namespace acoustic; +struct Prediction { + std::string file_path; + std::string true_label; + std::string pred_label; + float confidence = 0.0f; + float distance = -1.0f; +}; + void print_usage(const char* prog) { std::cerr << "Usage: " << prog << " [--model ] [--label_map ]" << std::endl; } @@ -24,6 +33,14 @@ bool ends_with(const std::string& s, const std::string& suffix) { return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; } +std::string get_parent_folder_name(const std::string& path) { + fs::path p(path); + if (p.has_parent_path()) { + return p.parent_path().filename().string(); + } + return ""; +} + float compute_spl(const std::vector& audio) { if (audio.empty()) return -100.0f; float rms = 0.0f; @@ -32,58 +49,134 @@ float compute_spl(const std::vector& audio) { return 20.0f * std::log10(rms + 1e-10f) + 94.0f; } -void process_file(const std::string& path, - const std::string& model_path, - const std::string& label_map_path) { +Prediction process_file(const std::string& path, + GunshotClassifier& classifier) { + Prediction result; + result.file_path = path; + result.true_label = get_parent_folder_name(path); + WavFileSource wav(path); if (!wav.open()) { std::cerr << "[SKIP] Cannot open: " << path << std::endl; - return; + result.pred_label = "error"; + return result; } int sr = wav.sample_rate(); std::vector> audio; size_t chunk = static_cast(sr * 2.0); size_t got = wav.read(audio, chunk); - if (got == 0 || audio.empty()) return; + if (got == 0 || audio.empty()) { + result.pred_label = "empty"; + return result; + } - // Feature extraction FeatureExtractor extractor(sr, 2048, 512, 64, 0.0f, 8000.0f, 0.97f); Eigen::MatrixXf mel = extractor.MelSpectrogram(audio[0]); - // Classification - ClassifierConfig cc; - cc.model_path = model_path; - cc.label_map_path = label_map_path; - cc.threshold = 0.5f; - cc.smoothing_window = 1; - GunshotClassifier classifier(cc); - if (!classifier.IsLoaded()) { - std::cerr << "[ERROR] Failed to load classifier model" << std::endl; - return; - } - auto [label, confidence] = classifier.Predict(mel); + result.pred_label = label; + result.confidence = confidence; - // Distance estimation - float distance = -1.0f; - float distance_conf = 0.0f; if (label != "ambient" && confidence > 0.5f) { DistanceConfig dc; dc.ref_spl_gunshot = 150.0f; dc.attenuation_alpha = 0.6f; DistanceEstimator de(dc); float spl = compute_spl(audio[0]); - distance = de.Estimate(spl, label); - distance = de.UpdateKalman(distance); - distance_conf = 0.8f; + float dist = de.Estimate(spl, label); + dist = de.UpdateKalman(dist); + result.distance = dist; } - std::cout << "File: " << path << std::endl; - std::cout << " -> Label: " << label + std::cout << "File: " << fs::path(path).filename().string() + << " | True: " << result.true_label + << " | Pred: " << label << " | Conf: " << std::fixed << std::setprecision(4) << confidence - << " | Dist: " << std::setprecision(2) << distance << "m" - << " | DConf: " << distance_conf << std::endl; + << " | Dist: " << std::setprecision(2) << result.distance << "m" << std::endl; + return result; +} + +void collect_wav_files(const std::string& target, std::vector& out) { + if (fs::is_regular_file(target) && ends_with(target, ".wav")) { + out.push_back(target); + return; + } + if (!fs::is_directory(target)) return; + + for (const auto& entry : fs::recursive_directory_iterator(target)) { + if (entry.is_regular_file() && ends_with(entry.path().string(), ".wav")) { + out.push_back(entry.path().string()); + } + } + std::sort(out.begin(), out.end()); +} + +void print_report(const std::vector& results) { + std::map total_by_true; + std::map correct_by_true; + std::map conf_sum_by_true; + std::map> confusion; + + int total = 0, correct = 0; + for (const auto& r : results) { + if (r.pred_label == "error" || r.pred_label == "empty") continue; + total++; + total_by_true[r.true_label]++; + conf_sum_by_true[r.true_label] += r.confidence; + confusion[r.true_label][r.pred_label]++; + if (r.true_label == r.pred_label) { + correct++; + correct_by_true[r.true_label]++; + } + } + + std::cout << "\n==========================================" << std::endl; + std::cout << " VALIDATION REPORT" << std::endl; + std::cout << "==========================================" << std::endl; + std::cout << "Total samples: " << total << std::endl; + std::cout << "Correct: " << correct << std::endl; + std::cout << "Accuracy: " << std::fixed << std::setprecision(2) + << (total > 0 ? 100.0f * correct / total : 0.0f) << "%" << std::endl; + + std::cout << "\nPer-class breakdown:" << std::endl; + for (const auto& kv : total_by_true) { + const std::string& cls = kv.first; + int cls_total = kv.second; + int cls_correct = correct_by_true[cls]; + float avg_conf = conf_sum_by_true[cls] / cls_total; + std::cout << " " << std::setw(10) << std::left << cls + << " Count: " << std::setw(3) << cls_total + << " Correct: " << std::setw(3) << cls_correct + << " Acc: " << std::setw(6) << std::fixed << std::setprecision(2) + << (100.0f * cls_correct / cls_total) << "%" + << " AvgConf: " << std::setprecision(4) << avg_conf << std::endl; + } + + std::cout << "\nConfusion matrix (rows=true, cols=pred):" << std::endl; + std::vector labels; + for (const auto& row : confusion) labels.push_back(row.first); + for (const auto& row : confusion) { + for (const auto& col : row.second) { + if (std::find(labels.begin(), labels.end(), col.first) == labels.end()) { + labels.push_back(col.first); + } + } + } + std::sort(labels.begin(), labels.end()); + + std::cout << std::setw(12) << " "; + for (const auto& l : labels) std::cout << std::setw(10) << l; + std::cout << std::endl; + for (const auto& true_l : labels) { + std::cout << std::setw(10) << std::left << true_l << " "; + for (const auto& pred_l : labels) { + int count = confusion.count(true_l) ? confusion[true_l].count(pred_l) ? confusion[true_l].at(pred_l) : 0 : 0; + std::cout << std::setw(10) << count; + } + std::cout << std::endl; + } + std::cout << "==========================================" << std::endl; } int main(int argc, char** argv) { @@ -101,19 +194,35 @@ int main(int argc, char** argv) { else if (std::strcmp(argv[i], "--label_map") == 0 && i + 1 < argc) label_map_path = argv[++i]; } - if (fs::is_directory(target)) { - std::vector files; - for (const auto& entry : fs::directory_iterator(target)) { - if (entry.is_regular_file() && ends_with(entry.path().string(), ".wav")) { - files.push_back(entry.path()); - } - } - std::sort(files.begin(), files.end()); - for (const auto& f : files) { - process_file(f.string(), model_path, label_map_path); - } - } else { - process_file(target, model_path, label_map_path); + ClassifierConfig cc; + cc.model_path = model_path; + cc.label_map_path = label_map_path; + cc.threshold = 0.5f; + cc.smoothing_window = 1; + GunshotClassifier classifier(cc); + if (!classifier.IsLoaded()) { + std::cerr << "[ERROR] Failed to load classifier model: " << model_path << std::endl; + return 1; + } + std::cout << "Model loaded: " << model_path << std::endl; + std::cout << "Labels: "; + for (const auto& l : classifier.Labels()) std::cout << l << " "; + std::cout << "\n" << std::endl; + + std::vector files; + collect_wav_files(target, files); + if (files.empty()) { + std::cerr << "No .wav files found in: " << target << std::endl; + return 1; } + std::cout << "Found " << files.size() << " WAV file(s)." << std::endl; + + std::vector results; + results.reserve(files.size()); + for (const auto& f : files) { + results.push_back(process_file(f, classifier)); + } + + print_report(results); return 0; }