// software/generate_blog_doc.js

const { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell,
        HeadingLevel, AlignmentType, BorderStyle, WidthType, ShadingType,
        PageBreak, Header, Footer, PageNumber } = require('docx');
const fs = require('fs');

// A single light-grey edge definition; the same object is reused (by reference)
// for all four sides of every table cell built by cell().
const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
const borders = Object.fromEntries(
    ["top", "bottom", "left", "right"].map((side) => [side, border])
);

/**
 * Build one table cell holding a single line of text.
 *
 * @param {string} text - Text shown in the cell.
 * @param {number} width - Cell width, passed through with WidthType.DXA.
 * @param {{ bold?: boolean, shading?: string }} [opts] - Optional formatting:
 *   `bold` is forwarded to the text run; `shading` is used as the fill colour
 *   when it is truthy.
 * @returns {TableCell} The configured cell.
 */
function cell(text, width, opts = {}) {
    const run = new TextRun({ text, bold: opts.bold, size: 20, font: "微软雅黑" });
    const content = new Paragraph({
        children: [run]
    });

    // The shading key is always present on the options object; it simply stays
    // undefined when no fill colour was requested.
    let shading;
    if (opts.shading) {
        shading = { fill: opts.shading, type: ShadingType.CLEAR };
    }

    return new TableCell({
        borders,
        width: { size: width, type: WidthType.DXA },
        shading,
        margins: { top: 60, bottom: 60, left: 100, right: 100 },
        children: [content]
    });
}

/**
 * Create a level-1 heading paragraph.
 *
 * @param {string} text - Heading text.
 * @returns {Paragraph} Paragraph tagged HeadingLevel.HEADING_1 with one bold run.
 */
function h1(text) {
    const titleRun = new TextRun({ text, bold: true, size: 32, font: "微软雅黑", color: "1a1a2e" });
    return new Paragraph({ heading: HeadingLevel.HEADING_1, children: [titleRun] });
}

/**
 * Create a level-2 heading paragraph.
 *
 * @param {string} text - Heading text.
 * @returns {Paragraph} Paragraph tagged HeadingLevel.HEADING_2 with one bold run.
 */
function h2(text) {
    const runOptions = { text, bold: true, size: 28, font: "微软雅黑", color: "2d3436" };
    const children = [new TextRun(runOptions)];
    return new Paragraph({ heading: HeadingLevel.HEADING_2, children });
}

/**
 * Create a level-3 heading paragraph.
 *
 * @param {string} text - Heading text.
 * @returns {Paragraph} Paragraph tagged HeadingLevel.HEADING_3 with one bold run.
 */
function h3(text) {
    const headingRun = new TextRun({
        text,
        bold: true,
        size: 24,
        font: "微软雅黑",
        color: "2d3436"
    });
    const options = { heading: HeadingLevel.HEADING_3, children: [headingRun] };
    return new Paragraph(options);
}

/**
 * Create a body-text paragraph.
 *
 * @param {string} text - Paragraph text.
 * @param {object} [opts] - Extra TextRun options. They are spread after the
 *   defaults, so a key such as `size` or `font` given here replaces the default.
 * @returns {Paragraph} Paragraph with one run and spacing { after: 120, line: 360 }.
 */
function body(text, opts = {}) {
    const runOptions = { text, size: 21, font: "微软雅黑", ...opts };
    const run = new TextRun(runOptions);
    return new Paragraph({
        children: [run],
        spacing: { after: 120, line: 360 }
    });
}

/**
 * Create an indented, italic, grey paragraph (used in this file for formulas).
 *
 * @param {string} text - Quoted text.
 * @returns {Paragraph} Paragraph with one italic run, spacing
 *   { after: 120, line: 360 } and a left indent of 400.
 */
function quote(text) {
    const quotedRun = new TextRun({ text, italics: true, size: 21, font: "微软雅黑", color: "636e72" });
    const options = {
        children: [quotedRun],
        spacing: { after: 120, line: 360 },
        indent: { left: 400 }
    };
    return new Paragraph(options);
}

/**
 * Create one line of a code/diagram listing: Consolas text on a shaded
 * paragraph background.
 *
 * @param {string} text - A single line of the listing.
 * @returns {Paragraph} Paragraph with one run, fill "f5f6fa" and spacing { after: 60 }.
 */
function code(text) {
    const monoRun = new TextRun({ text, size: 18, font: "Consolas", color: "2d3436" });
    const background = { fill: "f5f6fa", type: ShadingType.CLEAR };
    return new Paragraph({
        children: [monoRun],
        shading: background,
        spacing: { after: 60 }
    });
}

// The complete blog article as one Document with a single section:
// default and heading styles, page setup, header/footer, then the article
// content in reading order, built with the cell/h1/h2/h3/body/quote/code
// helpers defined earlier in this file.
const doc = new Document({
    styles: {
        // Document-wide default run formatting.
        default: { document: { run: { font: "微软雅黑", size: 21 } } },
        // Paragraph styles with ids Heading1-Heading3; their run settings mirror
        // the explicit formatting that h1/h2/h3 also put on each heading run.
        paragraphStyles: [
            { id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true,
              run: { size: 32, bold: true, font: "微软雅黑", color: "1a1a2e" },
              paragraph: { spacing: { before: 300, after: 200 }, outlineLevel: 0 } },
            { id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true,
              run: { size: 28, bold: true, font: "微软雅黑", color: "2d3436" },
              paragraph: { spacing: { before: 240, after: 160 }, outlineLevel: 1 } },
            { id: "Heading3", name: "Heading 3", basedOn: "Normal", next: "Normal", quickFormat: true,
              run: { size: 24, bold: true, font: "微软雅黑", color: "2d3436" },
              paragraph: { spacing: { before: 200, after: 120 }, outlineLevel: 2 } },
        ]
    },
    sections: [{
        properties: {
            // NOTE(review): 11906 x 16838 with 1440 margins looks like A4 with 1-inch
            // margins in the library's page units - confirm against the docx docs.
            page: { size: { width: 11906, height: 16838 }, margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } }
        },
        // Small grey running header with the blog name.
        headers: {
            default: new Header({ children: [new Paragraph({
                children: [new TextRun({ text: "知识荟 · 技术博客", size: 16, color: "888888", font: "微软雅黑" })]
            })] })
        },
        // Centered footer: the current page number placed between "第 " and " 页".
        footers: {
            default: new Footer({ children: [new Paragraph({
                alignment: AlignmentType.CENTER,
                children: [new TextRun({ children: ["第 ", PageNumber.CURRENT, " 页"], size: 16, color: "888888", font: "微软雅黑" })]
            })] })
        },
        children: [
            // Title block: the paragraphs without children here and below carry
            // only spacing and act as vertical gaps.
            new Paragraph({ spacing: { before: 600 } }),
            new Paragraph({
                alignment: AlignmentType.CENTER,
                children: [new TextRun({ text: "基于 C++ 的无人机声源分析模块设计与实现", bold: true, size: 40, font: "微软雅黑", color: "1a1a2e" })]
            }),
            new Paragraph({
                alignment: AlignmentType.CENTER,
                children: [new TextRun({ text: "——「智途投送」战场末端补给系统的多模态感知实践", size: 26, font: "微软雅黑", color: "636e72" })],
                spacing: { after: 400 }
            }),
            new Paragraph({
                alignment: AlignmentType.CENTER,
                children: [new TextRun({ text: "作者：国防科大计算机学院 23 级软件工程小班", size: 21, font: "微软雅黑", color: "636e72" })]
            }),
            new Paragraph({
                alignment: AlignmentType.CENTER,
                children: [new TextRun({ text: "2026 年 4 月", size: 21, font: "微软雅黑", color: "636e72" })],
                spacing: { after: 600 }
            }),

            // Abstract
            h1("摘要"),
            body("在城市作战环境下，战场末端补给的「最后一公里」始终是制约作战效能的关键瓶颈。本文介绍「智途投送」软件系统中声源分析模块（Acoustic Analyzer）的设计与实现过程。该模块通过麦克风阵列采集音频信号，结合 GCC-PHAT 声源定位算法与 ONNX Runtime 神经网络推理引擎，实现了枪炮声的实时识别、方位估计与距离推算。模块采用 C++17 开发，遵循构件化设计原则，核心算法零 ROS 依赖，支持临时方案（手机单通道）与最终方案（麦克风阵列）的无缝切换。本文详细阐述了系统架构、核心算法、工程实践中的关键决策及踩坑记录，为同类嵌入式感知系统开发提供参考。"),
            new Paragraph({ spacing: { before: 100 } }),
            body("关键词：无人机；声源定位；GCC-PHAT；ONNX Runtime；构件化设计；战场感知", { bold: true }),

            // 1. Project background
            h1("一、项目背景与需求分析"),
            h2("1.1 战场末端补给的痛点"),
            body("近年来多场局部战争的实战经验反复证明，后勤补给的「最后一公里」已成为制约战场持续作战能力的核心瓶颈。2023 年以哈战争中，以军在加沙城市巷战环境遭遇严重后勤困境；2022 年俄乌冲突中，前线部队长期面临弹药、食品、急救药品严重短缺。传统有人运输在最后几公里内频繁遭遇炮火覆盖，伤亡率极高。即便引入无人机等无人平台，也因缺乏统一调度、智能路径规划和安全投放策略，末端配送效率低下、协调困难。"),
            body("针对这一现实挑战，我们团队设计开发了「智途投送」智能化末端补给系统，通过无人机替代有人运输，结合多模态感知与智能路径规划，实现战场物资的安全精准投送。"),

            h2("1.2 声源分析模块的定位"),
            body("在「智途投送」系统的多模态感知架构中，声源分析模块与视觉分析模块、热成像分析模块并列，共同构成威胁感知层。其具体功能需求包括："),
            // Bullet lists are plain body paragraphs whose text starts with "• ".
            body("• 枪炮声识别：区分枪声、炮声、爆炸声与环境噪声"),
            body("• 声源定位：通过麦克风阵列估计威胁源的方位角与俯仰角"),
            body("• 距离估计：基于信号能量衰减模型推算威胁距离"),
            body("• 威胁跟踪：对连续帧检测结果进行关联与生命周期管理"),
            body("• 构件化设计：核心算法与 ROS 解耦，支持独立编译与测试"),

            // 2. System architecture
            h1("二、系统架构设计"),
            h2("2.1 总体架构"),
            body("模块采用三层架构设计，严格遵循「关注点分离」原则："),
            new Paragraph({ spacing: { before: 200 } }),
            // Box-drawing architecture diagram, one code() paragraph per line.
            code("┌──────────────────────────────────────────┐"),
            code("│  ROS 封装层（acoustic_node）              │"),
            code("│  - 话题订阅：/microphone_array/audio      │"),
            code("│  - 话题发布：/acoustic/threats            │"),
            code("├──────────────────────────────────────────┤"),
            code("│  IO 抽象层（AudioSource 接口）             │"),
            code("│  - WavFileSource     （离线测试）          │"),
            code("│  - MobilePhoneSource （临时方案）[TEMP]    │"),
            code("│  - MicArraySource    （最终方案）[FINAL]   │"),
            code("├──────────────────────────────────────────┤"),
            code("│  Core 算法层（零 ROS 依赖）                │"),
            code("│  Pipeline → {FeatureExtractor,            │"),
            code("│             GunshotClassifier,             │"),
            code("│             GccPhatLocalizer,              │"),
            code("│             DistanceEstimator,             │"),
            code("│             ThreatTracker}                 │"),
            code("└──────────────────────────────────────────┘"),
            new Paragraph({ spacing: { before: 200 } }),

            h2("2.2 构件化设计的工程意义"),
            body("将 Core 层设计为纯 C++ 库（零 ROS 依赖）带来了显著的工程优势："),
            body("1. 可测试性：可在无 ROS 环境中运行单元测试，CI/CD 友好"),
            body("2. 可移植性：未来迁移至 ROS2 时，仅需重写 ROS 层，Core 与 IO 层零改动"),
            body("3. 可复用性：Core 库可独立部署于地面站笔记本，用于离线数据分析"),
            body("4. 可分离性：满足课程项目对「构件化」的要求，模块边界清晰"),

            // 3. Core algorithms
            h1("三、核心算法详解"),
            h2("3.1 Mel Spectrogram 特征提取"),
            body("由于项目采用 C++ 开发，无法直接使用 Python 生态的 librosa 库。我们在 C++ 端从零实现了完整的 Mel Spectrogram 提取流程："),
            body("预加重 → 分帧加窗（Hamming）→ FFT（Kiss FFT）→ 功率谱 → Mel 滤波器组 → 对数压缩"),
            body("实现中特别注意了与 Python librosa 的参数对齐："),
            // Three-column parameter table; the column widths (3000 + 3000 + 3360)
            // add up to the table width of 9360, and each cell repeats its column width.
            new Table({
                width: { size: 9360, type: WidthType.DXA },
                columnWidths: [3000, 3000, 3360],
                rows: [
                    // Header row: bold text on a shaded background.
                    new TableRow({ children: [
                        cell("参数", 3000, { bold: true, shading: "f0f2f5" }),
                        cell("C++ 值", 3000, { bold: true, shading: "f0f2f5" }),
                        cell("librosa 对应", 3360, { bold: true, shading: "f0f2f5" })
                    ] }),
                    new TableRow({ children: [cell("sample_rate", 3000), cell("16000", 3000), cell("sr=16000", 3360)] }),
                    new TableRow({ children: [cell("n_fft", 3000), cell("2048", 3000), cell("n_fft=2048", 3360)] }),
                    new TableRow({ children: [cell("hop_length", 3000), cell("512", 3000), cell("hop_length=512", 3360)] }),
                    new TableRow({ children: [cell("n_mels", 3000), cell("64", 3000), cell("n_mels=64", 3360)] }),
                    new TableRow({ children: [cell("f_max", 3000), cell("8000.0", 3000), cell("fmax=8000", 3360)] }),
                    new TableRow({ children: [cell("preemphasis", 3000), cell("0.97", 3000), cell("coef=0.97", 3360)] }),
                    new TableRow({ children: [cell("window", 3000), cell("Hamming", 3000), cell("window='hamming'", 3360)] }),
                    new TableRow({ children: [cell("center", 3000), cell("false", 3000), cell("center=False", 3360)] }),
                ]
            }),
            new Paragraph({ spacing: { before: 200 } }),
            body("其中 center=false 是关键对齐点。librosa 默认 center=true（帧中心对齐），会导致帧数比 C++ 实现多 1-2 帧。我们在训练脚本中显式设置 center=False，并在 C++ 端使用 (n_samples - n_fft) / hop + 1 的帧数计算，确保训练-推理特征完全一致。"),

            h2("3.2 GCC-PHAT 声源定位"),
            body("GCC-PHAT（Generalized Cross-Correlation with Phase Transform）是一种经典的时延估计（TDOA）算法。其核心思想是通过相位变换加权消除信号幅度的影响，仅保留相位信息用于互相关计算："),
            // Formulas are rendered with quote() (italic, indented).
            quote("R_ij(τ) = IFFT{ X_i(f) · X_j*(f) / |X_i(f) · X_j*(f)| }"),
            body("我们在实现中做了以下工程优化："),
            body("• 抛物线插值：在 GCC-PHAT 峰值附近进行抛物线拟合，将时延分辨率从采样点级提升至亚采样级"),
            body("• 最小二乘方向解算：利用多对麦克风的 TDOA 构建超定方程组，通过 SVD 求解声源方向向量"),
            body("• 阵列几何自适应：支持十字、线性、圆形及自定义阵列布局，通过配置文件热切换"),

            h2("3.3 枪炮声分类模型"),
            body("分类器采用轻量级 CNN-GRU 网络结构："),
            code("输入 (1, 64, T) → Conv1D(64→128→256) → MaxPool → GRU(128, bidirectional)"),
            code("        → GlobalAvgPool → Dense(64) → Dropout(0.3) → Dense(4) → Softmax"),
            new Paragraph({ spacing: { before: 200 } }),
            body("模型推理使用 ONNX Runtime C++ API，相比 LibTorch 轻量一个数量级，推理延迟约 10-50ms，满足实时性要求。模型从 PyTorch 训练后导出为 ONNX 格式，支持动态 batch 和动态时间轴。"),

            h2("3.4 距离估计与威胁跟踪"),
            body("距离估计采用能量衰减模型："),
            quote("d = d₀ · 10^((L₀ - L_measured) / (20·α))"),
            body("其中 L₀ 根据分类结果动态选择（枪声 150dB、炮声 180dB、爆炸 170dB），α 为城市环境衰减系数（默认 0.6）。为降低单帧估计噪声，引入一维卡尔曼滤波进行时序平滑。"),
            body("威胁跟踪采用最近邻数据关联算法：连续帧中方位角差 < 15° 且类型一致则判定为同一威胁，分配唯一 ID 并持续跟踪，连续 5 帧未检测到则淘汰。"),

            // 4. Engineering practice
            h1("四、工程实践与关键决策"),
            h2("4.1 临时方案与最终方案的分离设计"),
            body("项目初期面临一个现实问题：麦克风阵列硬件尚未到位，但需要提前验证算法通路和系统集成。我们设计了一套「source_type」配置切换机制："),
            body("• mobile_phone：手机通过 WiFi/UDP 发送单通道音频，仅做分类检测，定位模块自动跳过"),
            body("• mic_array：4 通道阵列，完整分类+定位+距离估计"),
            body("• wav_file：离线 WAV 回放，用于算法调试"),
            body("这种设计使得团队可以在无硬件条件下并行推进软件开发，硬件到位后仅需修改一行配置即可切换至最终方案。"),

            h2("4.2 踩坑记录"),
            h3("坑 1：librosa center 参数导致的训练-推理不一致"),
            body("初期发现 C++ 端 Mel Spectrogram 与 Python 端存在系统性偏差，排查后发现是 librosa 默认 center=true 导致的帧数差异。修复方案：训练时显式设置 center=False，并在 C++ 端严格对齐分帧逻辑。"),
            h3("坑 2：ONNX 导出动态轴与 torch 2.x 的兼容性"),
            body("torch 2.11 默认使用 dynamo 导出器，对 GRU 层的动态轴支持存在问题。修复方案：使用传统 TorchScript 导出器（dynamo=False），并将 opset 提升至 13。"),
            h3("坑 3：数据增强导致时间维度变化"),
            body("训练脚本中的时间拉伸增强改变了 Mel Spectrogram 的时间帧数，导致 batch 拼接失败。修复方案：增强后统一插值对齐到目标帧数（63 帧）。"),

            // 5. Experimental validation
            h1("五、实验验证"),
            h2("5.1 合成数据训练验证"),
            body("在硬件到位前，我们使用合成数据集验证了完整的训练-导出-部署流程："),
            // Two-column results table; the column widths (4000 + 5360) add up to 9360.
            new Table({
                width: { size: 9360, type: WidthType.DXA },
                columnWidths: [4000, 5360],
                rows: [
                    new TableRow({ children: [
                        cell("指标", 4000, { bold: true, shading: "f0f2f5" }),
                        cell("结果", 5360, { bold: true, shading: "f0f2f5" })
                    ] }),
                    new TableRow({ children: [cell("数据集", 4000), cell("200 合成样本 + 10 份模拟无人机噪声", 5360)] }),
                    new TableRow({ children: [cell("训练 epoch", 4000), cell("30", 5360)] }),
                    new TableRow({ children: [cell("验证准确率", 4000), cell("100%（合成数据，过拟合预期）", 5360)] }),
                    new TableRow({ children: [cell("ONNX 模型大小", 4000), cell("1.9 MB", 5360)] }),
                    new TableRow({ children: [cell("ONNX 推理验证", 4000), cell("枪声识别置信度 97.92%", 5360)] }),
                    new TableRow({ children: [cell("C++ 编译", 4000), cell("g++ 15.2 + CMake 4.1 通过", 5360)] }),
                ]
            }),
            new Paragraph({ spacing: { before: 200 } }),
            body("需要强调的是，合成数据上的高准确率不代表真实场景性能。当前模型仅用于验证代码通路，后续需用真实数据集（MIVIA、FSD50K 等）重新训练。"),

            h2("5.2 特征一致性验证"),
            body("我们编写了纯 NumPy 参考实现与 C++ FeatureExtractor 进行比对验证。在相同 WAV 输入下，两者 Mel Spectrogram 的逐元素最大误差 < 1e-4，验证了 C++ 端特征提取的正确性。"),

            // 6. Summary and outlook
            h1("六、总结与展望"),
            h2("6.1 已完成工作"),
            body("• 完成了声源分析模块的全部 C++ 代码开发（34 个文件）"),
            body("• 实现了 Mel Spectrogram、GCC-PHAT、ONNX 推理、距离估计、威胁跟踪五大核心算法"),
            body("• 设计了临时/最终方案分离机制，支持无硬件条件下的软件开发"),
            body("• 在 Windows 上完成了 Python 训练环境搭建、模型训练、ONNX 导出与验证"),
            h2("6.2 后续计划"),
            body("• 下载真实数据集（MIVIA、FSD50K、UrbanSound8K）替换合成数据"),
            body("• 录制无人机自噪声作为 ambient 负样本，解决旋翼噪声误报问题"),
            body("• 在 Ubuntu + ROS Noetic 环境下编译 C++ 代码，完成特征一致性端到端验证"),
            body("• 麦克风阵列硬件到位后，实现 ALSA 驱动并完成实机联调"),
            body("• 考虑模型量化（INT8）以进一步降低 Jetson 平台的推理延迟"),

            // References
            h1("参考文献"),
            body("[1] Knapp C H, Carter G C. The generalized correlation method for estimation of time delay[J]. IEEE Trans. on ASSP, 1976."),
            body("[2] Microsoft. ONNX Runtime documentation[EB/OL]. https://onnxruntime.ai/docs/"),
            body("[3] McFee B, et al. librosa: Audio and Music Signal Analysis in Python[C]. SciPy, 2015."),
            body("[4] 智途投送软件开发方案（内部文档），国防科大计算机学院，2026."),
        ]
    }]
});

// Serialize the document and write it, under a fixed file name, relative to
// the current working directory.
const outputFile = "基于C++的无人机声源分析模块设计与实现_技术博客.docx";

Packer.toBuffer(doc)
    .then((buffer) => {
        fs.writeFileSync(outputFile, buffer);
        console.log(`技术博客已生成：${outputFile}`);
    })
    .catch((err) => {
        // Covers both a packing failure and a write failure (e.g. the target
        // file is locked or the directory is read-only). Without this handler
        // either one would surface only as an unhandled promise rejection.
        console.error(`技术博客生成失败：${outputFile}`, err);
        process.exitCode = 1;
    });