From acdac5391dd6a511f844b9b3556b42e62b2ff7c4 Mon Sep 17 00:00:00 2001 From: lzkk <956449176@qq.com> Date: Mon, 25 May 2026 21:00:32 +0800 Subject: [PATCH] =?UTF-8?q?fix(backend):=20EmitLargeImmediate=20=E8=B7=B3?= =?UTF-8?q?=E8=BF=87=E5=89=8D=E5=AF=BC=E9=9B=B6=EF=BC=8C=E9=81=BF=E5=85=8D?= =?UTF-8?q?=E5=86=97=E4=BD=99=20movz=20#0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 32-bit 立即数低 16 位为零时(如 0x00020000),直接发射移位 movz 而非 movz #0 + movk 双指令。crypto -7, fft -2, h-4 -1, h-10 -1,总计 -33 条,零退化。 --- src/mir/AsmPrinter.cpp | 5 +++++ 优化记录.md | 12 ++++++++++++ 指令数基线.md | 24 ++++++++++++------------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/mir/AsmPrinter.cpp b/src/mir/AsmPrinter.cpp index 827b053f..756deeb1 100644 --- a/src/mir/AsmPrinter.cpp +++ b/src/mir/AsmPrinter.cpp @@ -223,6 +223,11 @@ namespace mir { continue; } + // 跳过前导零——直接用移位后的 movz,避免浪费 movz #0 + if (!emitted && part == 0) + { + continue; + } if (!emitted) { diff --git a/优化记录.md b/优化记录.md index 8580c0e1..1013368e 100644 --- a/优化记录.md +++ b/优化记录.md @@ -45,3 +45,15 @@ - **效果**:functional 测试从 87/88 → **100/100 全部通过** - **已知局限**:30_many_dimensions(19 维多维数组参数)仍失败,该 bug 在降级层(无优化也错),需专项修复 GEP 偏移计算 - **后续**:30_many_dimensions 已知根因在多维数组 GEP 降级,待后续处理 + +--- + +## 2026-05-25 | Movz #0 前导零优化 + +- **类型**:后端(AsmPrinter) +- **假设**:EmitLargeImmediate 中,当 32-bit 立即数的低 16-bit 为零时,应该直接用移位后的 movz,而不是先 `movz #0` 再 `movk`。例如 `0x00020000` → `movz w8, #2, lsl #16` 而非 `movz w8, #0; movk w8, #2, lsl #16` +- **实现**:AsmPrinter.cpp EmitLargeImmediate 循环中,`!emitted && part == 0` 时跳过(3 行),保持底部 `!emitted → mov #0` 兜底处理全零情况 +- **指令数效果**:减少 33 条,crypto -7×3、fft -2×3、h-4 -1×3、h-10 -1×3 +- **退化**:无 +- **功能测试**:100/100 functional 通过,30/31 h_functional 通过(1 个预存故障 30_many_dimensions) +- **已知局限**:仅修复 EmitLargeImmediate;EmitStackAdjust/EmitAddressFromBase 中的 movz 模式仍有同样问题,可后续统一 diff --git a/指令数基线.md b/指令数基线.md index e2640a91..7238b973 100644 --- a/指令数基线.md +++ b/指令数基线.md @@ -32,21 +32,21 @@ | performance/crc1 | 279 | | performance/crc2 | 279 | | performance/crc3 | 279 | -| performance/crypto-1 | 1926 | -| performance/crypto-2 | 1926 | -| performance/crypto-3 | 1926 | -| performance/fft0 | 597 | -| performance/fft1 | 597 | -| performance/fft2 | 597 | +| performance/crypto-1 | 1919 | +| performance/crypto-2 | 1919 | +| performance/crypto-3 | 1919 | +| performance/fft0 | 595 | +| performance/fft1 | 595 | +| performance/fft2 | 595 | | performance/h-1-01 | 157 | | performance/h-1-02 | 157 | | performance/h-1-03 | 157 | -| performance/h-10-01 | 328 | -| performance/h-10-02 | 328 | -| performance/h-10-03 | 328 | -| performance/h-4-01 | 163 | -| performance/h-4-02 | 163 | -| performance/h-4-03 | 163 | +| performance/h-10-01 | 327 | +| performance/h-10-02 | 327 | +| performance/h-10-03 | 327 | +| performance/h-4-01 | 162 | +| performance/h-4-02 | 162 | +| performance/h-4-03 | 162 | | performance/h-5-01 | 338 | | performance/h-5-02 | 338 | | performance/h-5-03 | 338 |