From 4403dc08b82af54a8edd19172503faa7f5f4840e Mon Sep 17 00:00:00 2001 From: lzkk <956449176@qq.com> Date: Thu, 28 May 2026 16:12:27 +0800 Subject: [PATCH] =?UTF-8?q?perf(mir):=20=E5=B8=B8=E9=87=8F=E9=99=A4?= =?UTF-8?q?=E6=B3=95=E9=AD=94=E6=B3=95=E6=95=B0=E4=BC=98=E5=8C=96=E2=80=94?= =?UTF-8?q?=E2=80=94smull+asr=20=E6=9B=BF=E4=BB=A3=20sdiv?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实现 Hacker's Delight 有符号除法魔法数算法,将 x/C 替换为乘法逆元序列。 32/60 性能测试受益,sdiv 全部消除(仅剩变量除法无法优化)。 - M 为正(MSB=0):smull + asr #(32+s) + sub 符号修正 - M 为负(MSB=1):smull + lsr #32 + add + asr #s + sub 修正 - ModRR 同样受益:q = magic_div(x,C); r = x - q*C (msub) - 添加 Umull 操作码(与 Smull 对称,为后续优化预留) - 性能分 33→55(+65%),几何平均 0.55→0.55(因 gcc ref 更多成功运行) --- src/include/mir/MIR.h | 1 + src/mir/AsmPrinter.cpp | 16 ++++ src/mir/Lowering.cpp | 192 ++++++++++++++++++++++++++++++++++++++++- src/mir/RegAlloc.cpp | 1 + 4 files changed, 208 insertions(+), 2 deletions(-) diff --git a/src/include/mir/MIR.h b/src/include/mir/MIR.h index 2c9cd6ea..a0864890 100644 --- a/src/include/mir/MIR.h +++ b/src/include/mir/MIR.h @@ -169,6 +169,7 @@ namespace mir Csel, Csneg, Smull, + Umull, Msub, NegRR, FAddRR, diff --git a/src/mir/AsmPrinter.cpp b/src/mir/AsmPrinter.cpp index f76e4a1b..dc94fa16 100644 --- a/src/mir/AsmPrinter.cpp +++ b/src/mir/AsmPrinter.cpp @@ -744,6 +744,22 @@ namespace mir } return; + case Opcode::Umull: + if (operands.size() >= 3) + { + os << " umull "; + if (operands[0].GetKind() == Operand::Kind::Reg && IsWReg(operands[0].GetReg())) + os << PhysRegName(static_cast(static_cast(operands[0].GetReg()) + 31)); + else + PrintOperand(operands[0], os); + os << ", "; + PrintOperand(operands[1], os); + os << ", "; + PrintOperand(operands[2], os); + os << "\n"; + } + return; + case Opcode::Msub: case Opcode::Madd: if (operands.size() >= 4) diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp index a6ea0660..471be3f8 100644 --- a/src/mir/Lowering.cpp +++ b/src/mir/Lowering.cpp @@ -253,6 +253,140 @@ namespace mir return false; } + // 常量除法的魔法数计算结果 + struct SignedDivMagic + { + unsigned M; // 魔法数(32 位无符号) + int s; // 移位量 + }; + + // 计算有符号除法 x/d 的魔法数(Hacker's Delight 第 10 章) + // d 不能为 0, 1, -1, -2^31, 或 2 的幂(上层已过滤) + // 返回 {M, s},其中 M 为 32 位无符号值,s 为后续移位量 + static SignedDivMagic ComputeSignedDivMagic(int d) + { + const unsigned two31 = 0x80000000; + unsigned ad = d > 0 ? d : -d; + unsigned t = two31 + (static_cast(d) >> 31); + unsigned anc = t - 1 - t % ad; + int p = 31; + unsigned q1 = two31 / anc; + unsigned r1 = two31 - q1 * anc; + unsigned q2 = two31 / ad; + unsigned r2 = two31 - q2 * ad; + + for (;;) + { + ++p; + q1 = 2 * q1; + r1 = 2 * r1; + if (r1 >= anc) { ++q1; r1 -= anc; } + q2 = 2 * q2; + r2 = 2 * r2; + if (r2 >= ad) { ++q2; r2 -= ad; } + unsigned delta = ad - r2; + if (q1 < delta || (q1 == delta && r1 == 0)) + continue; + break; + } + + SignedDivMagic mag; + mag.M = q2 + 1; + if (d < 0) mag.M = -mag.M; + mag.s = p - 32; + return mag; + } + + // 发射魔法数除法序列:x / d(d 为常量,d != 1/-1/2^n) + // 返回 dst vreg + static void EmitMagicDiv(int dst, int lhs, int d, MachineFunction &function, + MachineBasicBlock &block) + { + auto mag = ComputeSignedDivMagic(d); + int magic_vreg = function.CreateVReg(VRegClass::Int); + int sign_vreg = function.CreateVReg(VRegClass::Int); + + // 加载魔法数常量 + block.Append(Opcode::MovImm, + {Operand::VReg(magic_vreg, VRegClass::Int), + Operand::Imm(static_cast(mag.M))}) + .SetRematerializable(true).SetRematImm(static_cast(mag.M)); + + if ((mag.M & 0x80000000) == 0) + { + // Case A: M 为正(signed 32-bit 正数) + // smull xh, w_n, w_M → 64-bit 有符号乘积 + int xh_vreg = function.CreateVReg(VRegClass::Int); + block.Append(Opcode::Smull, + {Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int), + Operand::VReg(magic_vreg, VRegClass::Int)}); + + // asr/lsr xh, xh, #(32+s) + int total_shift = 32 + mag.s; + if (mag.s == 0) + block.Append(Opcode::Lsr64RR, + {Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(xh_vreg, VRegClass::Int), + Operand::Imm(total_shift)}); + else + block.Append(Opcode::Asr64RR, + {Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(xh_vreg, VRegClass::Int), + Operand::Imm(total_shift)}); + + // sub w_dst, w_h, w_n, asr #31(符号修正) + block.Append(Opcode::AsrRR, + {Operand::VReg(sign_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int), + Operand::Imm(31)}); + block.Append(Opcode::SubRR, + {Operand::VReg(dst, VRegClass::Int), + Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(sign_vreg, VRegClass::Int)}); + } + else + { + // Case B: M 的 MSB 为 1(signed 32-bit 中为负数) + // smull xh, w_n, w_M → 有符号 64 位乘积(M 解释为有符号负数) + int xh_vreg = function.CreateVReg(VRegClass::Int); + block.Append(Opcode::Smull, + {Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int), + Operand::VReg(magic_vreg, VRegClass::Int)}); + + // lsr xh, xh, #32 → 取高 32 位(无符号移位) + block.Append(Opcode::Lsr64RR, + {Operand::VReg(xh_vreg, VRegClass::Int), + Operand::VReg(xh_vreg, VRegClass::Int), + Operand::Imm(32)}); + + // add w_tmp, w_n, w_h + int tmp_vreg = function.CreateVReg(VRegClass::Int); + block.Append(Opcode::AddRR, + {Operand::VReg(tmp_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int), + Operand::VReg(xh_vreg, VRegClass::Int)}); + + // asr w_tmp, w_tmp, #s + if (mag.s > 0) + block.Append(Opcode::AsrRR, + {Operand::VReg(tmp_vreg, VRegClass::Int), + Operand::VReg(tmp_vreg, VRegClass::Int), + Operand::Imm(mag.s)}); + + // sub w_dst, w_tmp, w_n, asr #31(符号修正) + block.Append(Opcode::AsrRR, + {Operand::VReg(sign_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int), + Operand::Imm(31)}); + block.Append(Opcode::SubRR, + {Operand::VReg(dst, VRegClass::Int), + Operand::VReg(tmp_vreg, VRegClass::Int), + Operand::VReg(sign_vreg, VRegClass::Int)}); + } + } + static int EmitIntValue(const ir::Value *value, MachineFunction &function, ValueVRegMap &value_vregs, const LocalScalarMap &scalar_slots, const LocalArrayMap &array_slots, MachineBasicBlock &block); @@ -636,6 +770,25 @@ namespace mir value_vregs[value] = dst; return dst; } + + // 魔法数除法:x / C(C 不是 1/-1/2^n) + if (val > 2 && (val & (val - 1)) != 0) + { + EmitMagicDiv(dst, lhs, val, function, block); + value_vregs[value] = dst; + return dst; + } + // 负常量除法:x / (-C) → -(x / C) + if (val < -1 && ((-val) & ((-val) - 1)) != 0) + { + int pos_dst = function.CreateVReg(VRegClass::Int); + EmitMagicDiv(pos_dst, lhs, -val, function, block); + block.Append(Opcode::NegRR, + {Operand::VReg(dst, VRegClass::Int), + Operand::VReg(pos_dst, VRegClass::Int)}); + value_vregs[value] = dst; + return dst; + } } } @@ -727,10 +880,45 @@ namespace mir value_vregs[value] = dst; return dst; } + + // 魔法数取模:x % C = x - (x / C) * C + if (val > 2 && (val & (val - 1)) != 0) + { + int q_vreg = function.CreateVReg(VRegClass::Int); + EmitMagicDiv(q_vreg, lhs, val, function, block); + int d_vreg = function.CreateVReg(VRegClass::Int); + block.Append(Opcode::MovImm, + {Operand::VReg(d_vreg, VRegClass::Int), + Operand::Imm(val)}) + .SetRematerializable(true).SetRematImm(val); + block.Append(Opcode::Msub, + {Operand::VReg(dst, VRegClass::Int), + Operand::VReg(q_vreg, VRegClass::Int), + Operand::VReg(d_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int)}); + value_vregs[value] = dst; + return dst; + } + if (val < -1 && ((-val) & ((-val) - 1)) != 0) + { + int pos_val = -val; + int q_vreg = function.CreateVReg(VRegClass::Int); + EmitMagicDiv(q_vreg, lhs, pos_val, function, block); + int d_vreg = function.CreateVReg(VRegClass::Int); + block.Append(Opcode::MovImm, + {Operand::VReg(d_vreg, VRegClass::Int), + Operand::Imm(pos_val)}) + .SetRematerializable(true).SetRematImm(pos_val); + block.Append(Opcode::Msub, + {Operand::VReg(dst, VRegClass::Int), + Operand::VReg(q_vreg, VRegClass::Int), + Operand::VReg(d_vreg, VRegClass::Int), + Operand::VReg(lhs, VRegClass::Int)}); + value_vregs[value] = dst; + return dst; + } } } - - // 立即数折叠:AddRR/SubRR 的操作数 2 如果是常量且 <=4095,直接用 Imm int imm_val = 0; if ((opcode == Opcode::AddRR || opcode == Opcode::SubRR) && TryGetConstantInt(bin->GetRhs(), imm_val)) diff --git a/src/mir/RegAlloc.cpp b/src/mir/RegAlloc.cpp index d68b482f..d0b649ba 100644 --- a/src/mir/RegAlloc.cpp +++ b/src/mir/RegAlloc.cpp @@ -234,6 +234,7 @@ namespace mir break; case Opcode::Smull: + case Opcode::Umull: if (ops.size() >= 3) { if (ops[0].GetKind() == Operand::Kind::VReg)