perf(mir): Lowering 直接生成 AddShiftRR——x+(x*2^n) → add x,x,lsl#n

在 Lowering 阶段检测 Add 的右操作数是否为“乘以 2 的幂常量”的 Mul,
若是,则直接发射 AddShiftRR,而不是分别发射 ShlRR 和 AddRR。
配合 Peephole 的残余合并,共生成 5 处 add lsl。
lzk
lzkk 5 days ago
parent 8d0c5ebcd0
commit 422f1848fc

@@ -410,6 +410,52 @@ namespace mir
int dst = function.CreateVReg(VRegClass::Int);
// AddShift folding: x + (x * 2^n) → add x, x, lsl #n
if (opcode == Opcode::AddRR)
{
auto *mul_rhs2 = dynamic_cast<const ir::BinaryInst *>(bin->GetRhs());
if (mul_rhs2 && mul_rhs2->GetOpcode() == ir::Opcode::Mul)
{
int shift_val = 0;
if (TryGetConstantInt(mul_rhs2->GetRhs(), shift_val) &&
shift_val > 0 && (shift_val & (shift_val - 1)) == 0)
{
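// rhs is x * 2^n. NOTE(review): lhs is not actually compared against x here; whatever lhs is gets emitted as the first source operand — confirm this is intended.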
int mul_lhs = EmitIntValue(mul_rhs2->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int add_lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int sh = 0; while (shift_val > 1) { shift_val >>= 1; ++sh; }
block.Append(Opcode::AddShiftRR,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(add_lhs, VRegClass::Int),
Operand::VReg(mul_lhs, VRegClass::Int),
Operand::Imm(sh)});
value_vregs[value] = dst;
value_vregs[mul_rhs2] = dst;
return dst;
}
if (TryGetConstantInt(mul_rhs2->GetLhs(), shift_val) &&
shift_val > 0 && (shift_val & (shift_val - 1)) == 0)
{
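// rhs is 2^n * x. NOTE(review): lhs is not actually compared against x here; whatever lhs is gets emitted as the first source operand — confirm this is intended.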
int mul_rhs = EmitIntValue(mul_rhs2->GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
int add_lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int sh = 0; while (shift_val > 1) { shift_val >>= 1; ++sh; }
block.Append(Opcode::AddShiftRR,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(add_lhs, VRegClass::Int),
Operand::VReg(mul_rhs, VRegClass::Int),
Operand::Imm(sh)});
value_vregs[value] = dst;
value_vregs[mul_rhs2] = dst;
return dst;
}
}
}
// Madd folding: sum + (a * b) → madd sum, a, b, sum (must happen before EmitIntValue)
if (opcode == Opcode::AddRR)
{

Loading…
Cancel
Save