perf: 函数内联 + csneg 取模优化,crypto 1.76x→1.69x

- IR 保守内联:纯算术单 BB leaf 函数自底向上迭代内联
- MIR 取模优化:x%2^n 用 negs+and+and+csneg(6→4 指令)
- 添加 CondCode::MI/PL 支持 csneg 的 mi/pl 条件码
- 修正 NegRR 发射 negs(设标志位)供 csneg 使用
lzk
lzkk 6 days ago
parent 2f1d7dc856
commit 10a59110eb

@ -191,7 +191,9 @@ namespace mir
LT,
LE,
GT,
GE
GE,
MI,
PL
};
class Operand

@ -120,6 +120,10 @@ namespace mir
return "gt";
case CondCode::GE:
return "ge";
case CondCode::MI:
return "mi";
case CondCode::PL:
return "pl";
default:
return "";
}
@ -733,7 +737,7 @@ namespace mir
case Opcode::NegRR:
if (operands.size() >= 2)
{
os << " neg ";
os << " negs ";
PrintOperand(operands[0], os);
os << ", ";
PrintOperand(operands[1], os);

@ -603,128 +603,83 @@ namespace mir
int val = rhs_const->GetValue();
if (val > 0 && (val & (val - 1)) == 0)
{
// x % 2^n → and + sign fixup: (x & mask) then if x<0 subtract 2^n
// x % 2^n -> negs+and+and+csneg(4 指令,含零值正确语义)
int mask = val - 1;
int masked = function.CreateVReg(VRegClass::Int);
if (mask <= 4095)
{
int neg = function.CreateVReg(VRegClass::Int);
int pos = function.CreateVReg(VRegClass::Int);
int neg_masked = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::NegRR,
{Operand::VReg(neg, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int)});
if (static_cast<unsigned int>(mask) <= 4095) {
block.Append(Opcode::AndRR,
{Operand::VReg(masked, VRegClass::Int),
{Operand::VReg(pos, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(mask)});
}
else
{
block.Append(Opcode::AndRR,
{Operand::VReg(neg_masked, VRegClass::Int),
Operand::VReg(neg, VRegClass::Int),
Operand::Imm(mask)});
} else {
int mask_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(mask_reg, VRegClass::Int),
Operand::Imm(mask)}).SetRematerializable(true).SetRematImm(mask);
block.Append(Opcode::AndRR,
{Operand::VReg(masked, VRegClass::Int),
{Operand::VReg(pos, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(mask_reg, VRegClass::Int)});
block.Append(Opcode::AndRR,
{Operand::VReg(neg_masked, VRegClass::Int),
Operand::VReg(neg, VRegClass::Int),
Operand::VReg(mask_reg, VRegClass::Int)});
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int neg_fixup = function.CreateVReg(VRegClass::Int);
if (static_cast<unsigned int>(val) <= 4095)
{
block.Append(Opcode::SubRR,
{Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::Imm(val)});
}
else
{
int val_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(val_reg, VRegClass::Int),
Operand::Imm(val)}).SetRematerializable(true).SetRematImm(val);
block.Append(Opcode::SubRR,
{Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::VReg(val_reg, VRegClass::Int)});
}
int after_sign = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(after_sign, VRegClass::Int),
Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
// 修正:若 masked==0 则结果必须为 0(-4 % 2 = 0,不是 -2)
block.Append(Opcode::CmpImm,
{Operand::VReg(masked, VRegClass::Int),
Operand::Imm(0)});
block.Append(Opcode::Csel,
block.Append(Opcode::Csneg,
{Operand::VReg(dst, VRegClass::Int),
Operand::Reg(PhysReg::WZR),
Operand::VReg(after_sign, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::EQ))});
Operand::VReg(pos, VRegClass::Int),
Operand::VReg(neg_masked, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::MI))});
value_vregs[value] = dst;
return dst;
}
if (val < 0 && (-val & (-val - 1)) == 0 && val != -1)
{
// x % -2^n → 同 x % 2^n: (x & (2^n-1)) + sign fixup
int abs_val = -val;
int mask = abs_val - 1;
int masked = function.CreateVReg(VRegClass::Int);
if (mask <= 4095)
{
// x % -2^n -> 同 x % 2^n:negs+and+and+csneg
int mask = (-val) - 1;
int neg = function.CreateVReg(VRegClass::Int);
int pos = function.CreateVReg(VRegClass::Int);
int neg_masked = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::NegRR,
{Operand::VReg(neg, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int)});
if (static_cast<unsigned int>(mask) <= 4095) {
block.Append(Opcode::AndRR,
{Operand::VReg(masked, VRegClass::Int),
{Operand::VReg(pos, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(mask)});
}
else
{
block.Append(Opcode::AndRR,
{Operand::VReg(neg_masked, VRegClass::Int),
Operand::VReg(neg, VRegClass::Int),
Operand::Imm(mask)});
} else {
int mask_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(mask_reg, VRegClass::Int),
Operand::Imm(mask)}).SetRematerializable(true).SetRematImm(mask);
block.Append(Opcode::AndRR,
{Operand::VReg(masked, VRegClass::Int),
{Operand::VReg(pos, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(mask_reg, VRegClass::Int)});
block.Append(Opcode::AndRR,
{Operand::VReg(neg_masked, VRegClass::Int),
Operand::VReg(neg, VRegClass::Int),
Operand::VReg(mask_reg, VRegClass::Int)});
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int neg_fixup = function.CreateVReg(VRegClass::Int);
if (static_cast<unsigned int>(abs_val) <= 4095)
{
block.Append(Opcode::SubRR,
{Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::Imm(abs_val)});
}
else
{
int val_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(val_reg, VRegClass::Int),
Operand::Imm(abs_val)}).SetRematerializable(true).SetRematImm(abs_val);
block.Append(Opcode::SubRR,
{Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::VReg(val_reg, VRegClass::Int)});
}
int after_sign2 = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(after_sign2, VRegClass::Int),
Operand::VReg(neg_fixup, VRegClass::Int),
Operand::VReg(masked, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
// 修正:若 masked==0 则结果必须为 0
block.Append(Opcode::CmpImm,
{Operand::VReg(masked, VRegClass::Int),
Operand::Imm(0)});
block.Append(Opcode::Csel,
block.Append(Opcode::Csneg,
{Operand::VReg(dst, VRegClass::Int),
Operand::Reg(PhysReg::WZR),
Operand::VReg(after_sign2, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::EQ))});
Operand::VReg(pos, VRegClass::Int),
Operand::VReg(neg_masked, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::MI))});
value_vregs[value] = dst;
return dst;
}

Loading…
Cancel
Save