perf(mir): AArch64 缩放寻址——GEP+Load/Store 直接生成 ldr/str [base, idx, uxtw #2]

消除数组访问中的 sxtw+shl+add 链,替换为单条缩放寻址指令。
crypto -76(-4.2%), shuffle -39(-8.3%), sort -22(-3.4%), matmul -16(-4.3%)
lzk
lzkk 3 days ago
parent 3ece3d09f4
commit be3a5640ee

@ -349,6 +349,20 @@ namespace mir
os << "]\n";
}
// Scaled addressing: prints one register-offset memory access of the form
//   ldr/str <data>, [<base>, <index>, sxtw #<shift>]
//
// Parameters:
//   opcode     Opcode::LoadMem prints `ldr`; any other opcode prints `str`.
//   data_reg   register that is loaded into / stored from.
//   base_reg   register holding the base address.
//   index_reg  register holding the element index (callers in this file pass
//              an Int-class register here).
//   shift      left-shift amount applied to the extended index.
//   os         stream that receives the assembly text.
//
// The index is sign-extended (`sxtw`) rather than zero-extended (`uxtw`):
// this addressing form stands in for an explicit sxtw + shl + add address
// computation, so a negative index must still yield a negative byte offset.
// Zero-extension would turn it into a large positive offset instead.
void PrintScaledMemAccess(Opcode opcode, const Operand &data_reg,
                          const Operand &base_reg, const Operand &index_reg,
                          int shift, std::ostream &os)
{
    const char *mnemonic = (opcode == Opcode::LoadMem) ? "ldr " : "str ";
    os << " " << mnemonic;
    PrintOperand(data_reg, os);
    os << ", [";
    PrintOperand(base_reg, os);
    os << ", ";
    PrintOperand(index_reg, os);
    os << ", sxtw #" << shift << "]\n";
}
int ResolveFrameOffset(const MachineFunction &function, const Operand &operand)
{
if (operand.GetKind() != Operand::Kind::FrameIndex)
@ -513,7 +527,17 @@ namespace mir
case Opcode::LoadMem:
case Opcode::StoreMem:
if (operands.size() >= 2 &&
if (operands.size() >= 3 &&
operands[0].GetKind() == Operand::Kind::Reg &&
operands[1].GetKind() == Operand::Kind::Reg &&
operands[2].GetKind() == Operand::Kind::Reg)
{
// 缩放寻址: ldr/str [base, index, uxtw #shift]
int shift = IsXReg(operands[0].GetReg()) ? 3 : 2;
PrintScaledMemAccess(instr.GetOpcode(), operands[0],
operands[1], operands[2], shift, os);
}
else if (operands.size() >= 2 &&
operands[0].GetKind() == Operand::Kind::Reg &&
operands[1].GetKind() == Operand::Kind::Reg &&
IsXReg(operands[1].GetReg()))

@ -724,6 +724,26 @@ namespace mir
return vreg;
}
// Scaled addressing: when the loaded pointer is a GEP, fold GEP + Load into a
// single LoadMem that carries three operands (data, base, index). The emitter
// prints a three-register LoadMem as one scaled register-offset `ldr`, so no
// separate address computation is materialised here.
if (auto *gep = dynamic_cast<const ir::GetElementPtrInst *>(load->GetPtr()))
{
int idx_imm = 0;
// Take this path only when TryGetConstantInt fails (presumably: the index is
// not a compile-time constant). Otherwise control falls through to the
// generic pointer-based load that follows this block.
if (!TryGetConstantInt(gep->GetIndex(), idx_imm))
{
// Base pointer and index are lowered separately; the index stays unscaled.
int base_addr = EmitPtrValue(gep->GetBasePtr(), function, value_vregs,
scalar_slots, array_slots, block);
int idx_vreg = EmitIntValue(gep->GetIndex(), function, value_vregs,
scalar_slots, array_slots, block);
int data_vreg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::LoadMem,
{Operand::VReg(data_vreg, VRegClass::Int),
Operand::VReg(base_addr, VRegClass::Ptr),
Operand::VReg(idx_vreg, VRegClass::Int)});
// Map this IR value to the vreg that now holds the loaded result.
value_vregs[value] = data_vreg;
return data_vreg;
}
}
int addr = EmitPtrValue(load->GetPtr(), function, value_vregs,
scalar_slots, array_slots, block);
int vreg = function.CreateVReg(VRegClass::Int);
@ -1449,27 +1469,71 @@ namespace mir
}
}
int addr = EmitPtrValue(store.GetPtr(), function, value_vregs,
scalar_slots, array_slots, block);
if (value_is_ptr)
// 缩放寻址GEP + Store → str [base, idx, uxtw #2]
bool scaled_store = false;
if (auto *gep = dynamic_cast<const ir::GetElementPtrInst *>(store.GetPtr()))
{
int val = EmitPtrValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Ptr), Operand::VReg(addr, VRegClass::Ptr)});
}
else if (value_is_float)
{
int val = EmitFloatValue(store.GetValue(), function, value_vregs, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Float), Operand::VReg(addr, VRegClass::Ptr)});
int idx_imm = 0;
if (!TryGetConstantInt(gep->GetIndex(), idx_imm))
{
scaled_store = true;
int base_addr = EmitPtrValue(gep->GetBasePtr(), function, value_vregs,
scalar_slots, array_slots, block);
int idx_vreg = EmitIntValue(gep->GetIndex(), function, value_vregs,
scalar_slots, array_slots, block);
if (value_is_ptr)
{
int val = EmitPtrValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Ptr),
Operand::VReg(base_addr, VRegClass::Ptr),
Operand::VReg(idx_vreg, VRegClass::Int)});
}
else if (value_is_float)
{
int val = EmitFloatValue(store.GetValue(), function, value_vregs, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Float),
Operand::VReg(base_addr, VRegClass::Ptr),
Operand::VReg(idx_vreg, VRegClass::Int)});
}
else
{
int val = EmitIntValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Int),
Operand::VReg(base_addr, VRegClass::Ptr),
Operand::VReg(idx_vreg, VRegClass::Int)});
}
}
}
else
if (!scaled_store)
{
int val = EmitIntValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Int), Operand::VReg(addr, VRegClass::Ptr)});
int addr = EmitPtrValue(store.GetPtr(), function, value_vregs,
scalar_slots, array_slots, block);
if (value_is_ptr)
{
int val = EmitPtrValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Ptr), Operand::VReg(addr, VRegClass::Ptr)});
}
else if (value_is_float)
{
int val = EmitFloatValue(store.GetValue(), function, value_vregs, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Float), Operand::VReg(addr, VRegClass::Ptr)});
}
else
{
int val = EmitIntValue(store.GetValue(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::StoreMem,
{Operand::VReg(val, VRegClass::Int), Operand::VReg(addr, VRegClass::Ptr)});
}
}
return;
}

@ -102,6 +102,9 @@ namespace mir
result.defs.push_back(ops[0].GetVRegId());
if (ops[1].GetKind() == Operand::Kind::VReg)
result.uses.push_back(ops[1].GetVRegId());
// Scaled addressing: an optional third operand is the index register; when it
// is a virtual register it is recorded as a use of this instruction.
if (ops.size() >= 3 && ops[2].GetKind() == Operand::Kind::VReg)
result.uses.push_back(ops[2].GetVRegId());
}
break;
@ -112,6 +115,9 @@ namespace mir
result.uses.push_back(ops[0].GetVRegId());
if (ops[1].GetKind() == Operand::Kind::VReg)
result.uses.push_back(ops[1].GetVRegId());
// Scaled addressing: an optional third operand is the index register; when it
// is a virtual register it is recorded as a use of this instruction.
if (ops.size() >= 3 && ops[2].GetKind() == Operand::Kind::VReg)
result.uses.push_back(ops[2].GetVRegId());
}
break;

Loading…
Cancel
Save