feat(backend):complete AArch64 arg passing (>8 args + mixed int/float) and add test timeout guards

Oliveira 1 month ago
parent 4764bd2e27
commit 3573e709d7

4
.gitignore vendored

@ -69,4 +69,6 @@ Thumbs.db
# Project outputs
# =========================
test/test_result/
sema_check
sema_check
.codex

@ -12,7 +12,7 @@
## 2. 当前实现状态
**目前处于初步完成阶段**。虽然初步测试能够通过全部 21 个官方功能与性能测试用例,但部分用例仍存在缺陷,后端生成效率和代码质量仍有较大提升空间。
**目前处于可用但仍待优化阶段**。功能测试可稳定通过,性能测试中个别样例仍存在运行时间过长或行为不稳定的问题,后端生成效率和代码质量仍有较大提升空间。
## 3. 核心逻辑与关键实现点
@ -32,6 +32,13 @@
- **多函数栈帧管理**
- 实现了每个函数独立的 `Prologue`(序言)和 `Epilogue`(尾声)。
- 严格遵循 16 字节栈对齐规范,正确保存和恢复 FP(X29)与 LR(X30)。
- **调用约定补全(本次更新)**
- 补齐了“超过 8 个参数”的栈传参与取参逻辑。
- 修复了混合参数(`int/ptr` 与 `float`)场景下寄存器编号错误的问题,按 AArch64 规则分别为 GPR/FPR 计数分配。
- 调用点新增栈参数区的 16 字节对齐分配与回收。
- **测试链路健壮性(本次更新)**
- `verify_asm.sh` 新增 QEMU 执行超时控制(默认 90 秒,可通过 `SY_QEMU_TIMEOUT` 覆盖)。
- `test_lab3_final.sh` 默认设置 `SY_QEMU_TIMEOUT=180`,避免性能样例导致整轮测试卡死。
## 4. 遗留问题与不足
@ -43,7 +50,7 @@
- **性能测试耗时过长:目前的 10 个性能测试用例运行速度非常慢,需确认是否会对 Lab3 产生影响**。
- **冗余指令严重**:由于采用了全栈槽模型(所有变量均存储在内存中),导致生成的汇编中充斥着大量的 `ldr/str` 指令。
- **寄存器分配缺失**:目前完全没有实现真正的寄存器分配逻辑(Lab5 任务),寄存器利用率极低。
- **调用约定限制**:当前仅支持前 8 个参数通过寄存器传递,尚未实现参数超过 8 个时的栈传参逻辑,不满足复杂函数调用的全量要求
- **调用约定仍不完整**:虽然已支持 `>8` 参数与混合 `int/float` 参数寄存器分配,但尚未覆盖更完整 ABI 细节(如更复杂聚合类型参数传递)
- **缺乏指令优化**:生成的指令序列较为死板,未进行窥孔优化或指令合并(如 `add` 移位操作的充分利用)。
## 5. 编译与运行指南
@ -68,4 +75,3 @@ cmake --build build -j "$(nproc)"
# 格式:./scripts/verify_asm.sh <.sy> <结果目录> --run
./scripts/verify_asm.sh test/test_case/functional/simple_add.sy test/test_result/manual --run
```

@ -10,6 +10,7 @@ PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
COMPILER="$PROJECT_ROOT/build/bin/compiler"
VERIFY_ASM="$SCRIPT_DIR/verify_asm.sh"
RESULT_DIR="$PROJECT_ROOT/test/test_result/lab3_final"
export SY_QEMU_TIMEOUT="${SY_QEMU_TIMEOUT:-180}"
# 颜色输出
RED='\033[0;31m'

@ -75,17 +75,30 @@ if [[ "$run_exec" == true ]]; then
stdout_file="$out_dir/$stem.stdout"
actual_file="$out_dir/$stem.actual.out"
run_timeout="${SY_QEMU_TIMEOUT:-90}"
echo "运行 $exe ..."
set +e
ulimit -s unlimited 2>/dev/null || true
export QEMU_STACK_SIZE=67108864
if [[ -f "$stdin_file" ]]; then
qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file"
if command -v timeout >/dev/null 2>&1; then
if [[ -f "$stdin_file" ]]; then
timeout "${run_timeout}s" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file"
else
timeout "${run_timeout}s" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file"
fi
else
qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file"
if [[ -f "$stdin_file" ]]; then
qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file"
else
qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file"
fi
fi
status=$?
set -e
if [[ $status -eq 124 ]]; then
echo "运行超时: ${run_timeout}s" >&2
exit 124
fi
cat "$stdout_file"
echo "退出码: $status"
{

@ -12,6 +12,16 @@ namespace {
using ValueSlotMap = std::unordered_map<const ir::Value*, int>;
// Rounds `value` up to a multiple of `align` (e.g. padding the outgoing
// stack-argument area to 16 bytes). `align` must be non-zero.
int AlignTo(int value, int align) {
  // Bump past the next boundary, then drop whatever overshoots it.
  const int bumped = value + align - 1;
  const int overshoot = bumped % align;
  return bumped - overshoot;
}
// Returns true when `ty` is any of the pointer kinds the IR distinguishes
// (generic pointer, pointer-to-int32, pointer-to-float). Callers use this to
// pick a 64-bit X register instead of a 32-bit W register.
bool IsPointerLike(const ir::Type& ty) {
  if (ty.IsPointer()) {
    return true;
  }
  if (ty.IsPtrInt32()) {
    return true;
  }
  return ty.IsPtrFloat();
}
// Returns true when `ty` is the IR float type; such values are routed through
// S registers rather than W/X registers by the callers in this file.
bool IsFloatLike(const ir::Type& ty) {
  const bool is_float = ty.IsFloat();
  return is_float;
}
PhysReg ToXReg(PhysReg reg) {
if ((int)reg >= (int)PhysReg::W0 && (int)reg <= (int)PhysReg::W15) {
return static_cast<PhysReg>((int)reg - (int)PhysReg::W0 + (int)PhysReg::X0);
@ -26,10 +36,50 @@ PhysReg ToSReg(PhysReg reg) {
return reg;
}
// Describes where a single call argument lives: either in a physical register
// or in a slot of the stack-argument area.
struct ArgLoc {
  // True when the argument is passed in `reg`; false when it is on the stack.
  bool in_reg{false};
  // Register holding the argument; only meaningful when `in_reg` is true.
  PhysReg reg{PhysReg::W0};
  // Byte offset from the base of the stack-argument area; only meaningful
  // when `in_reg` is false.
  int stack_offset{0};
};
// Determines where the `arg_no`-th formal parameter of `func` is located.
//
// Parameters are classified in declaration order. Float parameters draw from
// S0..S7 and all other parameters from W0..W7 (X0..X7 for pointer-like types),
// each class with its own counter. A parameter whose class has no register
// left gets the next 8-byte slot of the stack-argument area.
//
// Throws std::runtime_error when `arg_no` is not a valid parameter index.
ArgLoc GetFunctionArgLoc(const ir::Function& func, size_t arg_no) {
  constexpr int kRegsPerClass = 8;
  constexpr int kStackSlotBytes = 8;

  const auto& params = func.GetArgs();
  if (arg_no >= params.size()) {
    throw std::runtime_error(
        FormatError("mir", "函数参数索引越界: " + std::to_string(arg_no)));
  }

  int next_gpr = 0;
  int next_fpr = 0;
  int next_stack_slot = 0;
  ArgLoc result;

  // Every parameter up to and including the requested one must be replayed,
  // because each earlier parameter consumes a register or a stack slot.
  for (size_t idx = 0; idx <= arg_no; ++idx) {
    const auto& param_ty = *params[idx]->GetType();
    const bool wants_fpr = IsFloatLike(param_ty);
    const bool wants_xreg = IsPointerLike(param_ty);

    // Each register class keeps its own allocation counter.
    int& next_reg = wants_fpr ? next_fpr : next_gpr;

    ArgLoc here;
    if (next_reg < kRegsPerClass) {
      PhysReg bank_base = PhysReg::W0;
      if (wants_fpr) {
        bank_base = PhysReg::S0;
      } else if (wants_xreg) {
        bank_base = PhysReg::X0;
      }
      here.in_reg = true;
      here.reg = static_cast<PhysReg>(static_cast<int>(bank_base) + next_reg);
      ++next_reg;
    } else {
      here.in_reg = false;
      here.stack_offset = next_stack_slot * kStackSlotBytes;
      ++next_stack_slot;
    }
    result = here;
  }
  return result;
}
void EmitValueToReg(const ir::Value* value, PhysReg target,
const ValueSlotMap& slots, MachineBasicBlock& block) {
bool is_ptr = value->GetType()->IsPointer() || value->GetType()->IsPtrInt32() || value->GetType()->IsPtrFloat();
bool is_float = value->GetType()->IsFloat();
bool is_ptr = IsPointerLike(*value->GetType());
bool is_float = IsFloatLike(*value->GetType());
if (is_ptr) {
target = ToXReg(target);
@ -61,18 +111,29 @@ void EmitValueToReg(const ir::Value* value, PhysReg target,
}
if (auto* arg = dynamic_cast<const ir::Argument*>(value)) {
if (arg->GetArgNo() < 8) {
PhysReg src;
if (is_ptr) {
src = static_cast<PhysReg>((int)PhysReg::X0 + arg->GetArgNo());
} else if (is_float) {
src = static_cast<PhysReg>((int)PhysReg::S0 + arg->GetArgNo());
const auto* parent = arg->GetParent();
if (!parent) {
throw std::runtime_error(FormatError("mir", "参数未绑定到函数"));
}
const ArgLoc loc = GetFunctionArgLoc(*parent, arg->GetArgNo());
if (loc.in_reg) {
block.Append(Opcode::MovRR, {Operand::Reg(target), Operand::Reg(loc.reg)});
} else {
// Incoming stack args are at [old_sp + offset]. After prologue:
// x29 = old_sp - 16, so address is [x29 + 16 + offset].
const int fp_offset = 16 + loc.stack_offset;
if (fp_offset <= 4095) {
block.Append(Opcode::AddRRI, {Operand::Reg(PhysReg::X10),
Operand::Reg(PhysReg::X29),
Operand::Imm(fp_offset)});
} else {
src = static_cast<PhysReg>((int)PhysReg::W0 + arg->GetArgNo());
block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X11),
Operand::Imm(fp_offset)});
block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X10),
Operand::Reg(PhysReg::X29),
Operand::Reg(PhysReg::X11)});
}
block.Append(Opcode::MovRR, {Operand::Reg(target), Operand::Reg(src)});
} else {
throw std::runtime_error(FormatError("mir", "暂不支持超过 8 个参数"));
block.Append(Opcode::LoadR, {Operand::Reg(target), Operand::Reg(PhysReg::X10)});
}
return;
}
@ -145,9 +206,9 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
auto& store = static_cast<const ir::StoreInst&>(inst);
PhysReg val_reg = PhysReg::W8;
EmitValueToReg(store.GetValue(), val_reg, slots, block);
if (store.GetValue()->GetType()->IsPointer() || store.GetValue()->GetType()->IsPtrInt32() || store.GetValue()->GetType()->IsPtrFloat()) {
if (IsPointerLike(*store.GetValue()->GetType())) {
val_reg = ToXReg(val_reg);
} else if (store.GetValue()->GetType()->IsFloat()) {
} else if (IsFloatLike(*store.GetValue()->GetType())) {
val_reg = ToSReg(val_reg);
}
@ -169,9 +230,9 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
auto& load = static_cast<const ir::LoadInst&>(inst);
int dst_slot = function.CreateFrameIndex(static_cast<int>(GetTypeSize(*load.GetType())));
PhysReg dst_reg = PhysReg::W8;
if (load.GetType()->IsPointer() || load.GetType()->IsPtrInt32() || load.GetType()->IsPtrFloat()) {
if (IsPointerLike(*load.GetType())) {
dst_reg = ToXReg(dst_reg);
} else if (load.GetType()->IsFloat()) {
} else if (IsFloatLike(*load.GetType())) {
dst_reg = ToSReg(dst_reg);
}
@ -253,25 +314,90 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
case ir::Opcode::Call: {
auto& call = static_cast<const ir::CallInst&>(inst);
const auto& args = call.GetArgs();
std::vector<ArgLoc> arg_locs(args.size());
int gpr_idx = 0;
int fpr_idx = 0;
int stack_slots = 0;
for (size_t i = 0; i < args.size(); ++i) {
if (i < 8) {
// Determine if arg is a pointer
bool is_ptr = args[i]->GetType()->IsPointer() || args[i]->GetType()->IsPtrInt32() || args[i]->GetType()->IsPtrFloat();
PhysReg target = is_ptr ? static_cast<PhysReg>((int)PhysReg::X0 + i)
: static_cast<PhysReg>((int)PhysReg::W0 + i);
EmitValueToReg(args[i], target, slots, block);
const auto& ty = *args[i]->GetType();
const bool is_float = IsFloatLike(ty);
const bool is_ptr = IsPointerLike(ty);
if (is_float && fpr_idx < 8) {
arg_locs[i] = ArgLoc{true, static_cast<PhysReg>((int)PhysReg::S0 + fpr_idx), 0};
++fpr_idx;
} else if (!is_float && gpr_idx < 8) {
arg_locs[i] = ArgLoc{
true,
is_ptr ? static_cast<PhysReg>((int)PhysReg::X0 + gpr_idx)
: static_cast<PhysReg>((int)PhysReg::W0 + gpr_idx),
0};
++gpr_idx;
} else {
throw std::runtime_error("Only up to 8 arguments supported for now");
arg_locs[i] = ArgLoc{false, PhysReg::W0, stack_slots * 8};
++stack_slots;
}
}
int stack_arg_size = 0;
if (stack_slots > 0) {
stack_arg_size = AlignTo(stack_slots * 8, 16);
block.Append(Opcode::MovImm,
{Operand::Reg(PhysReg::X11), Operand::Imm(stack_arg_size)});
block.Append(Opcode::SubRR, {Operand::Reg(PhysReg::SP),
Operand::Reg(PhysReg::SP),
Operand::Reg(PhysReg::X11)});
}
for (size_t i = 0; i < args.size(); ++i) {
const ArgLoc& loc = arg_locs[i];
if (loc.in_reg) {
EmitValueToReg(args[i], loc.reg, slots, block);
continue;
}
PhysReg val_reg = PhysReg::W8;
if (IsPointerLike(*args[i]->GetType())) {
val_reg = ToXReg(val_reg);
} else if (IsFloatLike(*args[i]->GetType())) {
val_reg = ToSReg(val_reg);
}
EmitValueToReg(args[i], val_reg, slots, block);
if (loc.stack_offset == 0) {
block.Append(Opcode::MovRR,
{Operand::Reg(PhysReg::X10), Operand::Reg(PhysReg::SP)});
} else if (loc.stack_offset <= 4095) {
block.Append(Opcode::AddRRI, {Operand::Reg(PhysReg::X10),
Operand::Reg(PhysReg::SP),
Operand::Imm(loc.stack_offset)});
} else {
block.Append(Opcode::MovImm,
{Operand::Reg(PhysReg::X11), Operand::Imm(loc.stack_offset)});
block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X10),
Operand::Reg(PhysReg::SP),
Operand::Reg(PhysReg::X11)});
}
block.Append(Opcode::StoreR,
{Operand::Reg(val_reg), Operand::Reg(PhysReg::X10)});
}
block.Append(Opcode::Call, {Operand::Label(call.GetFunc()->GetName())});
if (stack_arg_size > 0) {
block.Append(Opcode::MovImm,
{Operand::Reg(PhysReg::X11), Operand::Imm(stack_arg_size)});
block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::SP),
Operand::Reg(PhysReg::SP),
Operand::Reg(PhysReg::X11)});
}
if (!call.GetType()->IsVoid()) {
int dst_slot = function.CreateFrameIndex(static_cast<int>(GetTypeSize(*call.GetType())));
PhysReg ret_reg = PhysReg::W0;
if (call.GetType()->IsFloat()) {
if (IsFloatLike(*call.GetType())) {
ret_reg = ToSReg(ret_reg);
} else if (call.GetType()->IsPointer() || call.GetType()->IsPtrInt32() || call.GetType()->IsPtrFloat()) {
} else if (IsPointerLike(*call.GetType())) {
ret_reg = ToXReg(ret_reg);
}
block.Append(Opcode::StoreStack, {Operand::Reg(ret_reg), Operand::FrameIndex(dst_slot)});

Loading…
Cancel
Save