diff --git a/.gitignore b/.gitignore index d321707..3f53e35 100644 --- a/.gitignore +++ b/.gitignore @@ -69,4 +69,6 @@ Thumbs.db # Project outputs # ========================= test/test_result/ -sema_check \ No newline at end of file +sema_check + +.codex \ No newline at end of file diff --git a/doc/lab3-进度.md b/doc/lab3-进度.md index 7bb93ec..288b576 100644 --- a/doc/lab3-进度.md +++ b/doc/lab3-进度.md @@ -12,7 +12,7 @@ ## 2. 当前实现状态 -**目前处于初步完成阶段**。虽然初步测试能够通过全部 21 个官方功能与性能测试用例,但部分用例仍存在缺陷,后端生成效率和代码质量仍有较大提升空间。 +**目前处于可用但仍待优化阶段**。功能测试可稳定通过,性能测试中个别样例仍存在运行时间过长或行为不稳定的问题,后端生成效率和代码质量仍有较大提升空间。 ## 3. 核心逻辑与关键实现点 @@ -32,6 +32,13 @@ - **多函数栈帧管理**: - 实现了每个函数独立的 `Prologue`(序言)和 `Epilogue`(尾声)。 - 严格遵循 16 字节栈对齐规范,正确保存和恢复 FP(X29)与 LR(X30)。 +- **调用约定补全(本次更新)**: + - 补齐了“超过 8 个参数”的栈传参与取参逻辑。 + - 修复了混合参数(`int/ptr` 与 `float`)场景下寄存器编号错误的问题,按 AArch64 规则分别为 GPR/FPR 计数分配。 + - 调用点新增栈参数区的 16 字节对齐分配与回收。 +- **测试链路健壮性(本次更新)**: + - `verify_asm.sh` 新增 QEMU 执行超时控制(默认 90 秒,可通过 `SY_QEMU_TIMEOUT` 覆盖)。 + - `test_lab3_final.sh` 默认设置 `SY_QEMU_TIMEOUT=180`,避免性能样例导致整轮测试卡死。 ## 4. 遗留问题与不足 @@ -43,7 +50,7 @@ - **性能测试耗时过长:目前的 10 个性能测试用例运行速度非常慢,看对lab3是否有影响**。 - **冗余指令严重**:由于采用了全栈槽模型(所有变量均存储在内存中),导致生成的汇编中充斥着大量的 `ldr/str` 指令。 - **寄存器分配缺失**:目前完全没有实现真正的寄存器分配逻辑(Lab5 任务),寄存器利用率极低。 -- **调用约定限制**:当前仅支持前 8 个参数通过寄存器传递,尚未实现参数超过 8 个时的栈传参逻辑,不满足复杂函数调用的全量要求。 +- **调用约定仍不完整**:虽然已支持 `>8` 参数与混合 `int/float` 参数寄存器分配,但尚未覆盖更完整 ABI 细节(如更复杂聚合类型参数传递)。 - **缺乏指令优化**:生成的指令序列较为死板,未进行窥孔优化或指令合并(如 `add` 移位操作的充分利用)。 ## 5. 编译与运行指南 @@ -68,4 +75,3 @@ cmake --build build -j "$(nproc)" # 格式:./scripts/verify_asm.sh <.sy文件> <结果目录> --run ./scripts/verify_asm.sh test/test_case/functional/simple_add.sy test/test_result/manual --run ``` - diff --git a/scripts/test_lab3_final.sh b/scripts/test_lab3_final.sh index 9da5836..f739597 100755 --- a/scripts/test_lab3_final.sh +++ b/scripts/test_lab3_final.sh @@ -10,6 +10,7 @@ PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" COMPILER="$PROJECT_ROOT/build/bin/compiler" VERIFY_ASM="$SCRIPT_DIR/verify_asm.sh" RESULT_DIR="$PROJECT_ROOT/test/test_result/lab3_final" +export SY_QEMU_TIMEOUT="${SY_QEMU_TIMEOUT:-180}" # 颜色输出 RED='\033[0;31m' diff --git a/scripts/verify_asm.sh b/scripts/verify_asm.sh index fb7dcb4..1c45d13 100755 --- a/scripts/verify_asm.sh +++ b/scripts/verify_asm.sh @@ -75,17 +75,30 @@ if [[ "$run_exec" == true ]]; then stdout_file="$out_dir/$stem.stdout" actual_file="$out_dir/$stem.actual.out" + run_timeout="${SY_QEMU_TIMEOUT:-90}" echo "运行 $exe ..." set +e ulimit -s unlimited 2>/dev/null || true export QEMU_STACK_SIZE=67108864 - if [[ -f "$stdin_file" ]]; then - qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file" + if command -v timeout >/dev/null 2>&1; then + if [[ -f "$stdin_file" ]]; then + timeout "${run_timeout}s" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file" + else + timeout "${run_timeout}s" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file" + fi else - qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file" + if [[ -f "$stdin_file" ]]; then + qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file" + else + qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$stdout_file" + fi fi status=$? set -e + if [[ $status -eq 124 ]]; then + echo "运行超时: ${run_timeout}s" >&2 + exit 124 + fi cat "$stdout_file" echo "退出码: $status" { diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp index 9382220..233378b 100644 --- a/src/mir/Lowering.cpp +++ b/src/mir/Lowering.cpp @@ -12,6 +12,16 @@ namespace { using ValueSlotMap = std::unordered_map; +int AlignTo(int value, int align) { + return ((value + align - 1) / align) * align; +} + +bool IsPointerLike(const ir::Type& ty) { + return ty.IsPointer() || ty.IsPtrInt32() || ty.IsPtrFloat(); +} + +bool IsFloatLike(const ir::Type& ty) { return ty.IsFloat(); } + PhysReg ToXReg(PhysReg reg) { if ((int)reg >= (int)PhysReg::W0 && (int)reg <= (int)PhysReg::W15) { return static_cast((int)reg - (int)PhysReg::W0 + (int)PhysReg::X0); @@ -26,10 +36,50 @@ PhysReg ToSReg(PhysReg reg) { return reg; } +struct ArgLoc { + bool in_reg = false; + PhysReg reg = PhysReg::W0; + int stack_offset = 0; // bytes from stack-args base +}; + +ArgLoc GetFunctionArgLoc(const ir::Function& func, size_t arg_no) { + int gpr_idx = 0; + int fpr_idx = 0; + int stack_slots = 0; + + const auto& args = func.GetArgs(); + for (size_t i = 0; i < args.size(); ++i) { + const auto& ty = *args[i]->GetType(); + const bool is_float = IsFloatLike(ty); + const bool is_ptr = IsPointerLike(ty); + + ArgLoc loc; + if (is_float && fpr_idx < 8) { + loc.in_reg = true; + loc.reg = static_cast((int)PhysReg::S0 + fpr_idx); + ++fpr_idx; + } else if (!is_float && gpr_idx < 8) { + loc.in_reg = true; + loc.reg = is_ptr ? static_cast((int)PhysReg::X0 + gpr_idx) + : static_cast((int)PhysReg::W0 + gpr_idx); + ++gpr_idx; + } else { + loc.in_reg = false; + loc.stack_offset = stack_slots * 8; + ++stack_slots; + } + + if (i == arg_no) return loc; + } + + throw std::runtime_error( + FormatError("mir", "函数参数索引越界: " + std::to_string(arg_no))); +} + void EmitValueToReg(const ir::Value* value, PhysReg target, const ValueSlotMap& slots, MachineBasicBlock& block) { - bool is_ptr = value->GetType()->IsPointer() || value->GetType()->IsPtrInt32() || value->GetType()->IsPtrFloat(); - bool is_float = value->GetType()->IsFloat(); + bool is_ptr = IsPointerLike(*value->GetType()); + bool is_float = IsFloatLike(*value->GetType()); if (is_ptr) { target = ToXReg(target); @@ -61,18 +111,29 @@ void EmitValueToReg(const ir::Value* value, PhysReg target, } if (auto* arg = dynamic_cast(value)) { - if (arg->GetArgNo() < 8) { - PhysReg src; - if (is_ptr) { - src = static_cast((int)PhysReg::X0 + arg->GetArgNo()); - } else if (is_float) { - src = static_cast((int)PhysReg::S0 + arg->GetArgNo()); + const auto* parent = arg->GetParent(); + if (!parent) { + throw std::runtime_error(FormatError("mir", "参数未绑定到函数")); + } + const ArgLoc loc = GetFunctionArgLoc(*parent, arg->GetArgNo()); + if (loc.in_reg) { + block.Append(Opcode::MovRR, {Operand::Reg(target), Operand::Reg(loc.reg)}); + } else { + // Incoming stack args are at [old_sp + offset]. After prologue: + // x29 = old_sp - 16, so address is [x29 + 16 + offset]. + const int fp_offset = 16 + loc.stack_offset; + if (fp_offset <= 4095) { + block.Append(Opcode::AddRRI, {Operand::Reg(PhysReg::X10), + Operand::Reg(PhysReg::X29), + Operand::Imm(fp_offset)}); } else { - src = static_cast((int)PhysReg::W0 + arg->GetArgNo()); + block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X11), + Operand::Imm(fp_offset)}); + block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X10), + Operand::Reg(PhysReg::X29), + Operand::Reg(PhysReg::X11)}); } - block.Append(Opcode::MovRR, {Operand::Reg(target), Operand::Reg(src)}); - } else { - throw std::runtime_error(FormatError("mir", "暂不支持超过 8 个参数")); + block.Append(Opcode::LoadR, {Operand::Reg(target), Operand::Reg(PhysReg::X10)}); } return; } @@ -145,9 +206,9 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, auto& store = static_cast(inst); PhysReg val_reg = PhysReg::W8; EmitValueToReg(store.GetValue(), val_reg, slots, block); - if (store.GetValue()->GetType()->IsPointer() || store.GetValue()->GetType()->IsPtrInt32() || store.GetValue()->GetType()->IsPtrFloat()) { + if (IsPointerLike(*store.GetValue()->GetType())) { val_reg = ToXReg(val_reg); - } else if (store.GetValue()->GetType()->IsFloat()) { + } else if (IsFloatLike(*store.GetValue()->GetType())) { val_reg = ToSReg(val_reg); } @@ -169,9 +230,9 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, auto& load = static_cast(inst); int dst_slot = function.CreateFrameIndex(static_cast(GetTypeSize(*load.GetType()))); PhysReg dst_reg = PhysReg::W8; - if (load.GetType()->IsPointer() || load.GetType()->IsPtrInt32() || load.GetType()->IsPtrFloat()) { + if (IsPointerLike(*load.GetType())) { dst_reg = ToXReg(dst_reg); - } else if (load.GetType()->IsFloat()) { + } else if (IsFloatLike(*load.GetType())) { dst_reg = ToSReg(dst_reg); } @@ -253,25 +314,90 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, case ir::Opcode::Call: { auto& call = static_cast(inst); const auto& args = call.GetArgs(); + + std::vector arg_locs(args.size()); + int gpr_idx = 0; + int fpr_idx = 0; + int stack_slots = 0; for (size_t i = 0; i < args.size(); ++i) { - if (i < 8) { - // Determine if arg is a pointer - bool is_ptr = args[i]->GetType()->IsPointer() || args[i]->GetType()->IsPtrInt32() || args[i]->GetType()->IsPtrFloat(); - PhysReg target = is_ptr ? static_cast((int)PhysReg::X0 + i) - : static_cast((int)PhysReg::W0 + i); - EmitValueToReg(args[i], target, slots, block); + const auto& ty = *args[i]->GetType(); + const bool is_float = IsFloatLike(ty); + const bool is_ptr = IsPointerLike(ty); + if (is_float && fpr_idx < 8) { + arg_locs[i] = ArgLoc{true, static_cast((int)PhysReg::S0 + fpr_idx), 0}; + ++fpr_idx; + } else if (!is_float && gpr_idx < 8) { + arg_locs[i] = ArgLoc{ + true, + is_ptr ? static_cast((int)PhysReg::X0 + gpr_idx) + : static_cast((int)PhysReg::W0 + gpr_idx), + 0}; + ++gpr_idx; } else { - throw std::runtime_error("Only up to 8 arguments supported for now"); + arg_locs[i] = ArgLoc{false, PhysReg::W0, stack_slots * 8}; + ++stack_slots; } } + + int stack_arg_size = 0; + if (stack_slots > 0) { + stack_arg_size = AlignTo(stack_slots * 8, 16); + block.Append(Opcode::MovImm, + {Operand::Reg(PhysReg::X11), Operand::Imm(stack_arg_size)}); + block.Append(Opcode::SubRR, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::X11)}); + } + + for (size_t i = 0; i < args.size(); ++i) { + const ArgLoc& loc = arg_locs[i]; + if (loc.in_reg) { + EmitValueToReg(args[i], loc.reg, slots, block); + continue; + } + + PhysReg val_reg = PhysReg::W8; + if (IsPointerLike(*args[i]->GetType())) { + val_reg = ToXReg(val_reg); + } else if (IsFloatLike(*args[i]->GetType())) { + val_reg = ToSReg(val_reg); + } + EmitValueToReg(args[i], val_reg, slots, block); + + if (loc.stack_offset == 0) { + block.Append(Opcode::MovRR, + {Operand::Reg(PhysReg::X10), Operand::Reg(PhysReg::SP)}); + } else if (loc.stack_offset <= 4095) { + block.Append(Opcode::AddRRI, {Operand::Reg(PhysReg::X10), + Operand::Reg(PhysReg::SP), + Operand::Imm(loc.stack_offset)}); + } else { + block.Append(Opcode::MovImm, + {Operand::Reg(PhysReg::X11), Operand::Imm(loc.stack_offset)}); + block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X10), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::X11)}); + } + block.Append(Opcode::StoreR, + {Operand::Reg(val_reg), Operand::Reg(PhysReg::X10)}); + } + block.Append(Opcode::Call, {Operand::Label(call.GetFunc()->GetName())}); + + if (stack_arg_size > 0) { + block.Append(Opcode::MovImm, + {Operand::Reg(PhysReg::X11), Operand::Imm(stack_arg_size)}); + block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::X11)}); + } if (!call.GetType()->IsVoid()) { int dst_slot = function.CreateFrameIndex(static_cast(GetTypeSize(*call.GetType()))); PhysReg ret_reg = PhysReg::W0; - if (call.GetType()->IsFloat()) { + if (IsFloatLike(*call.GetType())) { ret_reg = ToSReg(ret_reg); - } else if (call.GetType()->IsPointer() || call.GetType()->IsPtrInt32() || call.GetType()->IsPtrFloat()) { + } else if (IsPointerLike(*call.GetType())) { ret_reg = ToXReg(ret_reg); } block.Append(Opcode::StoreStack, {Operand::Reg(ret_reg), Operand::FrameIndex(dst_slot)});