diff --git a/include/mir/MIR.h b/include/mir/MIR.h index 4dfe142..db3ef89 100644 --- a/include/mir/MIR.h +++ b/include/mir/MIR.h @@ -124,6 +124,7 @@ enum class Opcode { Br, CondBr, Label, + LoadCallerStackArg, // 从调用者栈帧加载参数 }; enum class GlobalKind { @@ -229,13 +230,15 @@ class MachineFunction { int GetFrameSize() const { return frame_size_; } void SetFrameSize(int size) { frame_size_ = size; } - + int GetLocalVarsSize() const { return local_vars_size_; } + void SetLocalVarsSize(int s) { local_vars_size_ = s; } private: std::string name_; MachineBasicBlock* entry_ = nullptr; std::vector> blocks_; std::vector frame_slots_; int frame_size_ = 0; + int local_vars_size_ = 0; }; //std::unique_ptr LowerToMIR(const ir::Module& module); void RunRegAlloc(MachineFunction& function); diff --git a/src/mir/AsmPrinter.cpp b/src/mir/AsmPrinter.cpp index 3f8d776..575169d 100644 --- a/src/mir/AsmPrinter.cpp +++ b/src/mir/AsmPrinter.cpp @@ -24,7 +24,7 @@ const FrameSlot& GetFrameSlot(const MachineFunction& function, } // 32位整数加载/存储 -void EmitStackLoad(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::SP) { +void EmitStackLoad(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " lw " << PhysRegName(dst) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -34,7 +34,7 @@ void EmitStackLoad(std::ostream& os, PhysReg dst, int offset, PhysReg base = Phy } } -void EmitStackStore(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::SP) { +void EmitStackStore(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " sw " << PhysRegName(src) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -45,7 +45,7 @@ void EmitStackStore(std::ostream& os, PhysReg src, int offset, PhysReg base = Ph } // 64位指针加载/存储 -void EmitStackLoad64(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::SP) { +void EmitStackLoad64(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " ld " << PhysRegName(dst) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -55,7 +55,7 @@ void EmitStackLoad64(std::ostream& os, PhysReg dst, int offset, PhysReg base = P } } -void EmitStackStore64(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::SP) { +void EmitStackStore64(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " sd " << PhysRegName(src) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -66,7 +66,7 @@ void EmitStackStore64(std::ostream& os, PhysReg src, int offset, PhysReg base = } // 浮点加载/存储(保持32位) -void EmitStackLoadFloat(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::SP) { +void EmitStackLoadFloat(std::ostream& os, PhysReg dst, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " flw " << PhysRegName(dst) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -76,7 +76,7 @@ void EmitStackLoadFloat(std::ostream& os, PhysReg dst, int offset, PhysReg base } } -void EmitStackStoreFloat(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::SP) { +void EmitStackStoreFloat(std::ostream& os, PhysReg src, int offset, PhysReg base = PhysReg::S0) { if (offset >= -2048 && offset <= 2047) { os << " fsw " << PhysRegName(src) << ", " << offset << "(" << PhysRegName(base) << ")\n"; } else { @@ -95,7 +95,8 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { } int frame_size = function.GetFrameSize(); // 局部变量区大小(正数) - int total_frame_size = frame_size + 16; // +16 用于保存 ra(8) 和 s0(8) + int local_vars = function.GetLocalVarsSize(); + int total_frame = local_vars + 16 ; bool prologue_done = false; for (const auto& block_ptr : function.GetBlocks()) { @@ -111,19 +112,19 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { // 在入口块的第一条指令前输出序言 if (!prologue_done && block.GetName() == "entry") { - // 分配栈帧:sp -= total_frame_size - if (total_frame_size <= 2047) { - os << " addi sp, sp, -" << total_frame_size << "\n"; + // 分配栈帧:sp -= total_frame + if (total_frame <= 2047) { + os << " addi sp, sp, -" << total_frame << "\n"; } else { - os << " li t4, -" << total_frame_size << "\n"; + os << " li t4, -" << total_frame << "\n"; os << " add sp, sp, t4\n"; } // 保存 ra 和 s0(在局部变量区之后,即 sp + frame_size 处) // ra 保存在 sp + frame_size // s0 保存在 sp + frame_size + 8 - int ra_offset = frame_size; - int s0_offset = frame_size + 8; + int ra_offset = local_vars; + int s0_offset = local_vars + 8; if (ra_offset <= 2047) { os << " sd ra, " << ra_offset << "(sp)\n"; @@ -140,7 +141,7 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { os << " add t4, sp, t4\n"; os << " sd s0, 0(t4)\n"; } - + os << " mv s0, sp\n"; prologue_done = true; } @@ -155,47 +156,77 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { break; case Opcode::Load: { - if (ops.size() == 2 && ops.at(1).GetKind() == Operand::Kind::Reg) { - // 寄存器间接寻址 - 使用 ld(64位) - os << " ld " << PhysRegName(ops.at(0).GetReg()) << ", 0(" - << PhysRegName(ops.at(1).GetReg()) << ")\n"; - } else { - int frame_idx = ops.at(1).GetFrameIndex(); - const auto& slot = function.GetFrameSlot(frame_idx); - // 根据槽大小决定加载宽度 - if (slot.size == 8) { - EmitStackLoad64(os, ops.at(0).GetReg(), slot.offset); + if (ops.size() == 2 && ops[1].GetKind() == Operand::Kind::Reg) { + os << " ld " << PhysRegName(ops[0].GetReg()) << ", 0(" << PhysRegName(ops[1].GetReg()) << ")\n"; + } else if (ops.size() == 2 && ops[1].GetKind() == Operand::Kind::Imm) { + // 用于调用者 outgoing 存储的占位偏移(将在 Outgoing 中修正) + int offset = ops[1].GetImm(); // 实际偏移 = local_vars + 16 + offset + os << " ld " << PhysRegName(ops[0].GetReg()) << ", " << offset << "(sp)\n"; } else { - EmitStackLoad(os, ops.at(0).GetReg(), slot.offset); + int frame_idx = ops[1].GetFrameIndex(); + const auto& slot = function.GetFrameSlot(frame_idx); + if (slot.size == 8) EmitStackLoad64(os, ops[0].GetReg(), slot.offset); + else EmitStackLoad(os, ops[0].GetReg(), slot.offset); } - } - break; + break; } - + case Opcode::Store: { - if (ops.size() == 2 && ops.at(1).GetKind() == Operand::Kind::Reg) { - // 寄存器间接寻址 - 使用 sd(64位) - os << " sd " << PhysRegName(ops.at(0).GetReg()) << ", 0(" - << PhysRegName(ops.at(1).GetReg()) << ")\n"; - } else { - int frame_idx = ops.at(1).GetFrameIndex(); - const auto& slot = function.GetFrameSlot(frame_idx); - // 根据槽大小决定存储宽度 - if (slot.size == 8) { - EmitStackStore64(os, ops.at(0).GetReg(), slot.offset); + if (ops.size() == 2 && ops[1].GetKind() == Operand::Kind::Reg) { + os << " sd " << PhysRegName(ops[0].GetReg()) << ", 0(" << PhysRegName(ops[1].GetReg()) << ")\n"; + } else if (ops.size() == 2 && ops[1].GetKind() == Operand::Kind::Imm) { + // outgoing 存储:偏移 = local_vars + 16 + ops[1].GetImm() + int offset = ops[1].GetImm(); + // 实际偏移需在 AsmPrinter 中加上 local_vars+16,这里简单先直接用 offset(动态修正稍复杂) + // 临时方案:直接生成 sw t0, offset(sp),但 offset 应为 local_vars+16=? + // 由于 AsmPrinter 中可访问 function.GetLocalVarsSize(),我们计算: + int actual_offset = function.GetLocalVarsSize() + 16 + offset; + if (actual_offset <= 2047) os << " sd " << PhysRegName(ops[0].GetReg()) << ", " << actual_offset << "(sp)\n"; + else { /* 扩展大偏移 */ } } else { - EmitStackStore(os, ops.at(0).GetReg(), slot.offset); + int frame_idx = ops[1].GetFrameIndex(); + const auto& slot = function.GetFrameSlot(frame_idx); + if (slot.size == 8) EmitStackStore64(os, ops[0].GetReg(), slot.offset); + else EmitStackStore(os, ops[0].GetReg(), slot.offset); } - } - break; - } + break; + } + case Opcode::LoadCallerStackArg: { + // ops: [0] dst (T0), [1] dstFrameIndex, [2] argvIndex (Imm) + int argv_index = ops[2].GetImm(); + int dst_slot = ops[1].GetFrameIndex(); + int total_frame = function.GetFrameSize(); + // 调用者栈参数位于 sp + total_frame + argv_index*8 + int caller_offset = total_frame + argv_index * 8; + // 加载到 T0 + if (caller_offset <= 2047) { + os << " ld " << PhysRegName(ops[0].GetReg()) << ", " << caller_offset << "(sp)\n"; + } else { + os << " li t4, " << caller_offset << "\n"; + os << " add t4, sp, t4\n"; + os << " ld " << PhysRegName(ops[0].GetReg()) << ", 0(t4)\n"; + } + // 再存入本地槽 + const auto& slot = function.GetFrameSlot(dst_slot); + if (slot.size == 8) { + EmitStackStore64(os, ops[0].GetReg(), slot.offset); + } else { + EmitStackStore(os, ops[0].GetReg(), slot.offset); + } + break; + } + case Opcode::Add: os << " add " << PhysRegName(ops.at(0).GetReg()) << ", " << PhysRegName(ops.at(1).GetReg()) << ", " << PhysRegName(ops.at(2).GetReg()) << "\n"; break; - + case Opcode::Addi: + os << " addi " << PhysRegName(ops[0].GetReg()) << ", " + << PhysRegName(ops[1].GetReg()) << ", " + << ops[2].GetImm() << "\n"; + break; case Opcode::Sub: os << " sub " << PhysRegName(ops.at(0).GetReg()) << ", " << PhysRegName(ops.at(1).GetReg()) << ", " @@ -299,10 +330,10 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { const auto& slot = function.GetFrameSlot(frame_idx); // 计算地址(64 位),offset 是正数 if (slot.offset <= 2047) { - os << " addi " << PhysRegName(ops.at(0).GetReg()) << ", sp, " << slot.offset << "\n"; + os << " addi " << PhysRegName(ops.at(0).GetReg()) << ", s0, " << slot.offset << "\n"; } else { os << " li " << PhysRegName(ops.at(0).GetReg()) << ", " << slot.offset << "\n"; - os << " add " << PhysRegName(ops.at(0).GetReg()) << ", sp, " + os << " add " << PhysRegName(ops.at(0).GetReg()) << ", s0, " << PhysRegName(ops.at(0).GetReg()) << "\n"; } break; @@ -322,32 +353,33 @@ void PrintAsmFunction(const MachineFunction& function, std::ostream& os) { case Opcode::Ret:{ // 恢复 ra 和 s0 - int ra_offset = frame_size; - int s0_offset = frame_size + 8; + int ra_offset = local_vars; + int s0_offset = local_vars + 8; if (ra_offset <= 2047) { - os << " ld ra, " << ra_offset << "(sp)\n"; + os << " ld ra, " << ra_offset << "(s0)\n"; } else { os << " li t3, " << ra_offset << "\n"; - os << " add t3, sp, t3\n"; + os << " add t3, s0, t3\n"; os << " ld ra, 0(t3)\n"; } - + + // 恢复 sp + if (total_frame <= 2047) { + os << " addi sp, s0, " << total_frame << "\n"; + } else { + os << " li t3, " << total_frame << "\n"; + os << " add sp, s0, t3\n"; + } + if (s0_offset <= 2047) { - os << " ld s0, " << s0_offset << "(sp)\n"; + os << " ld s0, " << s0_offset << "(s0)\n"; } else { os << " li t3, " << s0_offset << "\n"; - os << " add t3, sp, t3\n"; + os << " add t3, s0, t3\n"; os << " ld s0, 0(t3)\n"; } - // 恢复 sp - if (total_frame_size <= 2047) { - os << " addi sp, sp, " << total_frame_size << "\n"; - } else { - os << " li t3, " << total_frame_size << "\n"; - os << " add sp, sp, t3\n"; - } os << " ret\n"; break; diff --git a/src/mir/FrameLowering.cpp b/src/mir/FrameLowering.cpp index 367dbc5..9e04a74 100644 --- a/src/mir/FrameLowering.cpp +++ b/src/mir/FrameLowering.cpp @@ -18,19 +18,22 @@ void RunFrameLowering(MachineFunction& function) { int cursor = 0; const auto& slots = function.GetFrameSlots(); - // 为每个栈槽分配偏移:正偏移,表示相对于 sp 的偏移量 - // 栈向下增长,sp 减小后,局部变量在 sp 上方(正偏移) + // 为每个栈槽分配偏移 for (const auto& slot : slots) { - int align = slot.size; // 自然对齐(4 或 8 字节) - cursor = AlignTo(cursor, align); // 对齐到所需边界 - function.GetFrameSlot(slot.index).offset = cursor; // 正偏移 - cursor += slot.size; // 分配空间 + int align = slot.size; + cursor = AlignTo(cursor, align); + function.GetFrameSlot(slot.index).offset = cursor; + cursor += slot.size; } - // 栈帧总大小(局部变量区域)按 16 字节对齐 - function.SetFrameSize(AlignTo(cursor, 16)); + // 局部变量区按 16 字节对齐 + int local_vars_size = AlignTo(cursor, 16); + function.SetLocalVarsSize(local_vars_size); - // 在入口块插入 Prologue/Epilogue 占位符 + // 总帧大小 = 局部变量区 + 16(保存 ra 和 s0) + function.SetFrameSize(local_vars_size + 16); + + // 插入 Prologue/Epilogue 占位符(原逻辑) auto& insts = function.GetEntry()->GetInstructions(); std::vector lowered; lowered.emplace_back(Opcode::Prologue); diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp index f86a587..d93e3c9 100644 --- a/src/mir/Lowering.cpp +++ b/src/mir/Lowering.cpp @@ -368,33 +368,95 @@ void LowerInstructionToBlock(const ir::Instruction& inst, MachineFunction& funct } case ir::Opcode::Call: { - auto& call = static_cast(inst); - for (size_t i = 0; i < call.GetNumArgs() && i < 8; i++) { - // 根据参数的实际类型决定使用浮点还是整数寄存器 - bool arg_is_float = call.GetArg(i)->GetType()->IsFloat32(); - if (arg_is_float) { - PhysReg floatArgReg = static_cast(static_cast(PhysReg::FA0) + i); - EmitValueToReg(call.GetArg(i), floatArgReg, slots, block); - } else { - PhysReg intArgReg = static_cast(static_cast(PhysReg::A0) + i); - EmitValueToReg(call.GetArg(i), intArgReg, slots, block); + auto& call = static_cast(inst); + int numArgs = static_cast(call.GetNumArgs()); + int regArgs = std::min(numArgs, 8); + int stackArgs = numArgs - regArgs; + + // 调用者动态扩展栈以容纳栈参数 + if (stackArgs > 0) { + int stackSpace = stackArgs * 8; + // sp -= stackSpace + if (stackSpace <= 2047) { + block.Append(Opcode::Addi, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Imm(-stackSpace)}); + } else { + block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::T4), Operand::Imm(-stackSpace)}); + block.Append(Opcode::Add, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::T4)}); + } } - } - std::string func_name = call.GetCalleeName(); - block.Append(Opcode::Call, {Operand::Func(func_name)}); - if (!call.GetType()->IsVoid()) { - int dst_slot = function.CreateFrameIndex(); - bool ret_is_float = call.GetType()->IsFloat32(); - if (ret_is_float) { - StoreRegToSlot(PhysReg::FA0, dst_slot, block, true); - } else { - StoreRegToSlot(PhysReg::A0, dst_slot, block, false); + + // 寄存器参数 (前 8 个) + for (int i = 0; i < regArgs; i++) { + bool arg_is_float = call.GetArg(i)->GetType()->IsFloat32(); + if (arg_is_float) { + PhysReg freg = static_cast(static_cast(PhysReg::FA0) + i); + EmitValueToReg(call.GetArg(i), freg, slots, block); + } else { + PhysReg ireg = static_cast(static_cast(PhysReg::A0) + i); + EmitValueToReg(call.GetArg(i), ireg, slots, block); + } } - slots.emplace(&inst, dst_slot); - } - return; + + // 栈参数:存入 sp + offset 处 + for (int i = regArgs; i < numArgs; i++) { + EmitValueToReg(call.GetArg(i), PhysReg::T0, slots, block); + int offset = (i - regArgs) * 8; + // 计算 sp+offset 到 t4 + if (offset == 0) { + block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::T4), Operand::Imm(0)}); // t4=0 + block.Append(Opcode::Add, {Operand::Reg(PhysReg::T4), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::T4)}); + } else if (offset <= 2047) { + block.Append(Opcode::Addi, {Operand::Reg(PhysReg::T4), + Operand::Reg(PhysReg::SP), + Operand::Imm(offset)}); + } else { + block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::T4), Operand::Imm(offset)}); + block.Append(Opcode::Add, {Operand::Reg(PhysReg::T4), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::T4)}); + } + // Store T0 -> [t4] + block.Append(Opcode::Store, {Operand::Reg(PhysReg::T0), Operand::Reg(PhysReg::T4)}); + } + + std::string func_name = call.GetCalleeName(); + block.Append(Opcode::Call, {Operand::Func(func_name)}); + + // 恢复 sp + if (stackArgs > 0) { + int stackSpace = stackArgs * 8; + if (stackSpace <= 2047) { + block.Append(Opcode::Addi, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Imm(stackSpace)}); + } else { + block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::T4), Operand::Imm(stackSpace)}); + block.Append(Opcode::Add, {Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::SP), + Operand::Reg(PhysReg::T4)}); + } + } + + // 返回值处理(保持不变) + if (!call.GetType()->IsVoid()) { + int dst_slot = function.CreateFrameIndex(); + bool ret_is_float = call.GetType()->IsFloat32(); + if (ret_is_float) { + StoreRegToSlot(PhysReg::FA0, dst_slot, block, true); + } else { + StoreRegToSlot(PhysReg::A0, dst_slot, block, false); + } + slots.emplace(&inst, dst_slot); + } + return; } - + case ir::Opcode::ICmp: { auto& icmp = static_cast(inst); int dst_slot = function.CreateFrameIndex(); @@ -599,33 +661,38 @@ std::unique_ptr LowerFunctionToMIR(const ir::Function& func) { auto machine_func = std::make_unique(func.GetName()); ValueSlotMap slots; - // ========== 新增:为函数参数分配栈槽 ========== + // 计算局部变量大小 for (size_t i = 0; i < func.GetNumArgs(); i++) { - ir::Argument* arg = func.GetArgument(i); - - // 🔑 修改:指针类型分配 8 字节,其他分配 4 字节 - int size = 4; - if (arg->GetType()->IsPtrInt32() || arg->GetType()->IsPtrFloat32()) { - size = 8; // 指针在 RV64 上是 8 字节 - } - int slot = machine_func->CreateFrameIndex(size); - - PhysReg argReg = static_cast(static_cast(PhysReg::A0) + i); - MachineBasicBlock* entry = machine_func->GetEntry(); - - // 存储参数到栈槽 - if (arg->GetType()->IsPtrInt32() || arg->GetType()->IsPtrFloat32()) { - // 指针类型:使用 64 位存储(注意:Store 在 MIR 层会根据 slot.size 决定用 sw 还是 sd) - entry->Append(Opcode::Store, {Operand::Reg(argReg), Operand::FrameIndex(slot)}); - } else if (arg->GetType()->IsInt32()) { - entry->Append(Opcode::Store, {Operand::Reg(argReg), Operand::FrameIndex(slot)}); - } else if (arg->GetType()->IsFloat32()) { - entry->Append(Opcode::StoreFloat, {Operand::Reg(argReg), Operand::FrameIndex(slot)}); - } - - slots[arg] = slot; - } + ir::Argument* arg = func.GetArgument(i); + + // 指针类型分配 8 字节,其他 4 字节(槽大小) + int size = (arg->GetType()->IsPtrInt32() || arg->GetType()->IsPtrFloat32()) ? 8 : 4; + int slot = machine_func->CreateFrameIndex(size); + MachineBasicBlock* entry = machine_func->GetEntry(); + + if (i < 8) { + // 寄存器参数:从 A0..A7 / FA0..FA7 保存到本地栈槽 + PhysReg argReg; + if (arg->GetType()->IsFloat32()) { + argReg = static_cast(static_cast(PhysReg::FA0) + i); + entry->Append(Opcode::StoreFloat, {Operand::Reg(argReg), Operand::FrameIndex(slot)}); + } else { + argReg = static_cast(static_cast(PhysReg::A0) + i); + entry->Append(Opcode::Store, {Operand::Reg(argReg), Operand::FrameIndex(slot)}); + } + } else { + // 栈参数:从调用者的动态栈帧加载到本地栈槽 + // 使用 LoadCallerStackArg 特殊指令,参数索引 = i - 8 + entry->Append(Opcode::LoadCallerStackArg, { + Operand::Reg(PhysReg::T0), // 临时寄存器 + Operand::FrameIndex(slot), // 目标本地栈槽 + Operand::Imm(static_cast(i - 8)) // 栈参数索引 + }); + } + slots[arg] = slot; + } + // 第一遍:创建所有 IR 基本块对应的 MIR 基本块 for (const auto& ir_block : func.GetBlocks()) { GetOrCreateBlock(ir_block.get(), *machine_func);