// nudt-compiler-cpp/src/mir/Lowering.cpp

#include "mir/MIR.h"

#include <stdexcept>
#include <unordered_map>
#include <cstring>

#include "ir/IR.h"
#include "utils/Log.h"

// Debug tracing switch for this translation unit. While DEBUG_Lower is
// defined, DEBUG_MSG(msg) streams `msg` to std::cerr prefixed with
// "[Lower Debug] "; otherwise DEBUG_MSG expands to nothing.
// NOTE(review): DEBUG_Lower is defined unconditionally on the next line, so
// tracing is always enabled in this file - confirm that this is intended.
#define DEBUG_Lower

#ifdef DEBUG_Lower
#include <iostream>
#define DEBUG_MSG(msg) std::cerr << "[Lower Debug] " << msg << std::endl
#else
#define DEBUG_MSG(msg)
#endif

namespace mir {
namespace {

using ValueSlotMap = std::unordered_map<const ir::Value*, int>;

// Returns the object representation of a 32-bit float as an unsigned integer.
// The bytes are copied rather than converted, so the bit pattern is preserved.
static uint32_t FloatToBits(float f) {
  uint32_t raw = 0;
  std::memcpy(&raw, &f, sizeof(raw));
  return raw;
}

// Returns the size of `type` in bytes. Falls back to 4 when `type` is null or
// when the type reports a size of zero.
int GetTypeSize(const ir::Type* type) {
  constexpr int kFallbackSize = 4;
  if (type == nullptr) {
    return kFallbackSize;
  }
  const size_t bytes = type->Size();
  if (bytes > 0) {
    return static_cast<int>(bytes);
  }
  return kFallbackSize;
}

// Maps an IR integer-comparison predicate to the MIR condition code of the
// same name. Any predicate not listed here yields CondCode::AL.
CondCode IcmpToCondCode(ir::IcmpInst::Predicate pred) {
  using Pred = ir::IcmpInst::Predicate;
  CondCode code = CondCode::AL;
  switch (pred) {
    case Pred::EQ: code = CondCode::EQ; break;
    case Pred::NE: code = CondCode::NE; break;
    case Pred::LT: code = CondCode::LT; break;
    case Pred::GT: code = CondCode::GT; break;
    case Pred::LE: code = CondCode::LE; break;
    case Pred::GE: code = CondCode::GE; break;
    default: break;
  }
  return code;
}

// Maps an IR floating-point comparison predicate to a MIR condition code.
//
// `isOrdered` is an output: it is set to false for the U* (unordered)
// predicates and to true for every other predicate, including unknown ones.
// Unknown predicates yield CondCode::AL.
//
// NOTE(review): ordered and unordered variants map to the same condition
// code here. After an AArch64 FCMP with a NaN operand the LT/LE/NE conditions
// presumably hold while EQ/GT/GE do not, so OLT/OLE/ONE and UEQ/UGT/UGE may
// need different codes - confirm against the Arm architecture manual.
CondCode FcmpToCondCode(ir::FcmpInst::Predicate pred, bool& isOrdered) {
  using Pred = ir::FcmpInst::Predicate;

  // Step 1: classify the predicate as ordered or unordered.
  switch (pred) {
    case Pred::UEQ:
    case Pred::UNE:
    case Pred::ULT:
    case Pred::UGT:
    case Pred::ULE:
    case Pred::UGE:
      isOrdered = false;
      break;
    default:
      isOrdered = true;
      break;
  }

  // Step 2: pick the condition code shared by both variants.
  switch (pred) {
    case Pred::OEQ:
    case Pred::UEQ:
      return CondCode::EQ;
    case Pred::ONE:
    case Pred::UNE:
      return CondCode::NE;
    case Pred::OLT:
    case Pred::ULT:
      return CondCode::LT;
    case Pred::OGT:
    case Pred::UGT:
      return CondCode::GT;
    case Pred::OLE:
    case Pred::ULE:
      return CondCode::LE;
    case Pred::OGE:
    case Pred::UGE:
      return CondCode::GE;
    default:
      return CondCode::AL;
  }
}

// Builds the assembly label of an IR basic block: ".L<function>_<block>".
// Returns ".Lunknown" when `bb` is null or has no parent function. A block
// without a name uses its own address (in decimal) as the block part.
// NOTE(review): LowerFunction names MIR blocks with ".L" + function + "_" +
// block name and has no fallback for empty names, so the two spellings differ
// for unnamed blocks - confirm that every block is named.
std::string GetBlockLabel(const ir::BasicBlock* bb) {
  if (bb == nullptr || !bb->GetParent()) {
    return ".Lunknown";
  }

  std::string label = ".L";
  label += bb->GetParent()->GetName();
  label += "_";

  const std::string ownName = bb->GetName();
  if (ownName.empty()) {
    label += std::to_string(reinterpret_cast<uintptr_t>(bb));
  } else {
    label += ownName;
  }
  return label;
}

// Returns `type` viewed as an ir::ArrayType when it is an array type, and
// nullptr otherwise. A null `type` also yields nullptr instead of being
// dereferenced.
static const ir::ArrayType* GetArrayType(const ir::Type* type) {
  if (type == nullptr || !type->IsArray()) {
    return nullptr;
  }
  return static_cast<const ir::ArrayType*>(type);
}

// Computes the byte stride of every dimension of `arrayType`, outermost
// dimension first. The innermost stride is the element size, hard-coded to
// 4 bytes; each outer stride is the next inner stride multiplied by that
// inner dimension's extent. Returns an empty vector for a null `arrayType`.
static std::vector<int> GetArrayStrides(const ir::ArrayType* arrayType) {
  if (arrayType == nullptr) {
    return {};
  }
  const std::vector<int>& dims = arrayType->GetDimensions();

  // Fill back to front in a pre-sized vector instead of repeatedly inserting
  // at the front, which shifted every element on each iteration.
  std::vector<int> strides(dims.size());
  int stride = 4;  // element size in bytes
  for (size_t i = dims.size(); i-- > 0;) {
    strides[i] = stride;
    stride *= dims[i];
  }
  return strides;
}

// Bounds-checked operand access: returns operand `index` of `inst`, or
// nullptr when `index` is not smaller than the operand count.
const ir::Value* GetOperand(const ir::Instruction& inst, size_t index) {
  if (index >= inst.GetNumOperands()) {
    return nullptr;
  }
  return inst.GetOperand(index);
}

// Returns operand `index` of `inst` as a basic block. Yields nullptr when the
// operand does not exist or is not an ir::BasicBlock (dynamic_cast of a null
// pointer is itself null, so no separate check is needed).
const ir::BasicBlock* GetBasicBlockOperand(const ir::Instruction& inst, size_t index) {
  return dynamic_cast<const ir::BasicBlock*>(GetOperand(inst, index));
}

// Appends to `block` the instructions that bring `value` into register
// `target`.
//
//   * ConstantInt            -> MovImm target, value
//   * ConstantFloat          -> bit pattern written to a per-constant frame
//                               slot through scratch register W10, then
//                               loaded from that slot into `target`
//   * GlobalValue            -> load from a per-global 8-byte frame slot
//   * ConstantZero /
//     ConstantAggregateZero  -> MovImm target, 0
//   * anything else          -> load from the slot recorded in `slots`
//
// @param value     IR value to materialize; must not be null.
// @param target    destination physical register.
// @param slots     value -> frame index map. Taken by const reference for
//                  interface compatibility, but a new entry is added (via
//                  const_cast) the first time a float constant or a global is
//                  seen, so the caller's map must not actually be const.
// @param block     machine basic block receiving the instructions.
// @param function  machine function used to allocate frame slots.
// @throws std::runtime_error if `value` is null or has no recorded slot.
void EmitValueToReg(const ir::Value* value, PhysReg target,
                    const ValueSlotMap& slots, MachineBasicBlock& block,
                    MachineFunction& function) {
  if (value == nullptr) {
    throw std::runtime_error(FormatError("mir", "无法将空值加载到寄存器"));
  }
  auto& mutableSlots = const_cast<ValueSlotMap&>(slots);

  // Integer constant: a plain immediate move.
  if (auto* constant = dynamic_cast<const ir::ConstantInt*>(value)) {
    block.Append(Opcode::MovImm,
                 {Operand::Reg(target), Operand::Imm(constant->GetValue())});
    return;
  }

  // Float constant: routed through a stack slot.
  if (auto* fconstant = dynamic_cast<const ir::ConstantFloat*>(value)) {
    auto it = slots.find(value);
    if (it == slots.end()) {
      it = mutableSlots.emplace(value, function.CreateFrameIndex(4)).first;
    }
    const int slot = it->second;
    const uint32_t bits = FloatToBits(fconstant->GetValue());

    // The slot is re-initialized at every use. Writing it only at the first
    // use would leave it uninitialized on any path that reaches a later use
    // without passing through the block where the first use was lowered.
    // W10 is the scratch register because callers in this file may hold a
    // live address in X8 or a live value in W9 while a constant is emitted.
    block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W10),
                                  Operand::Imm(static_cast<int>(bits))});
    block.Append(Opcode::StoreStack,
                 {Operand::Reg(PhysReg::W10), Operand::FrameIndex(slot)});
    block.Append(Opcode::LoadStack,
                 {Operand::Reg(target), Operand::FrameIndex(slot)});
    return;
  }

  // Global value: a dedicated 8-byte slot is expected to hold its address.
  // NOTE(review): nothing visible here ever stores the global's address into
  // that slot, so this load presumably reads uninitialized stack memory.
  // TODO: materialize the address (e.g. ADRP + ADD) once MIR can express it.
  if (dynamic_cast<const ir::GlobalValue*>(value)) {
    auto it = slots.find(value);
    if (it == slots.end()) {
      it = mutableSlots.emplace(value, function.CreateFrameIndex(8)).first;
    }
    block.Append(Opcode::LoadStack,
                 {Operand::Reg(target), Operand::FrameIndex(it->second)});
    return;
  }

  // Zero constants: load an immediate 0.
  if (dynamic_cast<const ir::ConstantZero*>(value) ||
      dynamic_cast<const ir::ConstantAggregateZero*>(value)) {
    block.Append(Opcode::MovImm, {Operand::Reg(target), Operand::Imm(0)});
    return;
  }

  // Everything else must already live in a frame slot.
  const auto it = slots.find(value);
  if (it == slots.end()) {
    DEBUG_MSG("Value not found: " << value->GetName());
    // Dump every known slot key to help diagnose the missing value.
    for (auto& p : slots) {
      DEBUG_MSG("  Slot key: " << p.first->GetName());
    }
    throw std::runtime_error(
        FormatError("mir", "找不到值对应的栈槽: " + value->GetName()));
  }

  block.Append(Opcode::LoadStack,
               {Operand::Reg(target), Operand::FrameIndex(it->second)});
}

void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
                      ValueSlotMap& slots, MachineBasicBlock& block,
                      std::unordered_map<const ir::BasicBlock*,
                      MachineBasicBlock*>& blockMap) {
  //auto& block = function.GetEntry();
  DEBUG_MSG("Processing instruction: " << inst.GetName()
              << " (opcode: " << static_cast<int>(inst.GetOpcode()) << ")");

  switch (inst.GetOpcode()) {
    case ir::Opcode::Alloca: {
      slots.emplace(&inst, function.CreateFrameIndex(GetTypeSize(inst.GetType().get())));
      return;
    }
    case ir::Opcode::Store: {
      auto& store = static_cast<const ir::StoreInst&>(inst);
      auto dst = slots.find(store.GetPtr());
      if (dst == slots.end()) {
        //throw std::runtime_error(
        //    FormatError("mir", "暂不支持对非栈变量地址进行写入"));
        // 对于非栈变量地址（如 GEP 结果），地址本身在栈槽中
        // 需要先加载地址，然后存储值到该地址
        // 先加载地址到 x8
        EmitValueToReg(store.GetPtr(), PhysReg::X8, slots, block, function);
        // 加载值到 w9
        EmitValueToReg(store.GetValue(), PhysReg::W9, slots, block, function);
        // 存储值到地址
        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W9), Operand::Reg(PhysReg::X8)});
        return;
      }
      EmitValueToReg(store.GetValue(), PhysReg::W8, slots, block, function);
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst->second)});
      return;
    }
    case ir::Opcode::Load: {
      auto& load = static_cast<const ir::LoadInst&>(inst);
      auto src = slots.find(load.GetPtr());
      if (src == slots.end()) {
        //throw std::runtime_error(
        //    FormatError("mir", "暂不支持对非栈变量地址进行读取"));
        // 对于非栈变量地址（如 GEP 结果），地址本身在栈槽中
        // 需要先加载地址，然后从该地址加载值
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));
        // 加载地址到 x8
        EmitValueToReg(load.GetPtr(), PhysReg::X8, slots, block, function);
        // 从地址加载值到 w9
        block.Append(Opcode::LoadStack,
                    {Operand::Reg(PhysReg::W9), Operand::Reg(PhysReg::X8)});
        // 存储值到结果栈槽
        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W9), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
      }
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get())); // 分配结果槽
      block.Append(Opcode::LoadStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(src->second)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::Add: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));
      EmitValueToReg(bin.GetLhs(), PhysReg::W8, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::W9, slots, block, function);
      block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W9)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::Sub: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      EmitValueToReg(bin.GetLhs(), PhysReg::W8, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::W9, slots, block, function);
      block.Append(Opcode::SubRR, {Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W9)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::Mul: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      EmitValueToReg(bin.GetLhs(), PhysReg::W8, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::W9, slots, block, function);
      block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W8),
                                   Operand::Reg(PhysReg::W9)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::Div: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      EmitValueToReg(bin.GetLhs(), PhysReg::W8, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::W9, slots, block, function);
      block.Append(Opcode::SDivRR, {Operand::Reg(PhysReg::W8),
                                    Operand::Reg(PhysReg::W8),
                                    Operand::Reg(PhysReg::W9)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::Ret: {
      auto& ret = static_cast<const ir::ReturnInst&>(inst);
      EmitValueToReg(ret.GetValue(), PhysReg::W0, slots, block, function);
      block.Append(Opcode::Ret);
      return;
    }
    case ir::Opcode::FAdd: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      // 浮点值加载到 S0, S1（使用浮点寄存器）
      EmitValueToReg(bin.GetLhs(), PhysReg::S0, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::S1, slots, block, function);
      block.Append(Opcode::FAddRR, {Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S1)});
      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::FSub: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      // 浮点值加载到 S0, S1（使用浮点寄存器）
      EmitValueToReg(bin.GetLhs(), PhysReg::S0, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::S1, slots, block, function);
      block.Append(Opcode::FSubRR, {Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S1)});
      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::FMul: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      // 浮点值加载到 S0, S1（使用浮点寄存器）
      EmitValueToReg(bin.GetLhs(), PhysReg::S0, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::S1, slots, block, function);
      block.Append(Opcode::FMulRR, {Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S1)});
      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::FDiv: {
      auto& bin = static_cast<const ir::BinaryInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));  // 分配结果栈槽
      // 浮点值加载到 S0, S1（使用浮点寄存器）
      EmitValueToReg(bin.GetLhs(), PhysReg::S0, slots, block, function);
      EmitValueToReg(bin.GetRhs(), PhysReg::S1, slots, block, function);
      block.Append(Opcode::FDivRR, {Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S0),
                                    Operand::Reg(PhysReg::S1)});
      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    // ========== 整数比较指令（修正版）==========
    case ir::Opcode::Icmp: {
      auto& icmp = static_cast<const ir::IcmpInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

      EmitValueToReg(icmp.GetLhs(), PhysReg::W8, slots, block, function);
      EmitValueToReg(icmp.GetRhs(), PhysReg::W9, slots, block, function);

      block.Append(Opcode::CmpRR, {Operand::Reg(PhysReg::W8), Operand::Reg(PhysReg::W9)});
      CondCode cc = IcmpToCondCode(icmp.GetPredicate());

      // 使用 CSET 模式
      block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W8), Operand::Imm(1)});
      block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W9), Operand::Imm(0)});

      std::string true_label = ".L_cset_true_" + std::to_string(reinterpret_cast<uintptr_t>(&icmp));
      std::string end_label = ".L_cset_end_" + std::to_string(reinterpret_cast<uintptr_t>(&icmp));

      block.Append(Opcode::BCond, {Operand::Cond(cc), Operand::Label(true_label)});
      block.Append(Opcode::MovReg, {Operand::Reg(PhysReg::W8), Operand::Reg(PhysReg::W9)});
      block.Append(Opcode::B, {Operand::Label(end_label)});
      block.Append(Opcode::Label, {Operand::Label(true_label)});
      block.Append(Opcode::Label, {Operand::Label(end_label)});

      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    // ========== 浮点比较指令 ==========
    case ir::Opcode::FCmp: {
      auto& fcmp = static_cast<const ir::FcmpInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

      // 加载浮点操作数到 s0, s1
      EmitValueToReg(fcmp.GetLhs(), PhysReg::S0, slots, block, function);
      EmitValueToReg(fcmp.GetRhs(), PhysReg::S1, slots, block, function);

      // 生成浮点比较指令
      block.Append(Opcode::FCmpRR, {Operand::Reg(PhysReg::S0), Operand::Reg(PhysReg::S1)});

      // 简化实现：存储 1 作为结果
      block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W8), Operand::Imm(1)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    // ========== 跳转指令（使用标签操作数）==========
    case ir::Opcode::Br: {
      DEBUG_MSG("Processing Br");
      auto& br = static_cast<const ir::BranchInst&>(inst);

      auto* cond = br.GetCondition();
      DEBUG_MSG("Condition value ptr: " << cond);
      DEBUG_MSG("Condition name: " << cond->GetName());
      auto it = slots.find(cond);
      if (it == slots.end()) {
          DEBUG_MSG("Condition not found in slots!");
          // 输出所有 slots 的键名
          for (auto& p : slots) {
              DEBUG_MSG("  Slot key: " << p.first->GetName());
          }
      }

      if (br.IsConditional()) {
        // 条件跳转: br i1 %cond, label %then, label %else
        // 加载条件值到 w8
        EmitValueToReg(br.GetCondition(), PhysReg::W8, slots, block, function);

        // 比较条件值是否为 0
        block.Append(Opcode::CmpRI, {Operand::Reg(PhysReg::W8), Operand::Imm(0)});

        // 获取目标基本块的标签名
        const ir::BasicBlock* irTrueTarget = br.GetTrueTarget();
        const ir::BasicBlock* irFalseTarget = br.GetFalseTarget();

        std::string trueLabel = GetBlockLabel(irTrueTarget);
        std::string falseLabel = GetBlockLabel(irFalseTarget);

        // 生成 B.NE true_label
        block.Append(Opcode::BCond, {Operand::Cond(CondCode::NE), Operand::Label(trueLabel)});
        // 生成 B false_label
        block.Append(Opcode::B, {Operand::Label(falseLabel)});
        DEBUG_MSG("Generating conditional branch: cond=" << br.GetCondition()->GetName()
         << ", true=" << trueLabel << ", false=" << falseLabel);
      } else {
        // 无条件跳转: br label %target
        const ir::BasicBlock* irTarget = br.GetTarget();
        std::string targetLabel = GetBlockLabel(irTarget);
        DEBUG_MSG("b: targetLabel is " << GetBlockLabel(irTarget));

        // 生成 B target_label
        block.Append(Opcode::B, {Operand::Label(targetLabel)});
      }
      return;
    }
    // ========== 函数调用 ==========
    case ir::Opcode::Call: {
      auto& call = static_cast<const ir::CallInst&>(inst);
      const ir::Function* callee = call.GetCallee();
      const std::string& calleeName = callee->GetName();

      // 分配结果栈槽（如果有返回值）
      int dst_slot = -1;
      if (!inst.GetType()->IsVoid()) {
        dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));
      }

      // 按照 ARM64 调用约定传递参数
      const auto& args = call.GetArgs();
      size_t intArgCount = 0;
      size_t fpArgCount = 0;

      for (size_t i = 0; i < args.size(); ++i) {
        const auto* arg = args[i];
        const ir::Type* argType = arg->GetType().get();

        if (argType->IsFloat()) {
          // 浮点参数
          PhysReg reg = static_cast<PhysReg>(static_cast<int>(PhysReg::S0) + fpArgCount);
          EmitValueToReg(arg, reg, slots, block, function);
          fpArgCount++;
        } else {
          // 整数参数
          PhysReg reg = static_cast<PhysReg>(static_cast<int>(PhysReg::W0) + intArgCount);
          EmitValueToReg(arg, reg, slots, block, function);
          intArgCount++;
        }
      }

      // 生成调用指令
      block.Append(Opcode::Call, {Operand::Imm(0)});  // 实际需要传递函数名

      // 保存返回值
      if (dst_slot != -1) {
        if (inst.GetType()->IsFloat()) {
          block.Append(Opcode::StoreStack,
                       {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
        } else {
          block.Append(Opcode::StoreStack,
                       {Operand::Reg(PhysReg::W0), Operand::FrameIndex(dst_slot)});
        }
        slots.emplace(&inst, dst_slot);
      }
      return;
    }
    // ========== 类型转换指令 ==========
    case ir::Opcode::ZExt: {
      auto& zext = static_cast<const ir::ZExtInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

      // 加载源值到 w8
      EmitValueToReg(zext.GetValue(), PhysReg::W8, slots, block, function);

      // 零扩展：i1 -> i32，直接存储即可（因为 i1 已经是 0 或 1）
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::SIToFP: {
      auto& sitofp = static_cast<const ir::SIToFPInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

      // 加载整数到 w8
      EmitValueToReg(sitofp.GetValue(), PhysReg::W8, slots, block, function);

      // 整数转浮点：SCVTF s0, w8
      block.Append(Opcode::SIToFP, {Operand::Reg(PhysReg::S0), Operand::Reg(PhysReg::W8)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    case ir::Opcode::FPToSI: {
      auto& fptosi = static_cast<const ir::FPToSIInst&>(inst);
      int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

      // 加载浮点数到 s0
      EmitValueToReg(fptosi.GetValue(), PhysReg::S0, slots, block, function);

      // 浮点转整数：FCVTZS w8, s0
      block.Append(Opcode::FPToSI, {Operand::Reg(PhysReg::W8), Operand::Reg(PhysReg::S0)});
      block.Append(Opcode::StoreStack,
                   {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    // ========== GEP 指令（计算数组元素地址）==========
    case ir::Opcode::GEP: {
      auto& gep = static_cast<const ir::GEPInst&>(inst);

      // GEP 返回指针类型，在 ARM64 上指针是 8 字节
      int dst_slot = function.CreateFrameIndex(8);

      // 获取基地址（数组的起始地址）
      ir::Value* base = gep.GetBase();
      const auto& indices = gep.GetIndices();

      // 加载基地址到 x8（使用 64 位寄存器存储地址）
      EmitValueToReg(base, PhysReg::X8, slots, block, function);

      if (indices.empty()) {
        // 没有索引，直接返回基地址
        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::X8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
      }

      // 获取数组类型信息，计算每个维度的步长
      const ir::Type* baseType = base->GetType().get();

      // 如果基地址是指针类型，需要解引用获取元素类型
      if (baseType->IsPtrInt32() || baseType->IsPtrFloat() || baseType->IsPtrInt1()) {
        // 对于指针类型，第一个索引是偏移量（以元素为单位）
        // 例如：int* p; p[1] 的 GEP 中 indices[0] = 1
        if (indices.size() >= 1) {
          // 加载索引到 x9
          EmitValueToReg(indices[0], PhysReg::X9, slots, block, function);

          // 乘以元素大小（int/float 是 4 字节）
          block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X10), Operand::Imm(4)});
          block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::X9),
                                      Operand::Reg(PhysReg::X9),
                                      Operand::Reg(PhysReg::X10)});

          // 地址 = base + index * 4
          block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X8),
                                      Operand::Reg(PhysReg::X8),
                                      Operand::Reg(PhysReg::X9)});
        }

        // 存储计算出的地址
        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::X8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
      }

      // 如果基地址是数组类型，需要处理多维数组
      if (baseType->IsArray()) {
        const ir::ArrayType* arrayType = static_cast<const ir::ArrayType*>(baseType);
        const std::vector<int>& dims = arrayType->GetDimensions();

        // 计算每个维度的步长
        std::vector<int> strides(dims.size());
        int stride = 4;  // 元素大小（int/float 是 4 字节）
        for (int i = dims.size() - 1; i >= 0; --i) {
          strides[i] = stride;
          stride *= dims[i];
        }

        // 计算总偏移量
        // 地址 = base + index0 * stride0 + index1 * stride1 + ...
        size_t numIndices = indices.size();

        // 限制索引数量（不能超过维度数）
        if (numIndices > dims.size()) {
          numIndices = dims.size();
        }

        // 加载当前地址到 x9 作为偏移量累加器
        block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X9), Operand::Imm(0)});

        for (size_t i = 0; i < numIndices; ++i) {
          // 加载当前索引到 x10
          EmitValueToReg(indices[i], PhysReg::X10, slots, block, function);

          // 乘以步长
          block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X11), Operand::Imm(strides[i])});
          block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::X10),
                                      Operand::Reg(PhysReg::X10),
                                      Operand::Reg(PhysReg::X11)});

          // 累加到偏移量
          block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X9),
                                      Operand::Reg(PhysReg::X9),
                                      Operand::Reg(PhysReg::X10)});
        }

        // 最终地址 = base + offset
        block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X8),
                                    Operand::Reg(PhysReg::X8),
                                    Operand::Reg(PhysReg::X9)});

        // 存储计算出的地址
        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::X8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
      }

      // 其他情况：简单处理
      // 只处理第一个索引
      if (indices.size() >= 1) {
        EmitValueToReg(indices[0], PhysReg::X9, slots, block, function);

        // 乘以元素大小（默认 4 字节）
        block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::X10), Operand::Imm(4)});
        block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::X9),
                                    Operand::Reg(PhysReg::X9),
                                    Operand::Reg(PhysReg::X10)});

        block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X8),
                                    Operand::Reg(PhysReg::X8),
                                    Operand::Reg(PhysReg::X9)});
      }

      // 存储计算出的地址
      block.Append(Opcode::StoreStack,
                  {Operand::Reg(PhysReg::X8), Operand::FrameIndex(dst_slot)});
      slots.emplace(&inst, dst_slot);
      return;
    }
    // 处理 Trunc 指令
    case ir::Opcode::Trunc: {
        auto& inst_ref = static_cast<const ir::Instruction&>(inst);
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        // 假设 Trunc 指令有 GetValue() 方法
        // 如果没有，需要通过操作数列表获取
        const ir::Value* src_val = nullptr;
        if (inst.GetNumOperands() > 0) {
            src_val = inst.GetOperand(0);
        }
        if (src_val) {
            EmitValueToReg(src_val, PhysReg::W8, slots, block, function);
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 Mod 指令
    case ir::Opcode::Mod: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        // 通过操作数获取左右值
        const ir::Value* lhs = nullptr;
        const ir::Value* rhs = nullptr;
        if (inst.GetNumOperands() >= 2) {
            lhs = inst.GetOperand(0);
            rhs = inst.GetOperand(1);
        }

        if (lhs && rhs) {
            EmitValueToReg(lhs, PhysReg::W8, slots, block, function);
            EmitValueToReg(rhs, PhysReg::W9, slots, block, function);

            // a % b = a - (a / b) * b
            block.Append(Opcode::SDivRR, {Operand::Reg(PhysReg::W10),
                                          Operand::Reg(PhysReg::W8),
                                          Operand::Reg(PhysReg::W9)});
            block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::W10),
                                        Operand::Reg(PhysReg::W10),
                                        Operand::Reg(PhysReg::W9)});
            block.Append(Opcode::SubRR, {Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W10)});
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 And 指令
    case ir::Opcode::And: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        const ir::Value* lhs = nullptr;
        const ir::Value* rhs = nullptr;
        if (inst.GetNumOperands() >= 2) {
            lhs = inst.GetOperand(0);
            rhs = inst.GetOperand(1);
        }

        if (lhs && rhs) {
            EmitValueToReg(lhs, PhysReg::W8, slots, block, function);
            EmitValueToReg(rhs, PhysReg::W9, slots, block, function);
            block.Append(Opcode::AndRR, {Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W9)});
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 Or 指令
    case ir::Opcode::Or: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        const ir::Value* lhs = nullptr;
        const ir::Value* rhs = nullptr;
        if (inst.GetNumOperands() >= 2) {
            lhs = inst.GetOperand(0);
            rhs = inst.GetOperand(1);
        }

        if (lhs && rhs) {
            EmitValueToReg(lhs, PhysReg::W8, slots, block, function);
            EmitValueToReg(rhs, PhysReg::W9, slots, block, function);
            block.Append(Opcode::OrRR, {Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W9)});
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 Not 指令
    case ir::Opcode::Not: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        const ir::Value* src_val = nullptr;
        if (inst.GetNumOperands() > 0) {
            src_val = inst.GetOperand(0);
        }

        if (src_val) {
            EmitValueToReg(src_val, PhysReg::W8, slots, block, function);
            // NOT  = XOR with -1
            block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W9), Operand::Imm(-1)});
            block.Append(Opcode::EorRR, {Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W8),
                                        Operand::Reg(PhysReg::W9)});
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::W8), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 CondBr 指令
    case ir::Opcode::CondBr: {
        // CondBr 通常有两个目标基本块和一个条件
        const ir::Value* condition = nullptr;
        const ir::BasicBlock* trueTarget = nullptr;
        const ir::BasicBlock* falseTarget = nullptr;

        if (inst.GetNumOperands() >= 3) {
            condition = inst.GetOperand(0);
            // 操作数1和2应该是 BasicBlock 引用
            // 具体获取方式取决于你的 IR 实现
            if (auto* bb = dynamic_cast<const ir::BasicBlock*>(inst.GetOperand(1))) {
                trueTarget = bb;
            }
            if (auto* bb = dynamic_cast<const ir::BasicBlock*>(inst.GetOperand(2))) {
                falseTarget = bb;
            }
        }

        if (condition && trueTarget && falseTarget) {
            EmitValueToReg(condition, PhysReg::W8, slots, block, function);
            block.Append(Opcode::CmpRI, {Operand::Reg(PhysReg::W8), Operand::Imm(0)});

            std::string trueLabel = GetBlockLabel(trueTarget);
            std::string falseLabel = GetBlockLabel(falseTarget);

            block.Append(Opcode::BCond, {Operand::Cond(CondCode::NE), Operand::Label(trueLabel)});
            block.Append(Opcode::B, {Operand::Label(falseLabel)});
        }
        return;
    }
    // 处理 FPExt（浮点扩展）
    case ir::Opcode::FPExt: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        const ir::Value* src_val = nullptr;
        if (inst.GetNumOperands() > 0) {
            src_val = inst.GetOperand(0);
        }

        if (src_val) {
            EmitValueToReg(src_val, PhysReg::S0, slots, block, function);
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    // 处理 FPTrunc（浮点截断）
    case ir::Opcode::FPTrunc: {
        int dst_slot = function.CreateFrameIndex(GetTypeSize(inst.GetType().get()));

        const ir::Value* src_val = nullptr;
        if (inst.GetNumOperands() > 0) {
            src_val = inst.GetOperand(0);
        }

        if (src_val) {
            EmitValueToReg(src_val, PhysReg::S0, slots, block, function);
        }

        block.Append(Opcode::StoreStack,
                    {Operand::Reg(PhysReg::S0), Operand::FrameIndex(dst_slot)});
        slots.emplace(&inst, dst_slot);
        return;
    }
    //throw std::runtime_error(FormatError("mir", "暂不支持该 IR 指令"));
    throw std::runtime_error(FormatError("mir", "暂不支持该 IR 指令，opcode: "
                        + std::to_string(static_cast<int>(inst.GetOpcode()))));
  }
}

} // namespace

// Lowers one IR function into a freshly allocated MachineFunction.
//
// The work happens in four ordered steps:
//   1. Every formal argument gets its own frame slot, which is recorded in
//      the value-to-slot map used by instruction lowering.
//   2. Every IR basic block gets a machine basic block labelled
//      ".L<function name>_<block name>".
//   3. Stores are appended to the entry block that copy incoming argument
//      registers into the argument frame slots.
//   4. Every IR instruction is lowered into the machine block that mirrors
//      its parent IR block.
//
// Throws std::runtime_error when an IR block has no machine counterpart.
// Exceptions raised by LowerInstruction are not caught here.
std::unique_ptr<MachineFunction> LowerFunction(const ir::Function& func) {
  // Number of arguments per register class that are read from registers.
  // NOTE(review): arguments beyond this limit receive a frame slot below,
  // but no store is emitted for them in this function.
  constexpr size_t kRegsPerClass = 8;

  // What step 3 needs to know about each formal argument.
  struct IncomingArg {
    int frameSlot;
    bool inFpReg;
  };

  auto result = std::make_unique<MachineFunction>(func.GetName());
  const std::string fnName = func.GetName();
  ValueSlotMap slots;
  std::vector<IncomingArg> incoming;

  // Step 1: one frame slot per formal argument.
  for (const auto& formal : func.GetArguments()) {
    const int frameSlot =
        result->CreateFrameIndex(GetTypeSize(formal->GetType().get()));
    slots.emplace(formal.get(), frameSlot);
    incoming.push_back({frameSlot, formal->GetType()->IsFloat()});
  }

  // Step 2: mirror every IR block with a machine block and remember the
  // pairing so branches and the instruction pass can find their targets.
  std::unordered_map<const ir::BasicBlock*, MachineBasicBlock*> blockMap;
  for (const auto& irBlock : func.GetBlocks()) {
    auto lowered = std::make_unique<MachineBasicBlock>(
        ".L" + fnName + "_" + irBlock->GetName());
    MachineBasicBlock* const raw = lowered.get();
    result->AddBasicBlock(std::move(lowered));
    blockMap[irBlock.get()] = raw;
  }

  // Step 3: the argument stores go first in the entry block, because the
  // instruction pass only appends after them.
  MachineBasicBlock* const entry =
      func.GetBlocks().empty() ? nullptr : blockMap[func.GetEntry()];
  if (entry != nullptr) {
    size_t gprUsed = 0;
    size_t fprUsed = 0;

    // Stores the next register of the class starting at `first` into
    // `frameSlot` while registers remain; the counter always advances.
    auto spillIncoming = [&](PhysReg first, size_t& used, int frameSlot) {
      if (used < kRegsPerClass) {
        const PhysReg reg =
            static_cast<PhysReg>(static_cast<int>(first) + used);
        entry->Append(Opcode::StoreStack,
                      {Operand::Reg(reg), Operand::FrameIndex(frameSlot)});
      }
      ++used;
    };

    for (const IncomingArg& in : incoming) {
      if (in.inFpReg) {
        spillIncoming(PhysReg::S0, fprUsed, in.frameSlot);
      } else {
        spillIncoming(PhysReg::W0, gprUsed, in.frameSlot);
      }
    }
  }

  // Step 4: lower the instructions block by block.
  for (const auto& irBlock : func.GetBlocks()) {
    const auto found = blockMap.find(irBlock.get());
    if (found == blockMap.end() || found->second == nullptr) {
      throw std::runtime_error(FormatError("mir", "找不到基本块对应的 MIR 基本块"));
    }
    MachineBasicBlock& target = *found->second;

    for (const auto& irInst : irBlock->GetInstructions()) {
      LowerInstruction(*irInst, *result, slots, target, blockMap);
    }
  }

  return result;
}

// Lowers an entire IR module into a MachineModule.
//
// Global variables are only reported through DEBUG_MSG; nothing is emitted
// for them here. Each IR function is lowered with LowerFunction and added to
// the result in module order.
//
// Throws std::runtime_error when lowering or adding any function raises a
// std::runtime_error (the message is rewrapped with the function name and
// the remaining functions are not processed), or when the resulting module
// contains no functions.
std::unique_ptr<MachineModule> LowerToMIR(const ir::Module& module) {
  DefaultContext();

  auto lowered = std::make_unique<MachineModule>();

  // Globals: placeholder pass that only logs each name.
  for (const auto& global : module.GetGlobals()) {
    DEBUG_MSG("Global variable: " << global->GetName());
  }

  // Functions: the first failure aborts the whole lowering.
  for (const auto& irFunc : module.GetFunctions()) {
    try {
      lowered->AddFunction(LowerFunction(*irFunc));
    } catch (const std::runtime_error& failure) {
      std::string detail = "转换函数失败: ";
      detail += irFunc->GetName();
      detail += " - ";
      detail += failure.what();
      throw std::runtime_error(FormatError("mir", detail));
    }
  }

  const bool nothingLowered = lowered->GetFunctions().empty();
  if (nothingLowered) {
    throw std::runtime_error(FormatError("mir", "模块中没有成功转换的函数"));
  }

  return lowered;
}

}  // namespace mir