jing 2 weeks ago
commit d4f4b77b1e

@ -0,0 +1,119 @@
#pragma once
#include <initializer_list>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
namespace ir {
class Module;
}
namespace mir {
// Backend context object. It currently declares no state; only the defaulted
// constructor is provided.
class MIRContext {
public:
MIRContext() = default;
};
// Returns a reference to the single shared MIRContext instance.
MIRContext& DefaultContext();
// Physical registers the backend can name. PhysRegName() spells them as
// w0, w8, w9, x29, x30 and sp respectively.
enum class PhysReg { W0, W8, W9, X29, X30, SP };
// Returns the assembly spelling of `reg`; throws std::runtime_error if `reg`
// is not one of the PhysReg enumerators.
const char* PhysRegName(PhysReg reg);
// Machine opcodes understood by the assembly printer. Operand layouts below
// are the ones PrintAsm() reads.
enum class Opcode {
// Saves x29/x30, sets x29 to sp and reserves the frame. No operands.
Prologue,
// Releases the frame and restores x29/x30. No operands.
Epilogue,
// Operands: (Reg dst, Imm value).
MovImm,
// Operands: (Reg dst, FrameIndex slot); printed as `ldur`.
LoadStack,
// Operands: (Reg src, FrameIndex slot); printed as `stur`.
StoreStack,
// Operands: (Reg dst, Reg lhs, Reg rhs).
AddRR,
// Printed as `ret`. No operands.
Ret,
};
// One operand of a machine instruction: a physical register, an integer
// immediate, or a frame-slot index. Instances are built through the static
// factories; the constructor is private.
class Operand {
public:
// Tells which of the accessors below carries the meaningful value.
enum class Kind { Reg, Imm, FrameIndex };
// Factories, one per Kind.
static Operand Reg(PhysReg reg);
static Operand Imm(int value);
static Operand FrameIndex(int index);
Kind kind() const { return kind_; }
// The accessors do not check kind(); callers should query the one that
// matches the operand's Kind.
PhysReg reg() const { return reg_; }
int imm() const { return imm_; }
// Frame indices are stored in the same field as immediates.
int frame_index() const { return imm_; }
private:
Operand(Kind kind, PhysReg reg, int imm);
Kind kind_;
PhysReg reg_;
int imm_;
};
// A machine instruction: an opcode plus its ordered operand list. The object
// exposes no mutators after construction.
class MachineInstr {
public:
// `operands` defaults to empty for opcodes that take none (e.g. Ret).
MachineInstr(Opcode opcode, std::vector<Operand> operands = {});
Opcode opcode() const { return opcode_; }
const std::vector<Operand>& operands() const { return operands_; }
private:
Opcode opcode_;
std::vector<Operand> operands_;
};
// Describes one stack slot of a MachineFunction.
struct FrameSlot {
// Position of the slot in MachineFunction::frame_slots().
int index = 0;
// Slot size; presumably in bytes (default 4, i.e. one i32) - TODO confirm.
int size = 4;
// Offset printed relative to x29; RunFrameLowering() assigns negative values.
int offset = 0;
};
// A named, ordered list of machine instructions.
class MachineBasicBlock {
public:
explicit MachineBasicBlock(std::string name);
const std::string& name() const { return name_; }
// Mutable access lets passes replace or rewrite the instruction list.
std::vector<MachineInstr>& instructions() { return instructions_; }
const std::vector<MachineInstr>& instructions() const { return instructions_; }
// Appends a new instruction at the end of the block and returns a reference
// to it. The reference refers to an element of the underlying vector.
MachineInstr& Append(Opcode opcode,
std::initializer_list<Operand> operands = {});
private:
std::string name_;
std::vector<MachineInstr> instructions_;
};
// A machine-level function: a name, a single entry block, the list of stack
// slots and the total frame size.
class MachineFunction {
public:
explicit MachineFunction(std::string name);
const std::string& name() const { return name_; }
// The only basic block held by the function.
MachineBasicBlock& entry() { return entry_; }
const MachineBasicBlock& entry() const { return entry_; }
// Creates a new stack slot of `size` and returns its index.
int CreateFrameIndex(int size = 4);
// Slot lookup by index; both overloads throw std::runtime_error when `index`
// is out of range.
FrameSlot& frame_slot(int index);
const FrameSlot& frame_slot(int index) const;
const std::vector<FrameSlot>& frame_slots() const { return frame_slots_; }
// Frame size used by the printed prologue/epilogue; 0 until it is set.
int frame_size() const { return frame_size_; }
void set_frame_size(int size) { frame_size_ = size; }
private:
std::string name_;
MachineBasicBlock entry_;
std::vector<FrameSlot> frame_slots_;
int frame_size_ = 0;
};
// Backend pipeline entry points, called in this order by the driver.

// Lowers the module's single `main` function into a MachineFunction; throws
// std::runtime_error for unsupported input.
std::unique_ptr<MachineFunction> LowerToMIR(const ir::Module& module);
// Checks that every register operand is one of the known physical registers;
// throws std::runtime_error otherwise. Does not rewrite any instruction.
void RunRegAlloc(MachineFunction& function);
// Assigns frame-slot offsets, sets the frame size and inserts the
// Prologue/Epilogue pseudo instructions.
void RunFrameLowering(MachineFunction& function);
// Writes the function as AArch64 assembly text to `os`.
void PrintAsm(const MachineFunction& function, std::ostream& os);
} // namespace mir

@ -0,0 +1,67 @@
#!/usr/bin/env bash
# Compile a SysY source file to AArch64 assembly with the project compiler,
# assemble/link it with aarch64-linux-gnu-gcc, and optionally run the result
# under qemu-aarch64.
#
# Usage: <script> <input.sy> [output_dir] [--run]
#   input.sy    source file to compile (required, must exist)
#   output_dir  directory receiving <stem>.s and <stem>.exe (default: out/asm)
#   --run       run the produced executable with qemu-aarch64 and print its
#               exit code
set -euo pipefail

if [[ $# -lt 1 || $# -gt 3 ]]; then
  echo "用法: $0 <input.sy> [output_dir] [--run]" >&2
  exit 1
fi

input=$1
out_dir="out/asm"
run_exec=false
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --run)
      run_exec=true
      ;;
    -*)
      # A dash-prefixed argument is a mistyped flag, not a directory; taking
      # it as the output directory would only fail later inside mkdir.
      echo "未知选项: $1" >&2
      echo "用法: $0 <input.sy> [output_dir] [--run]" >&2
      exit 1
      ;;
    *)
      out_dir="$1"
      ;;
  esac
  shift
done

if [[ ! -f "$input" ]]; then
  echo "输入文件不存在: $input" >&2
  exit 1
fi

compiler="./build/bin/compiler"
if [[ ! -x "$compiler" ]]; then
  echo "未找到编译器: $compiler ,请先构建。" >&2
  exit 1
fi

if ! command -v aarch64-linux-gnu-gcc >/dev/null 2>&1; then
  echo "未找到 aarch64-linux-gnu-gcc无法汇编/链接。" >&2
  exit 1
fi

mkdir -p "$out_dir"

# `--` keeps an input name that starts with a dash from being parsed as an
# option by basename.
base=$(basename -- "$input")
stem=${base%.sy}
asm_file="$out_dir/$stem.s"
exe="$out_dir/$stem.exe"

"$compiler" --emit-asm "$input" > "$asm_file"
echo "汇编已生成: $asm_file"

aarch64-linux-gnu-gcc "$asm_file" -o "$exe"
echo "可执行文件已生成: $exe"

if [[ "$run_exec" == true ]]; then
  if ! command -v qemu-aarch64 >/dev/null 2>&1; then
    echo "未找到 qemu-aarch64无法运行生成的可执行文件。" >&2
    exit 1
  fi

  echo "运行 $exe ..."
  # The program's exit code is data to report, not a script failure, so
  # errexit is suspended around the run.
  set +e
  qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe"
  status=$?
  set -e
  echo "退出码: $status"
fi

@ -7,6 +7,7 @@
#include "frontend/AstBuilder.h"
#include "ir/IR.h"
#include "irgen/IRGen.h"
#include "mir/MIR.h"
#include "sem/Sema.h"
#include "utils/CLI.h"
#include "utils/Log.h"
@ -21,8 +22,10 @@ int main(int argc, char** argv) {
}
auto antlr = ParseFileWithAntlr(opts.input);
auto ast = BuildAst(antlr.tree);
bool need_blank_line = false;
if (opts.emit_ast) {
ast::PrintAST(*ast); // 调试 AST
need_blank_line = true;
}
if (!opts.ast_dot_output.empty()) {
std::ofstream dot_out(opts.ast_dot_output);
@ -38,7 +41,21 @@ int main(int argc, char** argv) {
if (opts.emit_ir) {
ir::IRPrinter printer;
if (need_blank_line) {
std::cout << "\n";
}
printer.Print(*module);
need_blank_line = true;
}
if (opts.emit_asm) {
auto machine_func = mir::LowerToMIR(*module);
mir::RunRegAlloc(*machine_func);
mir::RunFrameLowering(*machine_func);
if (need_blank_line) {
std::cout << "\n";
}
mir::PrintAsm(*machine_func, std::cout);
}
} catch (const std::exception& ex) {
std::cerr << "error: " << ex.what() << "\n";

@ -1,4 +1,75 @@
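// Assembly printer:
// - Prints the final machine instructions (MIR) as ARMv8-A / AArch64 assembly (.s).
// - Emits labels, directives, section information, etc. (implemented as needed).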
#include "mir/MIR.h"
#include <ostream>
#include <stdexcept>
namespace mir {
namespace {
// Resolves a FrameIndex operand to the frame slot it names in `function`.
// Throws std::runtime_error when `operand` is not a FrameIndex; an invalid
// index is reported by MachineFunction::frame_slot().
const FrameSlot& GetFrameSlot(const MachineFunction& function,
                              const Operand& operand) {
  const bool is_frame_index = operand.kind() == Operand::Kind::FrameIndex;
  if (is_frame_index) {
    return function.frame_slot(operand.frame_index());
  }
  throw std::runtime_error("期望 FrameIndex 操作数");
}
// Writes one x29-relative memory access line of the form
// "<mnemonic> <reg>, [x29, #<offset>]" to `os`.
void PrintStackAccess(std::ostream& os, const char* mnemonic, PhysReg reg,
                      int offset) {
  os << " " << mnemonic << " ";
  os << PhysRegName(reg);
  os << ", [x29, #" << offset;
  os << "]\n";
}
} // namespace
void PrintAsm(const MachineFunction& function, std::ostream& os) {
os << ".text\n";
os << ".global " << function.name() << "\n";
os << ".type " << function.name() << ", %function\n";
os << function.name() << ":\n";
for (const auto& inst : function.entry().instructions()) {
const auto& ops = inst.operands();
switch (inst.opcode()) {
case Opcode::Prologue:
os << " stp x29, x30, [sp, #-16]!\n";
os << " mov x29, sp\n";
if (function.frame_size() > 0) {
os << " sub sp, sp, #" << function.frame_size() << "\n";
}
break;
case Opcode::Epilogue:
if (function.frame_size() > 0) {
os << " add sp, sp, #" << function.frame_size() << "\n";
}
os << " ldp x29, x30, [sp], #16\n";
break;
case Opcode::MovImm:
os << " mov " << PhysRegName(ops.at(0).reg()) << ", #"
<< ops.at(1).imm() << "\n";
break;
case Opcode::LoadStack: {
const auto& slot = GetFrameSlot(function, ops.at(1));
PrintStackAccess(os, "ldur", ops.at(0).reg(), slot.offset);
break;
}
case Opcode::StoreStack: {
const auto& slot = GetFrameSlot(function, ops.at(1));
PrintStackAccess(os, "stur", ops.at(0).reg(), slot.offset);
break;
}
case Opcode::AddRR:
os << " add " << PhysRegName(ops.at(0).reg()) << ", "
<< PhysRegName(ops.at(1).reg()) << ", "
<< PhysRegName(ops.at(2).reg()) << "\n";
break;
case Opcode::Ret:
os << " ret\n";
break;
}
}
os << ".size " << function.name() << ", .-" << function.name() << "\n";
}
} // namespace mir

@ -1,4 +1,44 @@
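// Stack-frame construction and prologue/epilogue insertion:
// - Computes the stack size and alignment requirements and assigns stack slots.
// - Inserts the prologue/epilogue (saving/restoring callee-saved registers, etc.).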
#include "mir/MIR.h"
#include <stdexcept>
#include <vector>
namespace mir {
namespace {
// Rounds `value` up to the next multiple of `align` (for non-negative `value`
// and positive `align`).
int AlignTo(int value, int align) {
  // Bump past the next boundary, then drop the remainder; this is the same as
  // (bumped / align) * align under truncating integer division.
  const int bumped = value + align - 1;
  return bumped - bumped % align;
}
} // namespace
// Lays out the stack frame of `function` and brackets its code with the
// Prologue/Epilogue pseudo instructions.
//
// Steps:
//  1. Validate that every slot stays within 256 bytes below the frame
//     pointer; throws std::runtime_error before anything is modified.
//  2. Give each slot the negative running total of slot sizes as its offset.
//  3. Set the frame size to that total rounded up to a multiple of 16.
//  4. Rebuild the entry block: Prologue first, and an Epilogue in front of
//     every Ret.
void RunFrameLowering(MachineFunction& function) {
  // Pass 1: validation only, so a failure leaves the function untouched.
  int used_bytes = 0;
  for (const auto& slot : function.frame_slots()) {
    used_bytes += slot.size;
    const int lowest_offset = -used_bytes;
    if (lowest_offset < -256) {
      throw std::runtime_error(
          "Lab3 MVP 后端暂不支持超过 64 个 i32 栈槽的函数");
    }
  }

  // Pass 2: assign offsets growing downwards from the frame pointer.
  int depth = 0;
  for (const auto& slot : function.frame_slots()) {
    depth += slot.size;
    function.frame_slot(slot.index).offset = -depth;
  }
  function.set_frame_size(AlignTo(depth, 16));

  // Pass 3: rebuild the instruction list with prologue/epilogue in place.
  auto& body = function.entry().instructions();
  std::vector<MachineInstr> rebuilt;
  rebuilt.emplace_back(Opcode::Prologue);
  for (const auto& mi : body) {
    const bool is_return = mi.opcode() == Opcode::Ret;
    if (is_return) {
      rebuilt.emplace_back(Opcode::Epilogue);
    }
    rebuilt.push_back(mi);
  }
  body = std::move(rebuilt);
}
} // namespace mir

@ -1,4 +1,120 @@
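// IR -> MIR (AArch64 instruction selection):
// - Translates target-independent IR into AArch64 machine instruction sequences.
// - Design note: the initial stage uses virtual registers and produces MIRFunction/MIRBasicBlock/MIRInstr.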
#include "mir/MIR.h"
#include <stdexcept>
#include <unordered_map>
#include "ir/IR.h"
namespace mir {
namespace {
using ValueSlotMap = std::unordered_map<const ir::Value*, int>;
// Appends to `block` the instruction that places `value` into `target`.
//
// - An ir::ConstantInt becomes a MovImm of its value.
// - Any other value must already have a stack slot in `slots` and becomes a
//   LoadStack from that slot.
//
// Throws std::runtime_error when `value` is null or has no slot. The null
// check exists because the lookup-failure path reads value->name(), which
// would otherwise dereference a null pointer.
void EmitValueToReg(const ir::Value* value, PhysReg target,
                    const ValueSlotMap& slots, MachineBasicBlock& block) {
  if (value == nullptr) {
    throw std::runtime_error("Lab3 MVP 后端遇到空的 IR 值");
  }
  if (auto* constant = dynamic_cast<const ir::ConstantInt*>(value)) {
    block.Append(Opcode::MovImm,
                 {Operand::Reg(target), Operand::Imm(constant->value())});
    return;
  }
  auto it = slots.find(value);
  if (it == slots.end()) {
    throw std::runtime_error("Lab3 MVP 后端找不到值对应的栈槽: " +
                             value->name());
  }
  block.Append(Opcode::LoadStack,
               {Operand::Reg(target), Operand::FrameIndex(it->second)});
}
// Lowers one IR instruction into machine instructions appended to the entry
// block of `function`, recording in `slots` the stack slot that holds each
// produced value.
//
// Every value lives in a stack slot; W8/W9 are used as scratch registers and
// W0 carries the return value. Throws std::runtime_error for Sub/Mul, for
// loads/stores whose pointer is not a known slot, and for any other opcode.
void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
                      ValueSlotMap& slots) {
  auto& mbb = function.entry();
  const auto opcode = inst.opcode();

  if (opcode == ir::Opcode::Alloca) {
    // The alloca itself is the key: later loads/stores look it up by pointer.
    slots.emplace(&inst, function.CreateFrameIndex());
    return;
  }

  if (opcode == ir::Opcode::Store) {
    const auto& store_inst = static_cast<const ir::StoreInst&>(inst);
    const auto target = slots.find(store_inst.ptr());
    if (target == slots.end()) {
      throw std::runtime_error("Lab3 MVP 后端要求 store 目标必须来自 alloca");
    }
    EmitValueToReg(store_inst.value(), PhysReg::W8, slots, mbb);
    mbb.Append(Opcode::StoreStack, {Operand::Reg(PhysReg::W8),
                                    Operand::FrameIndex(target->second)});
    return;
  }

  if (opcode == ir::Opcode::Load) {
    const auto& load_inst = static_cast<const ir::LoadInst&>(inst);
    const auto source = slots.find(load_inst.ptr());
    if (source == slots.end()) {
      throw std::runtime_error("Lab3 MVP 后端要求 load 源必须来自 alloca");
    }
    // The loaded value gets its own slot: copy source slot -> W8 -> new slot.
    const int result_slot = function.CreateFrameIndex();
    mbb.Append(Opcode::LoadStack, {Operand::Reg(PhysReg::W8),
                                   Operand::FrameIndex(source->second)});
    mbb.Append(Opcode::StoreStack, {Operand::Reg(PhysReg::W8),
                                    Operand::FrameIndex(result_slot)});
    slots.emplace(&inst, result_slot);
    return;
  }

  if (opcode == ir::Opcode::Add) {
    const auto& add_inst = static_cast<const ir::BinaryInst&>(inst);
    const int result_slot = function.CreateFrameIndex();
    EmitValueToReg(add_inst.lhs(), PhysReg::W8, slots, mbb);
    EmitValueToReg(add_inst.rhs(), PhysReg::W9, slots, mbb);
    // W8 = W8 + W9, then spill the sum to the result slot.
    mbb.Append(Opcode::AddRR,
               {Operand::Reg(PhysReg::W8), Operand::Reg(PhysReg::W8),
                Operand::Reg(PhysReg::W9)});
    mbb.Append(Opcode::StoreStack, {Operand::Reg(PhysReg::W8),
                                    Operand::FrameIndex(result_slot)});
    slots.emplace(&inst, result_slot);
    return;
  }

  if (opcode == ir::Opcode::Ret) {
    const auto& ret_inst = static_cast<const ir::ReturnInst&>(inst);
    EmitValueToReg(ret_inst.value(), PhysReg::W0, slots, mbb);
    mbb.Append(Opcode::Ret);
    return;
  }

  if (opcode == ir::Opcode::Sub || opcode == ir::Opcode::Mul) {
    throw std::runtime_error("Lab3 MVP 后端暂不支持 add 以外的二元运算");
  }

  throw std::runtime_error("Lab3 MVP 后端遇到未知 IR 指令");
}
} // namespace
// Lowers `module` to a MachineFunction.
//
// Only a module with exactly one function, named "main", is accepted, and
// only the instructions of that function's entry block are lowered. Throws
// std::runtime_error when those conditions are not met, when the function has
// no entry block, or when an instruction cannot be lowered.
std::unique_ptr<MachineFunction> LowerToMIR(const ir::Module& module) {
  DefaultContext();

  const auto& ir_functions = module.functions();
  if (ir_functions.size() != 1) {
    throw std::runtime_error("Lab3 MVP 后端只支持单函数 main");
  }

  const auto& ir_func = *ir_functions.front();
  if (ir_func.name() != "main") {
    throw std::runtime_error("Lab3 MVP 后端只支持 main 函数");
  }

  auto lowered = std::make_unique<MachineFunction>(ir_func.name());
  ValueSlotMap value_slots;

  const auto* entry_block = ir_func.entry();
  if (entry_block == nullptr) {
    throw std::runtime_error("IR 函数缺少入口基本块");
  }

  for (const auto& ir_inst : entry_block->instructions()) {
    LowerInstruction(*ir_inst, *lowered, value_slots);
  }
  return lowered;
}
} // namespace mir

@ -1,4 +1,16 @@
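// Machine basic block (MIRBasicBlock):
// - Maintains the list of machine instructions.
// - Records (or can compute) machine-level CFG predecessor/successor information.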
#include "mir/MIR.h"
#include <utility>
namespace mir {
MachineBasicBlock::MachineBasicBlock(std::string name)
: name_(std::move(name)) {}
// Appends an instruction built from `opcode` and `operands` to the end of the
// block and returns a reference to the stored instruction.
MachineInstr& MachineBasicBlock::Append(
    Opcode opcode, std::initializer_list<Operand> operands) {
  // Copy the operands out of the initializer list, then hand the vector over.
  std::vector<Operand> operand_list(operands.begin(), operands.end());
  instructions_.emplace_back(opcode, std::move(operand_list));
  return instructions_.back();
}
} // namespace mir

@ -1,4 +1,10 @@
// MIR context:
// - Holds target constraints, instruction-set information, etc. (for AArch64).
// - Provides shared information to the backend Lowering/RegAlloc/FrameLowering passes.
#include "mir/MIR.h"
namespace mir {
// Returns the shared MIRContext, which is a function-local static.
MIRContext& DefaultContext() {
  static MIRContext shared_context;
  return shared_context;
}
} // namespace mir

@ -1,4 +1,31 @@
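// Machine function (MIRFunction):
// - Contains the list of machine basic blocks and CFG information.
// - Maintains stack-frame information, virtual/physical register usage, etc.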
#include "mir/MIR.h"
#include <stdexcept>
#include <utility>
namespace mir {
MachineFunction::MachineFunction(std::string name)
: name_(std::move(name)), entry_("entry") {}
int MachineFunction::CreateFrameIndex(int size) {
int index = static_cast<int>(frame_slots_.size());
frame_slots_.push_back(FrameSlot{index, size, 0});
return index;
}
// Returns the mutable slot at `index`; throws std::runtime_error when `index`
// is negative or not smaller than the number of slots.
FrameSlot& MachineFunction::frame_slot(int index) {
  const int slot_count = static_cast<int>(frame_slots_.size());
  const bool in_range = index >= 0 && index < slot_count;
  if (!in_range) {
    throw std::runtime_error("非法 FrameIndex");
  }
  return frame_slots_[index];
}
// Read-only slot lookup; throws std::runtime_error when `index` is negative
// or not smaller than the number of slots.
const FrameSlot& MachineFunction::frame_slot(int index) const {
  const int slot_count = static_cast<int>(frame_slots_.size());
  const bool in_range = index >= 0 && index < slot_count;
  if (!in_range) {
    throw std::runtime_error("非法 FrameIndex");
  }
  return frame_slots_[index];
}
} // namespace mir

@ -1,4 +1,23 @@
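// Machine instruction (MIRInstr):
// - opcode + operands (register / immediate / stack slot / symbol / label, etc.).
// - Supports instruction selection, register allocation and assembly printing.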
#include "mir/MIR.h"
#include <utility>
namespace mir {
Operand::Operand(Kind kind, PhysReg reg, int imm)
: kind_(kind), reg_(reg), imm_(imm) {}
// Builds a register operand; the unused integer field is set to 0.
Operand Operand::Reg(PhysReg reg) {
  return Operand{Kind::Reg, reg, 0};
}
// Builds an immediate operand; the unused register field is set to W0.
Operand Operand::Imm(int value) {
  return Operand{Kind::Imm, PhysReg::W0, value};
}
// Builds a frame-index operand. The index is kept in the integer field and
// the unused register field is set to W0.
Operand Operand::FrameIndex(int index) {
  return Operand{Kind::FrameIndex, PhysReg::W0, index};
}
MachineInstr::MachineInstr(Opcode opcode, std::vector<Operand> operands)
: opcode_(opcode), operands_(std::move(operands)) {}
} // namespace mir

@ -1,4 +1,33 @@
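// Register allocation:
// - Assigns virtual registers to physical registers.
// - Handles spill/reload and provides stack-slot requirements for the later frame layout.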
#include "mir/MIR.h"
#include <stdexcept>
namespace mir {
namespace {
// Returns true when `reg` is one of the PhysReg enumerators, false for any
// other value.
bool IsAllowedReg(PhysReg reg) {
  return reg == PhysReg::W0 || reg == PhysReg::W8 || reg == PhysReg::W9 ||
         reg == PhysReg::X29 || reg == PhysReg::X30 || reg == PhysReg::SP;
}
} // namespace
// Verification-only pass: scans every operand of every instruction in the
// entry block and throws std::runtime_error on the first register operand
// that IsAllowedReg() rejects. Instructions are not modified.
void RunRegAlloc(MachineFunction& function) {
  for (const auto& mi : function.entry().instructions()) {
    for (const auto& op : mi.operands()) {
      if (op.kind() != Operand::Kind::Reg) {
        continue;
      }
      if (IsAllowedReg(op.reg())) {
        continue;
      }
      throw std::runtime_error("Lab3 MVP 后端发现未预着色的寄存器");
    }
  }
}
} // namespace mir

@ -1,4 +1,25 @@
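// Register representation:
// - Distinguishes virtual registers from physical registers and provides numbering/attributes.
// - Register classes (RegClass): GPR/FPR classification and allocatable-set descriptions (used by RA).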
#include "mir/MIR.h"
#include <stdexcept>
namespace mir {
// Maps a physical register to its assembly spelling. Throws
// std::runtime_error when `reg` is not one of the PhysReg enumerators.
const char* PhysRegName(PhysReg reg) {
  const char* spelling = nullptr;
  switch (reg) {
    case PhysReg::W0:
      spelling = "w0";
      break;
    case PhysReg::W8:
      spelling = "w8";
      break;
    case PhysReg::W9:
      spelling = "w9";
      break;
    case PhysReg::X29:
      spelling = "x29";
      break;
    case PhysReg::X30:
      spelling = "x30";
      break;
    case PhysReg::SP:
      spelling = "sp";
      break;
  }
  if (spelling == nullptr) {
    throw std::runtime_error("未知物理寄存器");
  }
  return spelling;
}
} // namespace mir

@ -15,7 +15,7 @@ CLIOptions ParseCLI(int argc, char** argv) {
if (argc <= 1) {
throw std::runtime_error(
"用法: compiler [--help] [--emit-ast] [--emit-ir] [--ast-dot <file.dot>] <input.sy>");
"用法: compiler [--help] [--emit-ast] [--emit-ir] [--emit-asm] [--ast-dot <file.dot>] <input.sy>");
}
for (int i = 1; i < argc; ++i) {
@ -47,12 +47,24 @@ CLIOptions ParseCLI(int argc, char** argv) {
if (!explicit_emit) {
opt.emit_ast = false;
opt.emit_ir = false;
opt.emit_asm = false;
explicit_emit = true;
}
opt.emit_ir = true;
continue;
}
if (std::strcmp(arg, "--emit-asm") == 0) {
if (!explicit_emit) {
opt.emit_ast = false;
opt.emit_ir = false;
opt.emit_asm = false;
explicit_emit = true;
}
opt.emit_asm = true;
continue;
}
if (arg[0] == '-') {
throw std::runtime_error(std::string("未知参数: ") + arg +
"(使用 --help 查看用法)");
@ -68,9 +80,10 @@ CLIOptions ParseCLI(int argc, char** argv) {
if (opt.input.empty() && !opt.show_help) {
throw std::runtime_error("缺少输入文件:请提供 <input.sy>(使用 --help 查看用法)");
}
if (!opt.emit_ast && !opt.emit_ir && opt.ast_dot_output.empty()) {
if (!opt.emit_ast && !opt.emit_ir && !opt.emit_asm &&
opt.ast_dot_output.empty()) {
throw std::runtime_error(
"未选择任何输出:请使用 --emit-ast 或 --emit-ir(或使用 --ast-dot 导出图)");
"未选择任何输出:请使用 --emit-ast / --emit-ir / --emit-asm(或使用 --ast-dot 导出图)");
}
return opt;
}

@ -8,6 +8,7 @@ struct CLIOptions {
std::string ast_dot_output;
bool emit_ast = true;
bool emit_ir = true;
bool emit_asm = false;
bool show_help = false;
};

@ -10,17 +10,18 @@ void PrintHelp(std::ostream& os) {
os << "SysY Compiler (课程实验最小可运行示例)\n"
<< "\n"
<< "用法:\n"
<< " compiler [--help] [--emit-ast] [--emit-ir] [--ast-dot <file.dot>] <input.sy>\n"
<< " compiler [--help] [--emit-ast] [--emit-ir] [--emit-asm] [--ast-dot <file.dot>] <input.sy>\n"
<< "\n"
<< "选项:\n"
<< " -h, --help 打印帮助信息并退出\n"
<< " --emit-ast 仅在显式模式下启用 AST 文本输出\n"
<< " --emit-ir 仅在显式模式下启用 IR 输出\n"
<< " --emit-asm 仅在显式模式下启用 AArch64 汇编输出\n"
<< " --ast-dot <path> 导出 AST Graphviz DOT 到指定文件\n"
<< "\n"
<< "说明:\n"
<< " - 默认同时输出 AST 与 IR\n"
<< " - 若使用 --emit-ast/--emit-ir,则仅输出显式选择的阶段\n"
<< " - 若使用 --emit-ast/--emit-ir/--emit-asm,则仅输出显式选择的阶段\n"
<< " - 可使用重定向写入文件:\n"
<< " compiler test/test_case/simple_add.sy > out.ll\n";
<< " compiler --emit-asm test/test_case/simple_add.sy > out.s\n";
}

Loading…
Cancel
Save