已实现基本标量优化,实现部分寄存器优化

zhm
安峻邑 2 weeks ago
parent 0f3cdc3b5f
commit 15a71238e5

@ -0,0 +1,20 @@
// 包装 ANTLR4，提供简易的解析入口。
#pragma once
#include <memory>
#include <string>
#include "SysYLexer.h"
#include "SysYParser.h"
#include "antlr4-runtime.h"
// Bundle of the ANTLR objects produced by one parse.
// The input stream, lexer, token stream and parser are each owned through a
// unique_ptr; `tree` is a raw, non-owning pointer (owned by the parser), so it
// must not be used once this struct has been destroyed.
// NOTE(review): members are destroyed in reverse declaration order (parser
// first, input last). Presumably that order is intentional because each later
// object refers to the earlier ones -- confirm before reordering members.
struct AntlrResult {
std::unique_ptr<antlr4::ANTLRInputStream> input;
std::unique_ptr<SysYLexer> lexer;
std::unique_ptr<antlr4::CommonTokenStream> tokens;
std::unique_ptr<SysYParser> parser;
antlr4::tree::ParseTree* tree = nullptr; // owned by parser
};
// Parses the file at `path` and returns the resulting ANTLR objects.
// Throws std::runtime_error when an error occurs.
AntlrResult ParseFileWithAntlr(const std::string& path);

@ -0,0 +1,9 @@
#pragma once
#include <iosfwd>
#include "antlr4-runtime.h"
// Prints an ANTLR parse tree directly to `os` as an indented tree.
//   tree   - root of the parse tree to print
//   parser - the parser associated with the tree
//            (presumably used to resolve rule names -- confirm in the definition)
//   os     - destination stream
void PrintSyntaxTree(antlr4::tree::ParseTree* tree, antlr4::Parser* parser,
std::ostream& os);

@ -0,0 +1,545 @@
// 当前只支撑 i32、i32*、void 以及最小的内存/算术指令,演示用。
//
// 当前已经实现:
// 1. 基础类型系统：void / i32 / i32*
// 2. Value 体系：Value / ConstantValue / ConstantInt / Function / BasicBlock / User / GlobalValue / Instruction
// 3. 最小指令集：Add / Alloca / Load / Store / Ret
// 4. BasicBlock / Function / Module 三层组织结构
// 5. IRBuilder：便捷创建常量和最小指令
// 6. def-use 关系的轻量实现:
// - Instruction 保存 operand 列表
// - Value 保存 uses
// - 支持 ReplaceAllUsesWith 的简化实现
//
// 当前尚未实现或只做了最小占位:
// 1. 完整类型系统：数组、函数类型、label 类型等
// 2. 更完整的指令系统：br / condbr / call / phi / gep 等
// 3. 更成熟的 Use 管理(例如 LLVM 风格的双向链式结构)
// 4. 更完整的 IR verifier 和优化基础设施
//
// 当前需要特别说明的两个简化点:
// 1. BasicBlock 虽然已经纳入 Value 体系，但其类型目前仍用 void 作为占位，
// 后续如果补 label type，可以再改成更合理的块标签类型。
// 2. ConstantValue 体系目前只实现了 ConstantInt，后续可以继续补 ConstantFloat、
// ConstantArray 等更完整的常量种类。
//
// 建议的扩展顺序:
// 1. 先补更多指令和类型
// 2. 再补控制流相关 IR
// 3. 最后再考虑把 Value/User/Use 进一步抽象成更完整的框架
#pragma once
#include <iosfwd>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace ir {
// Forward declarations of the IR classes defined later in this header, so
// that earlier declarations can refer to them through pointers.
class Type;
class Value;
class User;
class ConstantValue;
class ConstantInt;
class ConstantFloat;
class GlobalValue;
class Instruction;
class BasicBlock;
class Function;
class Argument;
class GlobalVariable;
// Use 表示一个 Value 的一次使用记录。
// 当前实现设计:
// - value被使用的值
// - user使用该值的 User
// - operand_index该值在 user 操作数列表中的位置
class Use {
public:
Use() = default;
Use(Value* value, User* user, size_t operand_index)
: value_(value), user_(user), operand_index_(operand_index) {}
Value* GetValue() const { return value_; }
User* GetUser() const { return user_; }
size_t GetOperandIndex() const { return operand_index_; }
void SetValue(Value* value) { value_ = value; }
void SetUser(User* user) { user_ = user; }
void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; }
private:
Value* value_ = nullptr;
User* user_ = nullptr;
size_t operand_index_ = 0;
};
// IR context: central place that manages shared resources such as constants,
// so that they can be reused and the set can be extended later.
class Context {
public:
Context() = default;
~Context();
// Returns a de-duplicated i32 constant for `v`.
ConstantInt* GetConstInt(int v);
// Returns a float constant for `v`.
// NOTE(review): presumably de-duplicated through const_floats_ like the
// integer getters -- confirm in the definition.
ConstantFloat* GetConstFloat(double v);
// Returns a de-duplicated i1 constant (0 or 1).
ConstantInt* GetConstBool(int v);
// NOTE(review): presumably produces a fresh temporary name driven by
// temp_index_; the definition is not in this header.
std::string NextTemp();
private:
// Caches behind the getters above; the unique_ptr values own the constants.
std::unordered_map<int, std::unique_ptr<ConstantInt>> const_ints_;
// NOTE(review): float constants are keyed by a string rather than by the
// double itself; the key format is not visible here.
std::unordered_map<std::string, std::unique_ptr<ConstantFloat>> const_floats_;
std::unordered_map<int, std::unique_ptr<ConstantInt>> const_bools_;
// Counter for NextTemp(); starts at -1.
int temp_index_ = -1;
};
// IR type descriptor; the only state it stores is its Kind.
class Type {
public:
// Closed set of supported kinds: void, i1, i32, float32 and pointers to
// i32 / float32.
enum class Kind { Void, Int1, Int32, Float32, PtrInt32, PtrFloat32 };
explicit Type(Kind k);
// Types are obtained through statically shared objects.
// The same type can be compared directly by checking whether the returned
// values are equal, e.g.:
// Type::GetInt32Type() == Type::GetInt32Type()
static const std::shared_ptr<Type>& GetVoidType();
static const std::shared_ptr<Type>& GetInt1Type();
static const std::shared_ptr<Type>& GetInt32Type();
static const std::shared_ptr<Type>& GetFloat32Type();
static const std::shared_ptr<Type>& GetPtrInt32Type();
static const std::shared_ptr<Type>& GetPtrFloat32Type();
// Kind accessor and one predicate per Kind.
Kind GetKind() const;
bool IsVoid() const;
bool IsInt1() const;
bool IsInt32() const;
bool IsFloat32() const;
bool IsPtrInt32() const;
bool IsPtrFloat32() const;
private:
Kind kind_;
};
// Base class of the IR value hierarchy. Every Value has a type, a name and a
// list of Use records describing where it is used.
class Value {
public:
Value(std::shared_ptr<Type> ty, std::string name);
virtual ~Value() = default;
const std::shared_ptr<Type>& GetType() const;
const std::string& GetName() const;
void SetName(std::string n);
// Type predicates.
// NOTE(review): presumably these forward to the predicates on type_ -- confirm.
bool IsVoid() const;
bool IsInt32() const;
bool IsFloat32() const;
bool IsPtrInt32() const;
bool IsPtrFloat32() const;
// Classification predicates for the dynamic kind of this Value.
bool IsConstant() const;
bool IsInstruction() const;
bool IsUser() const;
bool IsFunction() const;
// Use-list maintenance: a use is identified by (user, operand_index).
void AddUse(User* user, size_t operand_index);
void RemoveUse(User* user, size_t operand_index);
const std::vector<Use>& GetUses() const;
// NOTE(review): presumably redirects every recorded use of this value to
// `new_value`; the header only describes it as a simplified implementation.
void ReplaceAllUsesWith(Value* new_value);
protected:
std::shared_ptr<Type> type_;
std::string name_;
std::vector<Use> uses_;
};
// ConstantValue is the base class of the constant hierarchy.
// ConstantInt and ConstantFloat derive from it in this header; more constant
// kinds can be added later.
class ConstantValue : public Value {
public:
// `name` defaults to the empty string.
ConstantValue(std::shared_ptr<Type> ty, std::string name = "");
};
// Integer constant: a ConstantValue carrying an `int` payload.
class ConstantInt : public ConstantValue {
public:
ConstantInt(std::shared_ptr<Type> ty, int v);
// Returns the stored integer.
int GetValue() const { return value_; }
private:
// Value-initialized (0) unless the constructor sets it.
int value_{};
};
// Floating-point constant: a ConstantValue carrying a `double` payload.
class ConstantFloat : public ConstantValue {
public:
ConstantFloat(std::shared_ptr<Type> ty, double v);
// Returns the stored value as a double.
double GetValue() const { return value_; }
private:
// Value-initialized (0.0) unless the constructor sets it.
double value_{};
};
// IR instruction opcodes. More instruction kinds are expected to be added
// later. Enumerator values are implicit, so declaration order determines the
// underlying values.
enum class Opcode {
// Arithmetic.
Add,
Sub,
Mul,
Div,
Mod,
// Conversions.
SIToFP,
FPToSI,
ZExt,
// Comparisons.
Eq,
Ne,
Lt,
Le,
Gt,
Ge,
// Memory.
Alloca,
Load,
Store,
GEP,
// Calls and control flow.
Call,
Br,
CondBr,
Ret,
Phi
};
// User is the base class of every IR object that uses other Values as inputs.
// In this header both Instruction and GlobalValue derive from User.
class User : public Value {
public:
User(std::shared_ptr<Type> ty, std::string name);
size_t GetNumOperands() const;
Value* GetOperand(size_t index) const;
// NOTE(review): SetOperand/AddOperand presumably also update the use lists
// of the affected Values (see Value::AddUse/RemoveUse) -- confirm.
void SetOperand(size_t index, Value* value);
void AddOperand(Value* value);
private:
// Operands are stored as non-owning pointers.
std::vector<Value*> operands_;
};
// GlobalValue is an empty placeholder class for the global value / global
// variable hierarchy. It currently only fills in the class hierarchy;
// initializers, printing and linkage semantics are to be added later.
class GlobalValue : public User {
public:
GlobalValue(std::shared_ptr<Type> ty, std::string name);
};
// A global variable: either a scalar or an array, with int32 or float32
// elements, plus its initial value(s).
class GlobalVariable : public GlobalValue {
public:
// Whether the global is a single scalar or an array.
enum class StorageKind {
Scalar,
Array,
};
// Element type of the global.
enum class ElemKind {
Int32,
Float32,
};
// Constructors taking a single int/double initial value.
GlobalVariable(std::string name, int init_value);
GlobalVariable(std::string name, double init_value);
// Constructors taking an element count, optionally with an element kind or
// a list of initial values.
// NOTE(review): the element kind used by the two-argument array overload is
// decided in the definition; elem_kind_ defaults to Int32.
GlobalVariable(std::string name, size_t array_size);
GlobalVariable(std::string name, size_t array_size, ElemKind elem_kind);
GlobalVariable(std::string name, size_t array_size, const std::vector<int>& init_values);
GlobalVariable(std::string name, size_t array_size, const std::vector<double>& init_values);
StorageKind GetStorageKind() const;
bool IsArray() const;
ElemKind GetElemKind() const;
bool IsFloatElem() const;
int GetInitValue() const;
double GetInitFloatValue() const;
size_t GetArraySize() const;
const std::vector<int>& GetInitValues() const;
const std::vector<double>& GetInitFloatValues() const;
bool HasInitValues() const;
private:
// Defaults describe an int32 scalar initialized to zero with no element list.
StorageKind storage_kind_ = StorageKind::Scalar;
ElemKind elem_kind_ = ElemKind::Int32;
int init_value_ = 0;
double init_float_value_ = 0.0;
size_t array_size_ = 0;
std::vector<int> init_values_;
std::vector<double> init_float_values_;
};
// Base class of all IR instructions: a User with an opcode and a non-owning
// pointer to the BasicBlock that contains it.
class Instruction : public User {
public:
// `name` defaults to the empty string.
Instruction(Opcode op, std::shared_ptr<Type> ty, std::string name = "");
Opcode GetOpcode() const;
// NOTE(review): presumably true for Br / CondBr / Ret -- confirm in the definition.
bool IsTerminator() const;
BasicBlock* GetParent() const;
void SetParent(BasicBlock* parent);
private:
Opcode opcode_;
// nullptr until SetParent is called.
BasicBlock* parent_ = nullptr;
};
// Two-operand instruction. The opcode, result type, left and right operands
// and the result name are all supplied by the caller.
class BinaryInst : public Instruction {
public:
BinaryInst(Opcode op, std::shared_ptr<Type> ty, Value* lhs, Value* rhs,
std::string name);
// Accessors for the left and right operands.
Value* GetLhs() const;
Value* GetRhs() const;
};
// Single-operand conversion instruction; `op` selects the conversion and `ty`
// is the result type.
class CastInst : public Instruction {
public:
CastInst(Opcode op, std::shared_ptr<Type> ty, Value* operand,
std::string name);
// Returns the value being converted.
Value* GetOperandValue() const;
};
// Branch with a single target block. The caller passes the void type as the
// instruction's type.
class BranchInst : public Instruction {
public:
BranchInst(std::shared_ptr<Type> void_ty, BasicBlock* target);
BasicBlock* GetTarget() const;
};
// Conditional branch: takes a condition value and two target blocks, one for
// the true case and one for the false case.
class CondBranchInst : public Instruction {
public:
CondBranchInst(std::shared_ptr<Type> void_ty, Value* cond, BasicBlock* true_bb,
BasicBlock* false_bb);
Value* GetCond() const;
BasicBlock* GetTrueTarget() const;
BasicBlock* GetFalseTarget() const;
};
// Call instruction: a callee function, an argument list and a result of type
// `ret_ty` named `name`.
class CallInst : public Instruction {
public:
CallInst(std::shared_ptr<Type> ret_ty, Function* callee,
const std::vector<Value*>& args, std::string name);
Function* GetCallee() const;
// Number of call arguments and indexed access to them.
size_t GetNumArgs() const;
Value* GetArg(size_t index) const;
};
// Return instruction with an optional return value; `val` defaults to nullptr.
class ReturnInst : public Instruction {
public:
ReturnInst(std::shared_ptr<Type> void_ty, Value* val = nullptr);
Value* GetValue() const;
// NOTE(review): presumably false when constructed with val == nullptr -- confirm.
bool HasValue() const;
};
// Stack allocation instruction for elements of type `elem_ty`.
// An optional `count` value (default nullptr) can be supplied.
// NOTE(review): presumably a non-null count marks an array allocation, which
// is what IsArrayAlloca() reports -- confirm in the definition.
class AllocaInst : public Instruction {
public:
AllocaInst(std::shared_ptr<Type> elem_ty, std::string name,
Value* count = nullptr);
bool IsArrayAlloca() const;
Value* GetCount() const;
std::shared_ptr<Type> GetElementType() const;
};
// Element-address instruction: takes a base pointer and a single index and
// produces a value of type `ptr_ty`.
class GetElementPtrInst : public Instruction {
public:
GetElementPtrInst(std::shared_ptr<Type> ptr_ty, Value* base_ptr,
Value* index, std::string name);
Value* GetBasePtr() const;
Value* GetIndex() const;
};
// Load instruction: reads a value of type `val_ty` through the pointer `ptr`.
class LoadInst : public Instruction {
public:
LoadInst(std::shared_ptr<Type> val_ty, Value* ptr, std::string name);
Value* GetPtr() const;
};
// Store instruction: takes the value to store and the destination pointer.
// The caller passes the void type as the instruction's type.
class StoreInst : public Instruction {
public:
StoreInst(std::shared_ptr<Type> void_ty, Value* val, Value* ptr);
Value* GetValue() const;
Value* GetPtr() const;
};
// Phi instruction.
// Besides the inherited Instruction state it carries an optional, non-owning
// link to an AllocaInst, exposed through GetAlloca()/SetAlloca().
// NOTE(review): presumably this is the stack slot the phi stands in for
// during promotion to SSA -- confirm with the pass that calls SetAlloca.
class PhiInst : public Instruction {
 public:
  PhiInst(std::shared_ptr<Type> ty, std::string name);

  // Returns the associated alloca, or nullptr when none has been set.
  AllocaInst* GetAlloca() const { return alloca_; }
  void SetAlloca(AllocaInst* alloca) { alloca_ = alloca; }

 private:
  // Defaults to nullptr so GetAlloca() is well-defined before SetAlloca() is
  // called; the constructor takes no alloca argument.
  AllocaInst* alloca_ = nullptr;
};
// A function parameter: a Value that additionally records an index.
// NOTE(review): presumably the position in the owning function's parameter
// list -- confirm with Function::AddParam.
class Argument : public Value {
public:
Argument(std::shared_ptr<Type> ty, std::string name, size_t index);
size_t GetIndex() const;
private:
size_t index_ = 0;
};
// BasicBlock 已纳入 Value 体系,便于后续向更完整 IR 类图靠拢。
// 当前其类型仍使用 void 作为占位,后续可替换为专门的 label type。
class BasicBlock : public Value {
public:
explicit BasicBlock(std::string name);
Function* GetParent() const;
void SetParent(Function* parent);
bool HasTerminator() const;
const std::vector<std::unique_ptr<Instruction>>& GetInstructions() const;
const std::vector<BasicBlock*>& GetPredecessors() const;
const std::vector<BasicBlock*>& GetSuccessors() const;
std::vector<BasicBlock*>& GetMutablePredecessors() {
return predecessors_;
}
std::vector<BasicBlock*>& GetMutableSuccessors() {
return successors_;
}
template <typename T, typename... Args>
T* Append(Args&&... args) {
if (HasTerminator()) {
throw std::runtime_error("BasicBlock 已有 terminator不能继续追加指令: " +
name_);
}
auto inst = std::make_unique<T>(std::forward<Args>(args)...);
auto* ptr = inst.get();
ptr->SetParent(this);
instructions_.push_back(std::move(inst));
return ptr;
}
template <typename T, typename... Args>
T* Prepend(Args&&... args) {
auto inst = std::make_unique<T>(std::forward<Args>(args)...);
auto* ptr = inst.get();
ptr->SetParent(this);
instructions_.insert(instructions_.begin(), std::move(inst));
return ptr;
}
template <typename T, typename... Args>
T* InsertAlloca(Args&&... args) {
auto inst = std::make_unique<T>(std::forward<Args>(args)...);
auto* ptr = inst.get();
ptr->SetParent(this);
instructions_.insert(instructions_.begin() + alloca_insert_index_, std::move(inst));
++alloca_insert_index_;
return ptr;
}
void RemoveInstruction(Instruction* inst) {
for (auto it = instructions_.begin(); it != instructions_.end(); ++it) {
if (it->get() == inst) {
instructions_.erase(it);
break;
}
}
}
std::unique_ptr<Instruction> TakeInstruction(Instruction* inst);
void InsertInstructionBeforeTerminator(std::unique_ptr<Instruction> inst);
private:
Function* parent_ = nullptr;
std::vector<std::unique_ptr<Instruction>> instructions_;
std::vector<BasicBlock*> predecessors_;
std::vector<BasicBlock*> successors_;
size_t alloca_insert_index_ = 0;
};
// Function is currently a minimal implementation as well.
// Important: the project has no dedicated FunctionType yet, so Function
// (which derives from Value) only stores the *return type* in type_. That
// cannot express the whole signature "return type + parameter list";
// parameters are tracked separately through AddParam / GetParams. A dedicated
// function type would normally be introduced later.
class Function : public Value {
public:
// The constructor also receives only the return type, not a full function type.
// `is_external` defaults to false.
Function(std::string name, std::shared_ptr<Type> ret_type,
bool is_external = false);
// Adds a parameter and returns a non-owning pointer to it; the Function owns
// the Argument through params_.
Argument* AddParam(const std::string& name, std::shared_ptr<Type> type);
const std::vector<std::unique_ptr<Argument>>& GetParams() const;
bool IsExternal() const;
// Creates a block owned by this function (stored in blocks_).
// NOTE(review): presumably the first created block becomes the entry -- confirm.
BasicBlock* CreateBlock(const std::string& name);
BasicBlock* GetEntry();
const BasicBlock* GetEntry() const;
const std::vector<std::unique_ptr<BasicBlock>>& GetBlocks() const;
private:
bool is_external_ = false;
// Non-owning pointer; nullptr by default.
BasicBlock* entry_ = nullptr;
std::vector<std::unique_ptr<Argument>> params_;
std::vector<std::unique_ptr<BasicBlock>> blocks_;
};
// Top-level IR container: owns a Context, the global variables and the
// functions. The Create* methods return non-owning pointers.
class Module {
public:
Module() = default;
Context& GetContext();
const Context& GetContext() const;
// Creating a function currently only takes the return type explicitly; a
// full FunctionType is not wired in yet.
Function* CreateFunction(const std::string& name,
std::shared_ptr<Type> ret_type,
bool is_external = false);
// NOTE(review): the behaviour when `name` is not found is decided in the
// definition (presumably nullptr) -- confirm. Same for GetGlobal below.
Function* GetFunction(const std::string& name) const;
// Scalar globals with an int / float initial value.
GlobalVariable* CreateGlobalI32(const std::string& name, int init_value);
GlobalVariable* CreateGlobalF32(const std::string& name, double init_value);
// Array globals without an explicit initializer list.
GlobalVariable* CreateGlobalArrayI32(const std::string& name,
size_t array_size);
GlobalVariable* CreateGlobalArrayF32(const std::string& name,
size_t array_size);
// Array globals with an explicit list of initial values.
GlobalVariable* CreateGlobalArrayI32(const std::string& name,
size_t array_size,
const std::vector<int>& init_values);
GlobalVariable* CreateGlobalArrayF32(const std::string& name,
size_t array_size,
const std::vector<double>& init_values);
GlobalVariable* GetGlobal(const std::string& name) const;
const std::vector<std::unique_ptr<GlobalVariable>>& GetGlobals() const;
const std::vector<std::unique_ptr<Function>>& GetFunctions() const;
private:
Context context_;
std::vector<std::unique_ptr<GlobalVariable>> globals_;
std::vector<std::unique_ptr<Function>> functions_;
};
// Convenience builder for constants and instructions. It holds a reference to
// a Context and a current insertion block.
// NOTE(review): the Create* instruction helpers presumably append to the
// current insertion block -- confirm in the definitions.
class IRBuilder {
public:
IRBuilder(Context& ctx, BasicBlock* bb);
// Changes / queries the current insertion block.
void SetInsertPoint(BasicBlock* bb);
BasicBlock* GetInsertBlock() const;
// Constants.
ConstantInt* CreateConstInt(int v);
ConstantFloat* CreateConstFloat(double v);
// Binary operations and comparisons.
BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs,
const std::string& name);
BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name);
BinaryInst* CreateICmp(Opcode op, Value* lhs, Value* rhs,
const std::string& name);
// Conversions.
CastInst* CreateSIToFP(Value* operand, const std::string& name);
CastInst* CreateFPToSI(Value* operand, const std::string& name);
CastInst* CreateZExt(Value* operand, std::shared_ptr<Type> target_ty, const std::string& name);
// Stack allocation; `count` defaults to nullptr.
AllocaInst* CreateAlloca(std::shared_ptr<Type> elem_ty, const std::string& name,
Value* count = nullptr);
AllocaInst* CreateAllocaI32(const std::string& name,
Value* count = nullptr);
AllocaInst* CreateAllocaF32(const std::string& name,
Value* count = nullptr);
// Memory access and address computation.
LoadInst* CreateLoad(Value* ptr, const std::string& name);
StoreInst* CreateStore(Value* val, Value* ptr);
GetElementPtrInst* CreateGEP(Value* base_ptr, Value* index,
const std::string& name);
// Calls and control flow.
CallInst* CreateCall(Function* callee, const std::vector<Value*>& args,
const std::string& name);
BranchInst* CreateBr(BasicBlock* target);
CondBranchInst* CreateCondBr(Value* cond, BasicBlock* true_bb,
BasicBlock* false_bb);
ReturnInst* CreateRet(Value* v);
ReturnInst* CreateRetVoid();
PhiInst* CreatePhi(std::shared_ptr<Type> ty, const std::string& name);
private:
Context& ctx_;
// Non-owning pointer to the current insertion block.
BasicBlock* insert_block_;
};
// Writes a textual form of a Module to an output stream.
class IRPrinter {
public:
// Prints `module` to `os`.
void Print(const Module& module, std::ostream& os);
};
} // namespace ir

@ -0,0 +1,122 @@
// 将语法树翻译为 IR。
// 实现拆分在 IRGenFunc/IRGenStmt/IRGenExp/IRGenDecl。
#pragma once
#include <any>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include "ir/IR.h"
#include "sem/Sema.h"
// Forward declarations of the IR types referenced by this header.
// NOTE(review): "ir/IR.h" is already included above, so these are presumably
// redundant but harmless -- confirm before removing.
namespace ir {
class Module;
class Function;
class IRBuilder;
class Value;
}
// Translates the SysY parse tree into IR by visiting it.
// Per the header comment, the member definitions are split across
// IRGenFunc / IRGenStmt / IRGenExp / IRGenDecl.
//
// The generator writes into a caller-provided ir::Module and reads the name
// bindings from a SemanticContext; both are held by reference, so they must
// outlive this object.
class IRGenImpl final : public SysYBaseVisitor {
 public:
  IRGenImpl(ir::Module& module, const SemanticContext& sema);

  // Visitor overrides, one per handled grammar rule.
  std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override;
  std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override;
  std::any visitBlock(SysYParser::BlockContext* ctx) override;
  std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override;
  std::any visitDecl(SysYParser::DeclContext* ctx) override;
  std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override;
  std::any visitStmt(SysYParser::StmtContext* ctx) override;
  std::any visitVarDef(SysYParser::VarDefContext* ctx) override;
  std::any visitExp(SysYParser::ExpContext* ctx) override;
  std::any visitAddExp(SysYParser::AddExpContext* ctx) override;
  std::any visitMulExp(SysYParser::MulExpContext* ctx) override;
  std::any visitUnaryExp(SysYParser::UnaryExpContext* ctx) override;
  std::any visitPrimaryExp(SysYParser::PrimaryExpContext* ctx) override;
  std::any visitLVal(SysYParser::LValContext* ctx) override;
  std::any visitNumber(SysYParser::NumberContext* ctx) override;

 private:
  // Result of visiting a block item: whether code generation can continue in
  // the current block or the block has been terminated.
  enum class BlockFlow {
    Continue,
    Terminated,
  };

  BlockFlow VisitBlockItemResult(SysYParser::BlockItemContext& item);

  // Expression evaluation producing IR values.
  ir::Value* EvalExpr(SysYParser::ExpContext& expr);
  ir::Value* EvalBinaryOrFold(ir::Opcode op, ir::Value* lhs, ir::Value* rhs);
  std::shared_ptr<ir::Type> ResolveBType(SysYParser::BTypeContext* btype) const;

  // Constant evaluators returning int, one per expression grammar level.
  int EvalConstIntExpr(SysYParser::ExpContext& expr);
  int EvalConstIntExpr(SysYParser::ConstExpContext& expr);
  int EvalConstIntAddExp(SysYParser::AddExpContext& expr);
  int EvalConstIntMulExp(SysYParser::MulExpContext& expr);
  int EvalConstIntUnaryExp(SysYParser::UnaryExpContext& expr);
  int EvalConstIntPrimaryExp(SysYParser::PrimaryExpContext& expr);

  // Constant evaluators returning double.
  double EvalConstFloatExpr(SysYParser::ConstExpContext& expr);
  double EvalConstFloatAddExp(SysYParser::AddExpContext& expr);
  double EvalConstFloatMulExp(SysYParser::MulExpContext& expr);
  double EvalConstFloatUnaryExp(SysYParser::UnaryExpContext& expr);
  double EvalConstFloatPrimaryExp(SysYParser::PrimaryExpContext& expr);

  // Array extent helpers and index linearization.
  std::vector<int> EvalArrayExtents(
      const std::vector<SysYParser::ConstExpContext*>& dims);
  std::vector<int> GetArrayExtentsForDecl(SysYParser::VarDefContext* decl);
  std::vector<int> GetArrayExtentsForConstDecl(
      SysYParser::ConstDefContext* decl);
  // `is_array` is an output parameter.
  std::vector<int> GetArrayExtentsForLVal(SysYParser::LValContext& lval,
                                          bool& is_array);
  ir::Value* BuildLinearizedIndex(
      const std::vector<ir::Value*>& indices,
      const std::vector<int>& extents_with_first_dim);

  // Conversions, addressing and allocation helpers.
  ir::Value* CastValueTo(ir::Value* value,
                         const std::shared_ptr<ir::Type>& target_type);
  ir::Value* GetLValAddress(SysYParser::LValContext& lval);
  ir::AllocaInst* CreateEntryBlockAlloca(std::shared_ptr<ir::Type> elem_ty,
                                         const std::string& name,
                                         ir::Value* count = nullptr);
  std::string NextBlockName(const std::string& prefix);

  // Condition lowering: each helper takes the block to branch to when the
  // condition is true and the block for the false case.
  void EmitCondBranch(SysYParser::CondContext& cond, ir::BasicBlock* true_bb,
                      ir::BasicBlock* false_bb);
  void EmitLOrBranch(SysYParser::LOrExpContext& expr, ir::BasicBlock* true_bb,
                     ir::BasicBlock* false_bb);
  void EmitLAndBranch(SysYParser::LAndExpContext& expr, ir::BasicBlock* true_bb,
                      ir::BasicBlock* false_bb);
  void EmitEqBranch(SysYParser::EqExpContext& expr, ir::BasicBlock* true_bb,
                    ir::BasicBlock* false_bb);
  void EmitRelBranch(SysYParser::RelExpContext& expr, ir::BasicBlock* true_bb,
                     ir::BasicBlock* false_bb);
  ir::Value* EvalEqValue(SysYParser::EqExpContext& expr);
  ir::Value* EvalRelValue(SysYParser::RelExpContext& expr);

  // NOTE: member declaration order is the initialization order used by the
  // out-of-line constructor; do not reorder.
  ir::Module& module_;
  const SemanticContext& sema_;
  // Function currently being generated. Defaults to nullptr so it is never
  // read uninitialized; a constructor mem-initializer still takes precedence.
  ir::Function* func_ = nullptr;
  ir::IRBuilder builder_;

  std::unordered_map<std::string, ir::Function*> function_map_;

  // Compile-time constant bookkeeping, keyed by name.
  std::unordered_map<std::string, int> const_value_map_;
  std::vector<std::unordered_map<std::string, int>> local_const_stack_;
  std::vector<std::unordered_map<std::string, int>> const_value_history_;

  // Array extents per declaration and per parameter name.
  std::unordered_map<SysYParser::VarDefContext*, std::vector<int>>
      array_extents_map_;
  std::unordered_map<SysYParser::ConstDefContext*, std::vector<int>>
      const_array_extents_map_;
  std::unordered_map<std::string, std::vector<int>> param_array_extents_map_;

  // Storage for parameters and globals.
  std::unordered_map<std::string, ir::Value*> param_storage_map_;
  std::unordered_map<std::string, ir::Value*> param_pointer_map_;
  std::unordered_map<SysYParser::VarDefContext*, ir::Value*> global_storage_map_;
  std::unordered_map<SysYParser::ConstDefContext*, ir::Value*>
      const_global_storage_map_;

  // Name binding is Sema's responsibility; IRGen only maintains the codegen
  // state mapping "declaration -> storage slot".
  std::unordered_map<SysYParser::VarDefContext*, ir::Value*> storage_map_;
  std::unordered_map<SysYParser::ConstDefContext*, ir::Value*>
      const_storage_map_;

  // One pair of blocks per enclosing loop.
  // NOTE(review): which element is the continue target and which the break
  // target is decided by the users of this stack -- confirm there.
  std::vector<std::pair<ir::BasicBlock*, ir::BasicBlock*>> loop_stack_;

  // Counter for NextBlockName(); starts at 0.
  int block_index_ = 0;
};
// Generates an IR module for the compilation unit `tree`, using the bindings
// recorded in `sema`. The caller owns the returned module.
std::unique_ptr<ir::Module> GenerateIR(SysYParser::CompUnitContext& tree,
const SemanticContext& sema);

@ -0,0 +1,414 @@
#pragma once
#include <initializer_list>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
// Forward declaration only: this header refers to ir::Module by reference in
// the lowering entry points and does not need its definition.
namespace ir
{
class Module;
}
namespace mir
{
// Context object for the machine IR. It currently declares no state or
// operations beyond default construction.
class MIRContext
{
public:
MIRContext() = default;
};
// Returns a reference to a MIRContext.
// NOTE(review): presumably a single shared default instance -- confirm in the definition.
MIRContext &DefaultContext();
// Physical register identifiers.
// Enumerators are grouped as W0-W30, X0-X30, S0-S31, followed by XZR, SP and
// WZR. Values are implicit, so the declaration order fixes each enumerator's
// underlying value; do not reorder without checking users such as PhysRegName.
// NOTE(review): the names follow AArch64 register naming (32-bit W, 64-bit X,
// single-precision S), presumably the backend's target -- confirm.
enum class PhysReg
{
W0,
W1,
W2,
W3,
W4,
W5,
W6,
W7,
W8,
W9,
W10,
W11,
W12,
W13,
W14,
W15,
W16,
W17,
W18,
W19,
W20,
W21,
W22,
W23,
W24,
W25,
W26,
W27,
W28,
W29,
W30,
X0,
X1,
X2,
X3,
X4,
X5,
X6,
X7,
X8,
X9,
X10,
X11,
X12,
X13,
X14,
X15,
X16,
X17,
X18,
X19,
X20,
X21,
X22,
X23,
X24,
X25,
X26,
X27,
X28,
X29,
X30,
S0,
S1,
S2,
S3,
S4,
S5,
S6,
S7,
S8,
S9,
S10,
S11,
S12,
S13,
S14,
S15,
S16,
S17,
S18,
S19,
S20,
S21,
S22,
S23,
S24,
S25,
S26,
S27,
S28,
S29,
S30,
S31,
XZR,
SP,
WZR
};
// Returns a C string naming `reg`.
// NOTE(review): presumably the assembly spelling of the register, with static
// storage duration -- confirm in the definition.
const char *PhysRegName(PhysReg reg);
// Class of a virtual register: integer, floating point or pointer.
enum class VRegClass
{
Int,
Float,
Ptr
};
// Machine-level opcodes. Values are implicit, so declaration order determines
// the underlying values. The section comments below are a reading aid based
// on the enumerator names only.
enum class Opcode
{
// Function entry / exit.
Prologue,
Epilogue,
// Immediate move.
MovImm,
// Stack, global and memory access.
LoadStack,
StoreStack,
LoadStackAddr,
LoadGlobal,
StoreGlobal,
LoadGlobalAddr,
LoadMem,
StoreMem,
// Integer register-register operations.
AddRR,
SubRR,
MulRR,
DivRR,
ModRR,
AndRR,
OrRR,
XorRR,
ShlRR,
ShrRR,
AsrRR,
Asr64RR,
Uxtw,
Sxtw,
// Comparison and conditional selection.
CmpRR,
CmpImm,
FCmpRR,
CSet,
Csel,
Smull,
Msub,
NegRR,
// Floating point arithmetic and conversions.
FAddRR,
FSubRR,
FMulRR,
FDivRR,
Scvtf,
FCvtzs,
FMovWS,
// Control flow.
Br,
CondBr,
Call,
Ret,
// Address and register moves.
LoadAddr,
MovReg,
};
// Condition codes: equal, not equal, less than, less or equal, greater than,
// greater or equal.
enum class CondCode
{
EQ,
NE,
LT,
LE,
GT,
GE
};
// A machine instruction operand. Instances are created only through the
// static factory functions; the constructor is private.
class Operand
{
public:
// Discriminator telling which factory produced the operand.
enum class Kind
{
Reg,
VReg,
Imm,
FrameIndex,
Label,
Symbol
};
// Factories, one per Kind.
static Operand Reg(PhysReg reg);
static Operand VReg(int id, VRegClass vreg_class);
static Operand Imm(int value);
static Operand FrameIndex(int index);
static Operand Label(int label_id);
static Operand Symbol(std::string symbol);
Kind GetKind() const { return kind_; }
PhysReg GetReg() const { return reg_; }
// GetImm, GetFrameIndex, GetLabel and GetVRegId all return the same stored
// integer (imm_); none of them checks the operand kind, so callers should
// consult GetKind() first.
int GetImm() const { return imm_; }
int GetFrameIndex() const { return imm_; }
int GetLabel() const { return imm_; }
const std::string &GetSymbol() const { return symbol_; }
int GetVRegId() const { return imm_; }
VRegClass GetVRegClass() const { return vreg_class_; }
private:
// `vreg_class` defaults to Int and `symbol` to the empty string.
Operand(Kind kind, PhysReg reg, int imm,
VRegClass vreg_class = VRegClass::Int, std::string symbol = "");
// NOTE: members are initialized in this declaration order (symbol_ before
// vreg_class_), which differs from the constructor's parameter order.
Kind kind_;
PhysReg reg_;
int imm_;
std::string symbol_;
VRegClass vreg_class_;
};
// A machine instruction: an opcode plus an ordered list of operands.
class MachineInstr
{
public:
// `operands` defaults to an empty list.
MachineInstr(Opcode opcode, std::vector<Operand> operands = {});
Opcode GetOpcode() const { return opcode_; }
// Read-only and mutable access to the operand list.
const std::vector<Operand> &GetOperands() const { return operands_; }
std::vector<Operand> &GetOperands() { return operands_; }
private:
Opcode opcode_;
std::vector<Operand> operands_;
};
// Description of one stack frame slot.
// Defaults: index 0, size 4, offset 0, and both stack-argument flags false.
// NOTE(review): size and offset are presumably in bytes, and the two flags
// presumably mark slots holding incoming / outgoing stack-passed arguments --
// confirm with the frame lowering code.
struct FrameSlot
{
    int index{0};
    int size{4};
    int offset{0};
    bool is_stack_arg{false};
    bool is_callee_stack_arg{false};
};
// A machine basic block: a name, a label id and an ordered list of machine
// instructions stored by value.
class MachineBasicBlock
{
public:
// `label_id` defaults to -1.
explicit MachineBasicBlock(std::string name, int label_id = -1);
const std::string &GetName() const { return name_; }
int GetLabelId() const { return label_id_; }
void SetLabelId(int label_id) { label_id_ = label_id; }
// Mutable and read-only access to the instruction list.
std::vector<MachineInstr> &GetInstructions() { return instructions_; }
const std::vector<MachineInstr> &GetInstructions() const { return instructions_; }
// Adds an instruction built from `opcode` and `operands` and returns a
// reference to a MachineInstr.
// NOTE(review): presumably the newly appended element; because instructions_
// is a std::vector, such a reference would be invalidated by later growth --
// confirm and avoid holding it across further Append calls.
MachineInstr &Append(Opcode opcode,
std::initializer_list<Operand> operands = {});
private:
std::string name_;
int label_id_ = -1;
std::vector<MachineInstr> instructions_;
};
// A machine function: owns its basic blocks and tracks frame slots, the frame
// size, label ids, virtual register classes and callee-saved registers.
class MachineFunction
{
public:
explicit MachineFunction(std::string name);
const std::string &GetName() const { return name_; }
// GetEntry dereferences entry_, which is nullptr by default.
// NOTE(review): presumably CreateBlock sets entry_ for the first block;
// calling GetEntry before that would dereference a null pointer -- confirm.
// Use GetEntryPtr when the entry may not exist yet.
MachineBasicBlock &GetEntry() { return *entry_; }
const MachineBasicBlock &GetEntry() const { return *entry_; }
MachineBasicBlock *GetEntryPtr() { return entry_; }
const MachineBasicBlock *GetEntryPtr() const { return entry_; }
MachineBasicBlock &CreateBlock(std::string name);
// Looks a block up by name.
// NOTE(review): presumably returns nullptr when no block matches -- confirm.
MachineBasicBlock *FindBlock(const std::string &name);
const MachineBasicBlock *FindBlock(const std::string &name) const;
std::vector<std::unique_ptr<MachineBasicBlock>> &GetBlocks()
{
return blocks_;
}
const std::vector<std::unique_ptr<MachineBasicBlock>> &GetBlocks() const
{
return blocks_;
}
// Allocates a new label id (next_label_id_ starts at 0).
int CreateLabel();
// Frame slot creation; `size` defaults to 4 in each variant.
int CreateFrameIndex(int size = 4);
int CreateStackArgFrameIndex(int size = 4);
int CreateCalleeStackArgFrameIndex(int size = 4);
FrameSlot &GetFrameSlot(int index);
const FrameSlot &GetFrameSlot(int index) const;
const std::vector<FrameSlot> &GetFrameSlots() const { return frame_slots_; }
std::vector<FrameSlot> &GetFrameSlots() { return frame_slots_; }
int GetFrameSize() const { return frame_size_; }
void SetFrameSize(int size) { frame_size_ = size; }
// Virtual registers: the number created equals vreg_classes_.size().
int CreateVReg(VRegClass vreg_class);
VRegClass GetVRegClass(int vreg_id) const;
int GetNumVRegs() const { return static_cast<int>(vreg_classes_.size()); }
void AddCalleeSavedReg(PhysReg reg);
const std::vector<PhysReg> &GetCalleeSavedRegs() const { return callee_saved_regs_; }
private:
std::string name_;
std::vector<std::unique_ptr<MachineBasicBlock>> blocks_;
// Non-owning pointer to the entry block.
MachineBasicBlock *entry_ = nullptr;
std::vector<FrameSlot> frame_slots_;
int frame_size_ = 0;
int next_label_id_ = 0;
std::vector<VRegClass> vreg_classes_;
std::vector<PhysReg> callee_saved_regs_;
};
// Description of one global emitted by the backend.
// Only i32 scalars and i32 arrays are representable (see Kind).
// A default-constructed value is an i32 scalar with an empty name, initial
// value 0, array size 0 and no element initializers.
struct MachineGlobal
{
    enum class Kind { I32Scalar, I32Array };

    std::string name;
    Kind kind{Kind::I32Scalar};
    int init_value{0};             // initial value used for scalars
    size_t array_size{0};          // element count used for arrays
    std::vector<int> init_values;  // element initializers used for arrays
};
class MachineModule
{
public:
MachineModule() = default;
MachineFunction &CreateFunction(std::string name);
MachineFunction *GetFunction(const std::string &name);
const MachineFunction *GetFunction(const std::string &name) const;
std::vector<std::unique_ptr<MachineFunction>> &GetFunctions()
{
return functions_;
}
const std::vector<std::unique_ptr<MachineFunction>> &GetFunctions() const
{
return functions_;
}
void AddGlobalI32(std::string name, int init_value)
{
MachineGlobal g;
g.name = std::move(name);
g.kind = MachineGlobal::Kind::I32Scalar;
g.init_value = init_value;
globals_.push_back(std::move(g));
}
void AddGlobalArrayI32(std::string name, size_t array_size,
std::vector<int> init_values = {})
{
MachineGlobal g;
g.name = std::move(name);
g.kind = MachineGlobal::Kind::I32Array;
g.array_size = array_size;
g.init_values = std::move(init_values);
globals_.push_back(std::move(g));
}
std::vector<MachineGlobal> &GetGlobals() { return globals_; }
const std::vector<MachineGlobal> &GetGlobals() const { return globals_; }
private:
std::vector<std::unique_ptr<MachineFunction>> functions_;
std::vector<MachineGlobal> globals_;
};
// Backend entry points. Each pass has one overload for a single function and
// one for a whole module.

// Lowers an IR module to machine IR; the caller owns the result.
std::unique_ptr<MachineModule> LowerModuleToMIR(const ir::Module &module);
// Variant that returns a single MachineFunction for the given IR module.
// NOTE(review): which IR function is lowered is decided in the definition
// (presumably a legacy single-function path) -- confirm.
std::unique_ptr<MachineFunction> LowerToMIR(const ir::Module &module);
// Register allocation.
void RunRegAlloc(MachineFunction &function);
void RunRegAlloc(MachineModule &module);
// Frame lowering.
void RunFrameLowering(MachineFunction &function);
void RunFrameLowering(MachineModule &module);
// Peephole optimization.
void RunPeephole(MachineFunction &function);
void RunPeephole(MachineModule &module);
// Writes assembly text to `os`.
void PrintAsm(const MachineFunction &function, std::ostream &os);
void PrintAsm(const MachineModule &module, std::ostream &os);
} // namespace mir

@ -0,0 +1,92 @@
// 基于语法树的语义检查与名称绑定。
#pragma once
#include <unordered_map>
#include "SysYParser.h"
// Result of semantic analysis: side tables that bind use sites in the parse
// tree to what they refer to.
//
// Every Bind* call records (or overwrites) the entry for a use site. Every
// Resolve* call returns the recorded entry, or nullptr when nothing has been
// bound for that use site. The parse-tree pointers are non-owning.
class SemanticContext {
 public:
  // Variable use -> variable definition.
  void BindVarUse(SysYParser::LValContext* use,
                  SysYParser::VarDefContext* decl) {
    var_uses_.insert_or_assign(use, decl);
  }
  SysYParser::VarDefContext* ResolveVarUse(
      const SysYParser::LValContext* use) const {
    return FindDecl(var_uses_, use);
  }

  // Constant array use -> constant definition.
  void BindConstArrayUse(SysYParser::LValContext* use,
                         SysYParser::ConstDefContext* decl) {
    const_array_uses_.insert_or_assign(use, decl);
  }
  SysYParser::ConstDefContext* ResolveConstArrayUse(
      const SysYParser::LValContext* use) const {
    return FindDecl(const_array_uses_, use);
  }

  // Constant scalar use -> constant definition.
  void BindConstScalarUse(SysYParser::LValContext* use,
                          SysYParser::ConstDefContext* decl) {
    const_scalar_uses_.insert_or_assign(use, decl);
  }
  SysYParser::ConstDefContext* ResolveConstScalarUse(
      const SysYParser::LValContext* use) const {
    return FindDecl(const_scalar_uses_, use);
  }

  // Use -> int constant value. The returned pointer refers to the entry
  // stored inside this context.
  void BindConstUse(SysYParser::LValContext* use, int value) {
    const_uses_.insert_or_assign(use, value);
  }
  const int* ResolveConstUse(const SysYParser::LValContext* use) const {
    return FindValue(const_uses_, use);
  }

  // Use -> double constant value. The returned pointer refers to the entry
  // stored inside this context.
  void BindConstFloatUse(SysYParser::LValContext* use, double value) {
    const_float_uses_.insert_or_assign(use, value);
  }
  const double* ResolveConstFloatUse(const SysYParser::LValContext* use) const {
    return FindValue(const_float_uses_, use);
  }

  // Call expression -> function definition.
  void BindCallUse(SysYParser::UnaryExpContext* call,
                   SysYParser::FuncDefContext* decl) {
    call_uses_.insert_or_assign(call, decl);
  }
  SysYParser::FuncDefContext* ResolveCallUse(
      const SysYParser::UnaryExpContext* call) const {
    return FindDecl(call_uses_, call);
  }

 private:
  // Returns the pointer mapped to `key`, or nullptr when `key` is absent.
  template <typename Map, typename Key>
  static typename Map::mapped_type FindDecl(const Map& table, const Key* key) {
    const auto hit = table.find(key);
    if (hit == table.end()) {
      return nullptr;
    }
    return hit->second;
  }

  // Returns the address of the value mapped to `key`, or nullptr when `key`
  // is absent.
  template <typename Map, typename Key>
  static const typename Map::mapped_type* FindValue(const Map& table,
                                                    const Key* key) {
    const auto hit = table.find(key);
    if (hit == table.end()) {
      return nullptr;
    }
    return &hit->second;
  }

  std::unordered_map<const SysYParser::LValContext*,
                     SysYParser::VarDefContext*>
      var_uses_;
  std::unordered_map<const SysYParser::LValContext*, int> const_uses_;
  std::unordered_map<const SysYParser::LValContext*, double> const_float_uses_;
  std::unordered_map<const SysYParser::LValContext*,
                     SysYParser::ConstDefContext*>
      const_array_uses_;
  std::unordered_map<const SysYParser::LValContext*,
                     SysYParser::ConstDefContext*>
      const_scalar_uses_;
  std::unordered_map<const SysYParser::UnaryExpContext*,
                     SysYParser::FuncDefContext*>
      call_uses_;
};
// Runs semantic analysis over `comp_unit` and returns the collected bindings.
// According to the original note, it currently only checks that:
// - variables are declared before use
// - local variables are not defined more than once
// NOTE(review): SemanticContext also records constant and call bindings, so
// this list may be out of date -- confirm against the definition.
SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit);

@ -0,0 +1,22 @@
// 极简符号表:记录局部变量定义点。
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "SysYParser.h"
// Minimal scoped symbol table: maps a variable name to its definition node
// (VarDefContext). Scopes are kept as a stack of hash maps.
class SymbolTable {
public:
// Push / pop one scope.
void EnterScope();
void ExitScope();
// Records `decl` under `name`.
// NOTE(review): presumably in the innermost scope -- confirm in the definition.
void Add(const std::string& name, SysYParser::VarDefContext* decl);
// ContainsInCurrent checks a single scope; Contains / Lookup search by name.
// NOTE(review): presumably ContainsInCurrent looks only at the innermost
// scope, while Contains / Lookup walk from innermost to outermost and Lookup
// returns nullptr on a miss -- confirm.
bool ContainsInCurrent(const std::string& name) const;
bool Contains(const std::string& name) const;
SysYParser::VarDefContext* Lookup(const std::string& name) const;
private:
std::vector<std::unordered_map<std::string, SysYParser::VarDefContext*>>
scopes_;
};

@ -0,0 +1,15 @@
// 简易命令行解析:支持帮助、输入文件与输出阶段选择。
#pragma once
#include <string>
// Parsed command-line options.
// Defaults: empty input path, IR emission enabled, every other flag disabled.
struct CLIOptions {
  std::string input;            // input file path
  bool emit_parse_tree{false};  // output-stage selection flags
  bool emit_ir{true};
  bool emit_asm{false};
  bool show_help{false};        // help was requested
  bool optimize{false};         // optimization was requested
};
// Parses the command line (`argc` / `argv` as passed to main) into CLIOptions.
// NOTE(review): how invalid arguments are reported is decided in the
// definition, which is not visible here.
CLIOptions ParseCLI(int argc, char** argv);

@ -0,0 +1,20 @@
// 轻量日志接口。
#pragma once
#include <cstddef>
#include <exception>
#include <iosfwd>
#include <string>
#include <string_view>
// Write an informational / error message to `os`.
void LogInfo(std::string_view msg, std::ostream& os);
void LogError(std::string_view msg, std::ostream& os);
// Build an error string from a stage name and a message; the *At variant also
// takes a line and column. The exact output format is defined in the
// implementation.
std::string FormatError(std::string_view stage, std::string_view msg);
std::string FormatErrorAt(std::string_view stage, std::size_t line,
std::size_t column, std::string_view msg);
// NOTE(review): presumably reports whether `msg` already starts with the
// prefix FormatError produces for `stage` -- confirm.
bool HasErrorPrefix(std::string_view msg, std::string_view stage);
// Prints an exception to `os`.
void PrintException(std::ostream& os, const std::exception& ex);
// Prints the command-line help text (used for `compiler --help`).
void PrintHelp(std::ostream& os);
Loading…
Cancel
Save