Compare commits

...

9 Commits
master ... dev

Binary file not shown.

@ -1,35 +1,7 @@
// 当前只支持 i32、i32*、void 以及最小的内存/算术指令,仅作演示用。
//
// 当前已经实现:
// 1. 基础类型系统:void / i32 / i32*
// 2. Value 体系:Value / ConstantValue / ConstantInt / Function / BasicBlock / User / GlobalValue / Instruction
// 3. 最小指令集:Add / Alloca / Load / Store / Ret
// 4. BasicBlock / Function / Module 三层组织结构
// 5. IRBuilder:便捷创建常量和最小指令
// 6. def-use 关系的轻量实现:
// - Instruction 保存 operand 列表
// - Value 保存 uses
// - 支持 ReplaceAllUsesWith 的简化实现
//
// 当前尚未实现或只做了最小占位:
// 1. 完整类型系统:数组、函数类型、label 类型等
// 2. 更完整的指令系统:br / condbr / call / phi / gep 等
// 3. 更成熟的 Use 管理(例如 LLVM 风格的双向链式结构)
// 4. 更完整的 IR verifier 和优化基础设施
//
// 当前需要特别说明的两个简化点:
// 1. BasicBlock 虽然已经纳入 Value 体系,但其类型目前仍用 void 作为占位,
// 后续如果补 label type,可以再改成更合理的块标签类型。
// 2. ConstantValue 体系目前只实现了 ConstantInt,后续可以继续补 ConstantFloat、
// ConstantArray 等更完整的常量种类。
//
// 建议的扩展顺序:
// 1. 先补更多指令和类型
// 2. 再补控制流相关 IR
// 3. 最后再考虑把 Value/User/Use 进一步抽象成更完整的框架
#pragma once
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <stdexcept>
@ -45,17 +17,27 @@ class Value;
class User;
class ConstantValue;
class ConstantInt;
class ConstantFloat;
class ConstantZero;
class ConstantArray;
class GlobalValue;
class GlobalVariable;
class Argument;
class Instruction;
class BinaryInst;
class CompareInst;
class ReturnInst;
class AllocaInst;
class LoadInst;
class StoreInst;
class BranchInst;
class CondBranchInst;
class CallInst;
class GetElementPtrInst;
class CastInst;
class BasicBlock;
class Function;
// Use 表示一个 Value 的一次使用记录。
// 当前实现设计:
// - value:被使用的值
// - user:使用该值的 User
// - operand_index:该值在 user 操作数列表中的位置
class Use {
public:
Use() = default;
@ -66,64 +48,111 @@ class Use {
User* GetUser() const { return user_; }
size_t GetOperandIndex() const { return operand_index_; }
void SetValue(Value* value) { value_ = value; }
void SetUser(User* user) { user_ = user; }
void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; }
private:
Value* value_ = nullptr;
User* user_ = nullptr;
size_t operand_index_ = 0;
};
// IR 上下文:集中管理类型、常量等共享资源,便于复用与扩展。
class Context {
public:
Context() = default;
~Context();
// 去重创建 i32 常量。
ConstantInt* GetConstInt(int v);
ConstantFloat* GetConstFloat(float v);
template <typename T, typename... Args>
T* CreateOwnedConstant(Args&&... args) {
auto value = std::make_unique<T>(std::forward<Args>(args)...);
auto* ptr = value.get();
owned_constants_.push_back(std::move(value));
return ptr;
}
std::string NextTemp();
std::string NextBlock(const std::string& prefix);
private:
std::unordered_map<int, std::unique_ptr<ConstantInt>> const_ints_;
std::unordered_map<uint32_t, std::unique_ptr<ConstantFloat>> const_floats_;
std::vector<std::unique_ptr<ConstantValue>> owned_constants_;
int temp_index_ = -1;
int block_index_ = -1;
};
// Type describes an IR type. Besides its Kind it can carry an element type
// (constructors taking `element_type`), an array size, and a return type plus
// parameter types (the function-type constructor).
// NOTE(review): this span holds two `enum class Kind` declarations and two
// single-argument constructors; it looks like the before/after sides of a diff
// merged together — confirm which declarations are current.
class Type {
public:
enum class Kind { Void, Int32, PtrInt32 };
explicit Type(Kind k);
// Types are obtained through shared static objects.
// Results for the same type can be compared directly, for example:
// Type::GetInt32Type() == Type::GetInt32Type()
enum class Kind { Void, Int1, Int32, Float32, Pointer, Array, Function };
explicit Type(Kind kind);
// Kind plus an element type.
Type(Kind kind, std::shared_ptr<Type> element_type);
// Kind plus an element type and an element count.
Type(Kind kind, std::shared_ptr<Type> element_type, size_t array_size);
// Function type built from a return type and parameter types.
Type(std::shared_ptr<Type> return_type, std::vector<std::shared_ptr<Type>> params);
// Accessors for the primitive types; these return const references.
static const std::shared_ptr<Type>& GetVoidType();
static const std::shared_ptr<Type>& GetInt1Type();
static const std::shared_ptr<Type>& GetInt32Type();
static const std::shared_ptr<Type>& GetFloatType();
// Factories for derived types; unlike the accessors above these return the
// shared_ptr by value.
static std::shared_ptr<Type> GetPointerType(std::shared_ptr<Type> element_type);
static std::shared_ptr<Type> GetArrayType(std::shared_ptr<Type> element_type,
size_t array_size);
static std::shared_ptr<Type> GetFunctionType(
std::shared_ptr<Type> return_type,
std::vector<std::shared_ptr<Type>> param_types);
// Presumably a convenience accessor for "pointer to i32" — confirm in Type.cpp.
static const std::shared_ptr<Type>& GetPtrInt32Type();
Kind GetKind() const;
const std::shared_ptr<Type>& GetElementType() const;
size_t GetArraySize() const;
const std::shared_ptr<Type>& GetReturnType() const;
const std::vector<std::shared_ptr<Type>>& GetParamTypes() const;
// Kind predicates.
bool IsVoid() const;
bool IsInt1() const;
bool IsInt32() const;
bool IsFloat32() const;
bool IsPointer() const;
bool IsArray() const;
bool IsFunction() const;
// Category predicates; the exact kinds each one covers are defined in the
// implementation file.
bool IsScalar() const;
bool IsInteger() const;
bool IsNumeric() const;
bool IsPtrInt32() const;
// Compares this type with `other`; the comparison rule lives in the
// implementation file.
bool Equals(const Type& other) const;
private:
Kind kind_;
std::shared_ptr<Type> element_type_;
size_t array_size_ = 0;
std::shared_ptr<Type> return_type_;
std::vector<std::shared_ptr<Type>> param_types_;
};
class Value {
public:
Value(std::shared_ptr<Type> ty, std::string name);
virtual ~Value() = default;
const std::shared_ptr<Type>& GetType() const;
const std::string& GetName() const;
void SetName(std::string n);
void SetName(std::string name);
bool IsVoid() const;
bool IsInt1() const;
bool IsInt32() const;
bool IsFloat32() const;
bool IsPointer() const;
bool IsArray() const;
bool IsFunctionValue() const;
bool IsPtrInt32() const;
bool IsConstant() const;
bool IsInstruction() const;
bool IsUser() const;
bool IsFunction() const;
bool IsGlobalVariable() const;
bool IsArgument() const;
void AddUse(User* user, size_t operand_index);
void RemoveUse(User* user, size_t operand_index);
const std::vector<Use>& GetUses() const;
@ -135,52 +164,116 @@ class Value {
std::vector<Use> uses_;
};
// ConstantValue is the abstract base class of the constant hierarchy.
// Subclasses declared in this header: ConstantInt, ConstantFloat, ConstantZero
// and ConstantArray.
class ConstantValue : public Value {
public:
// `name` defaults to the empty string.
ConstantValue(std::shared_ptr<Type> ty, std::string name = "");
// Reports whether this constant is a zero value; every subclass supplies its
// own definition.
virtual bool IsZeroValue() const = 0;
};
// Integer constant holding an `int` payload.
// NOTE(review): the constructor appears twice, differing only in the parameter
// name (`v` / `value`); this looks like old and new diff lines side by side —
// confirm which one is current.
class ConstantInt : public ConstantValue {
public:
ConstantInt(std::shared_ptr<Type> ty, int v);
ConstantInt(std::shared_ptr<Type> ty, int value);
// Returns the stored integer.
int GetValue() const { return value_; }
// Zero exactly when the stored integer is 0.
bool IsZeroValue() const override { return value_ == 0; }
private:
int value_ = 0;
};
// Floating-point constant holding a `float` payload.
// NOTE(review): `value_` is declared twice below (`int` and `float`); the
// `int` line looks like a leftover from the old side of the diff — confirm.
class ConstantFloat : public ConstantValue {
public:
ConstantFloat(std::shared_ptr<Type> ty, float value);
// Returns the stored float.
float GetValue() const { return value_; }
// Compares against 0.0f with ==, so -0.0f also counts as zero.
bool IsZeroValue() const override { return value_ == 0.0f; }
private:
int value_{};
float value_ = 0.0f;
};
// More instruction kinds still need to be added later.
// NOTE(review): a longer `enum class Opcode` is declared further down in this
// text; this one-line form looks like the old side of the diff — confirm.
enum class Opcode { Add, Sub, Mul, Alloca, Load, Store, Ret };
// Constant that is always a zero value of the given type `ty`.
class ConstantZero : public ConstantValue {
public:
explicit ConstantZero(std::shared_ptr<Type> ty);
// Always true.
bool IsZeroValue() const override { return true; }
};
// Constant made of a list of element constants.
// Elements are held as raw, non-owning pointers.
class ConstantArray : public ConstantValue {
public:
ConstantArray(std::shared_ptr<Type> ty, std::vector<ConstantValue*> elements);
// Returns the element list by const reference.
const std::vector<ConstantValue*>& GetElements() const { return elements_; }
// Defined out of line.
bool IsZeroValue() const override;
private:
std::vector<ConstantValue*> elements_;
};
// Instruction opcodes.
enum class Opcode {
// Integer arithmetic.
Add,
Sub,
Mul,
SDiv,
SRem,
// Floating-point arithmetic.
FAdd,
FSub,
FMul,
FDiv,
// Memory.
Alloca,
Load,
Store,
// Comparison.
ICmp,
FCmp,
// Control flow.
Br,
CondBr,
// Call.
Call,
// Address computation.
GEP,
// Type conversion.
SIToFP,
FPToSI,
ZExt,
// Return.
Ret,
};
// Predicates for integer comparisons (used by CompareInst).
enum class ICmpPred { Eq, Ne, Slt, Sle, Sgt, Sge };
// Predicates for floating-point comparisons (used by CompareInst).
enum class FCmpPred { Oeq, One, Olt, Ole, Ogt, Oge };
// User is the abstract base for every IR object that takes other Values as
// inputs. In this header, Instruction derives from User.
class User : public Value {
public:
User(std::shared_ptr<Type> ty, std::string name);
size_t GetNumOperands() const;
Value* GetOperand(size_t index) const;
void SetOperand(size_t index, Value* value);
protected:
// Single entry point for adding an operand.
void AddOperand(Value* value);
private:
// Operands are held as raw, non-owning pointers.
std::vector<Value*> operands_;
};
// GlobalValue is a placeholder shell class for the global value / global
// variable hierarchy. It only fills in the class hierarchy for now;
// initializers, printing and linkage semantics are to be added later.
// NOTE(review): two class heads follow (`: public User` and `: public Value`);
// they look like the old and new sides of the diff — confirm which is current.
class GlobalValue : public User {
class GlobalValue : public Value {
public:
GlobalValue(std::shared_ptr<Type> ty, std::string name);
};
// Module-level variable: stores the value type, an optional initializer and a
// constness flag.
class GlobalVariable : public GlobalValue {
public:
GlobalVariable(std::string name, std::shared_ptr<Type> value_type,
ConstantValue* initializer, bool is_constant);
// Type of the stored value, as passed to the constructor.
const std::shared_ptr<Type>& GetValueType() const { return value_type_; }
// Non-owning pointer to the initializer; defaults to nullptr.
ConstantValue* GetInitializer() const { return initializer_; }
// Reports the `is_constant` flag.
// NOTE(review): Value also declares a non-virtual IsConstant(); this member
// hides it, so the result depends on the static type used at the call site —
// confirm that callers expect this.
bool IsConstant() const { return is_constant_; }
private:
std::shared_ptr<Type> value_type_;
ConstantValue* initializer_ = nullptr;
bool is_constant_ = false;
};
class Instruction : public User {
public:
Instruction(Opcode op, std::shared_ptr<Type> ty, std::string name = "");
Opcode GetOpcode() const;
bool IsTerminator() const;
BasicBlock* GetParent() const;
@ -195,45 +288,116 @@ class BinaryInst : public Instruction {
public:
BinaryInst(Opcode op, std::shared_ptr<Type> ty, Value* lhs, Value* rhs,
std::string name);
Value* GetLhs() const;
Value* GetRhs() const;
Value* GetRhs() const;
};
// Comparison instruction; one class covers both the integer and the
// floating-point form, selected by which constructor is used.
class CompareInst : public Instruction {
public:
// Integer comparison.
CompareInst(ICmpPred pred, Value* lhs, Value* rhs, std::string name);
// Floating-point comparison.
CompareInst(FCmpPred pred, Value* lhs, Value* rhs, std::string name);
bool IsFloatCompare() const { return is_float_compare_; }
// Both predicate getters are always callable; the one that does not match
// the comparison kind returns its default (Eq / Oeq) unless the constructor
// sets it otherwise.
ICmpPred GetICmpPred() const { return icmp_pred_; }
FCmpPred GetFCmpPred() const { return fcmp_pred_; }
Value* GetLhs() const;
Value* GetRhs() const;
private:
bool is_float_compare_ = false;
ICmpPred icmp_pred_ = ICmpPred::Eq;
FCmpPred fcmp_pred_ = FCmpPred::Oeq;
};
// Return instruction, with or without a value.
// NOTE(review): the `(void_ty, val)` constructor looks like the old side of
// the diff next to the newer `(value)` / `()` forms — confirm.
class ReturnInst : public Instruction {
public:
ReturnInst(std::shared_ptr<Type> void_ty, Value* val);
// Return with a value.
explicit ReturnInst(Value* value);
// Return without a value.
ReturnInst();
// Returned value; what a value-less return yields is defined in the
// implementation file.
Value* GetValue() const;
};
// Stack allocation instruction; remembers the allocated type.
// NOTE(review): the two constructors have identical signatures and differ only
// in parameter name (`ptr_ty` / `allocated_type`); old and new diff lines —
// confirm which one is current.
class AllocaInst : public Instruction {
public:
AllocaInst(std::shared_ptr<Type> ptr_ty, std::string name);
AllocaInst(std::shared_ptr<Type> allocated_type, std::string name);
// Type of the allocated object.
const std::shared_ptr<Type>& GetAllocatedType() const { return allocated_type_; }
private:
std::shared_ptr<Type> allocated_type_;
};
// Load instruction reading through a pointer operand.
// NOTE(review): two constructors with different parameter orders are listed;
// the `(val_ty, ptr, name)` form looks like the old side of the diff — confirm.
class LoadInst : public Instruction {
public:
LoadInst(std::shared_ptr<Type> val_ty, Value* ptr, std::string name);
LoadInst(Value* ptr, std::shared_ptr<Type> value_type, std::string name);
// Pointer operand.
Value* GetPtr() const;
};
// Store instruction writing a value through a pointer operand.
// NOTE(review): the `(void_ty, val, ptr)` constructor looks like the old side
// of the diff next to the newer `(value, ptr)` form — confirm.
class StoreInst : public Instruction {
public:
StoreInst(std::shared_ptr<Type> void_ty, Value* val, Value* ptr);
StoreInst(Value* value, Value* ptr);
// Stored value.
Value* GetValue() const;
// Destination pointer.
Value* GetPtr() const;
};
// (Comment carried over from the BasicBlock declaration:) BasicBlock is part
// of the Value hierarchy, which eases a later move towards a fuller IR class
// diagram. Its type still uses void as a placeholder and can be replaced by a
// dedicated label type later.
// Unconditional branch to a single target block.
class BranchInst : public Instruction {
public:
explicit BranchInst(BasicBlock* target);
BasicBlock* GetTarget() const;
};
// Conditional branch: a condition value plus a true and a false target block.
class CondBranchInst : public Instruction {
public:
CondBranchInst(Value* cond, BasicBlock* true_block, BasicBlock* false_block);
Value* GetCond() const;
BasicBlock* GetTrueBlock() const;
BasicBlock* GetFalseBlock() const;
};
// Call instruction: a callee function plus an argument list.
class CallInst : public Instruction {
public:
CallInst(Function* callee, std::vector<Value*> args, std::string name);
Function* GetCallee() const;
// Returns the arguments as a new vector (by value).
std::vector<Value*> GetArgs() const;
};
// Address computation (GEP): a base pointer plus a list of indices; the result
// type is supplied by the caller.
class GetElementPtrInst : public Instruction {
public:
GetElementPtrInst(Value* base_ptr, std::vector<Value*> indices,
std::shared_ptr<Type> result_type, std::string name);
Value* GetBasePtr() const;
// Returns the indices as a new vector (by value).
std::vector<Value*> GetIndices() const;
// How the source element type is derived is defined in the implementation
// file.
std::shared_ptr<Type> GetSourceElementType() const;
};
// Type conversion instruction; `op` selects the conversion (the conversion
// opcodes in this header are SIToFP, FPToSI and ZExt) and `dst_type` is the
// destination type.
class CastInst : public Instruction {
public:
CastInst(Opcode op, Value* value, std::shared_ptr<Type> dst_type,
std::string name);
// Value being converted.
Value* GetValue() const;
};
class BasicBlock : public Value {
public:
explicit BasicBlock(std::string name);
Function* GetParent() const;
void SetParent(Function* parent);
bool HasTerminator() const;
void AddSuccessor(BasicBlock* succ);
const std::vector<std::unique_ptr<Instruction>>& GetInstructions() const;
const std::vector<BasicBlock*>& GetPredecessors() const;
const std::vector<BasicBlock*>& GetSuccessors() const;
template <typename T, typename... Args>
T* Append(Args&&... args) {
if (HasTerminator()) {
@ -254,60 +418,105 @@ class BasicBlock : public Value {
std::vector<BasicBlock*> successors_;
};
// (Comment describing the earlier, minimal Function:) Function also uses a
// minimal implementation. Important: because the project had no separate
// FunctionType, once Function derived from Value its type_ only stored the
// "return type" and could not express the whole signature "return type +
// parameter list". That was enough for a minimal IR supporting only
// int main(), but adding ordinary functions, parameters and calls normally
// needs a dedicated function-type representation.
// NOTE(review): the `class Function : public Value {` head and the
// `Function(name, ret_type)` constructor below look like old-side diff lines
// interleaved into the Argument declaration — confirm.
class Function : public Value {
// Argument: a formal parameter value; records its index and parent function.
class Argument : public Value {
public:
// This constructor also takes the return type, not a full function type.
Function(std::string name, std::shared_ptr<Type> ret_type);
Argument(std::shared_ptr<Type> ty, std::string name, size_t index,
Function* parent);
// Index passed at construction.
size_t GetIndex() const { return index_; }
// Non-owning pointer to the parent function.
Function* GetParent() const { return parent_; }
private:
size_t index_ = 0;
Function* parent_ = nullptr;
};
// Function: a GlobalValue that owns its arguments and basic blocks (both are
// held through unique_ptr).
class Function : public GlobalValue {
public:
// `function_type` is the function's type; `is_declaration` is stored and
// reported by IsDeclaration().
Function(std::string name, std::shared_ptr<Type> function_type,
bool is_declaration);
const std::shared_ptr<Type>& GetFunctionType() const;
const std::shared_ptr<Type>& GetReturnType() const;
const std::vector<std::unique_ptr<Argument>>& GetArguments() const;
bool IsDeclaration() const { return is_declaration_; }
// Adds an argument and returns a non-owning pointer to it.
Argument* AddArgument(std::shared_ptr<Type> ty, const std::string& name);
// Creates a block and returns a non-owning pointer to it.
BasicBlock* CreateBlock(const std::string& name);
// Entry block; entry_ defaults to nullptr.
BasicBlock* GetEntry();
const BasicBlock* GetEntry() const;
const std::vector<std::unique_ptr<BasicBlock>>& GetBlocks() const;
private:
bool is_declaration_ = false;
BasicBlock* entry_ = nullptr;
std::vector<std::unique_ptr<Argument>> arguments_;
std::vector<std::unique_ptr<BasicBlock>> blocks_;
};
// Module: owns the Context, the global variables and the functions.
// NOTE(review): CreateFunction below carries both a `ret_type` and a
// `function_type` parameter line; they look like the old and new sides of the
// diff — confirm which one is current.
class Module {
public:
Module() = default;
Context& GetContext();
const Context& GetContext() const;
// (Refers to the `ret_type` form:) when creating a function only the return
// type is passed explicitly; the full FunctionType is not wired in yet.
GlobalVariable* CreateGlobal(std::string name, std::shared_ptr<Type> value_type,
ConstantValue* initializer, bool is_constant);
Function* CreateFunction(const std::string& name,
std::shared_ptr<Type> ret_type);
std::shared_ptr<Type> function_type,
bool is_declaration = false);
// Name lookups; what is returned for an unknown name is defined in the
// implementation file.
Function* FindFunction(const std::string& name) const;
GlobalVariable* FindGlobal(const std::string& name) const;
const std::vector<std::unique_ptr<GlobalVariable>>& GetGlobals() const;
const std::vector<std::unique_ptr<Function>>& GetFunctions() const;
private:
Context context_;
std::vector<std::unique_ptr<GlobalVariable>> globals_;
std::vector<std::unique_ptr<Function>> functions_;
};
// IRBuilder: convenience factory for constants and instructions, working on a
// current insert block.
// NOTE(review): `CreateRet` and `insert_block_` each appear twice below; these
// look like old and new diff lines side by side — confirm which are current.
class IRBuilder {
public:
IRBuilder(Context& ctx, BasicBlock* bb);
void SetInsertPoint(BasicBlock* bb);
BasicBlock* GetInsertBlock() const;
// Minimal set for building constants, binary operations and returns.
ConstantInt* CreateConstInt(int v);
ConstantFloat* CreateConstFloat(float v);
ConstantValue* CreateZero(std::shared_ptr<Type> type);
BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs,
const std::string& name);
BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name);
// Memory instructions.
AllocaInst* CreateAlloca(std::shared_ptr<Type> allocated_type,
const std::string& name);
AllocaInst* CreateAllocaI32(const std::string& name);
LoadInst* CreateLoad(Value* ptr, const std::string& name);
StoreInst* CreateStore(Value* val, Value* ptr);
ReturnInst* CreateRet(Value* v);
// Comparisons.
CompareInst* CreateICmp(ICmpPred pred, Value* lhs, Value* rhs,
const std::string& name);
CompareInst* CreateFCmp(FCmpPred pred, Value* lhs, Value* rhs,
const std::string& name);
// Control flow.
BranchInst* CreateBr(BasicBlock* target);
CondBranchInst* CreateCondBr(Value* cond, BasicBlock* true_block,
BasicBlock* false_block);
// Calls and address computation.
CallInst* CreateCall(Function* callee, const std::vector<Value*>& args,
const std::string& name);
GetElementPtrInst* CreateGEP(Value* base_ptr, const std::vector<Value*>& indices,
const std::string& name);
// Type conversions.
CastInst* CreateSIToFP(Value* value, const std::string& name);
CastInst* CreateFPToSI(Value* value, const std::string& name);
CastInst* CreateZExt(Value* value, std::shared_ptr<Type> dst_type,
const std::string& name);
// Returns.
ReturnInst* CreateRet(Value* value);
ReturnInst* CreateRetVoid();
private:
Context& ctx_;
BasicBlock* insert_block_;
BasicBlock* insert_block_ = nullptr;
};
class IRPrinter {

@ -1,57 +1,99 @@
// 将语法树翻译为 IR。
// 实现拆分在 IRGenFunc/IRGenStmt/IRGenExp/IRGenDecl。
#pragma once
#include <any>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include "ir/IR.h"
#include "sem/Sema.h"
namespace ir {
class Module;
class Function;
class IRBuilder;
class Value;
}
// IRGenImpl translates the parse tree into IR, using the results produced by
// semantic analysis (SemanticContext).
// NOTE(review): this span interleaves the old visitor-based declaration
// (`final : public SysYBaseVisitor`, the visit* overrides, BlockFlow,
// VisitBlockItemResult, EvalExpr, func_, storage_map_) with the new Gen*-based
// one; the BlockFlow enum is not even closed. Confirm which lines are current
// before relying on any of them.
class IRGenImpl final : public SysYBaseVisitor {
class IRGenImpl {
public:
IRGenImpl(ir::Module& module, const SemanticContext& sema);
// Visitor entry points.
std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override;
std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override;
std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override;
std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override;
std::any visitDecl(SysYParser::DeclContext* ctx) override;
std::any visitStmt(SysYParser::StmtContext* ctx) override;
std::any visitVarDef(SysYParser::VarDefContext* ctx) override;
std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override;
std::any visitParenExp(SysYParser::ParenExpContext* ctx) override;
std::any visitNumberExp(SysYParser::NumberExpContext* ctx) override;
std::any visitVarExp(SysYParser::VarExpContext* ctx) override;
std::any visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) override;
// Generates IR for a whole compilation unit.
void Gen(SysYParser::CompUnitContext& cu);
private:
enum class BlockFlow {
Continue,
Terminated,
// Storage bookkeeping for one named object.
struct StorageEntry {
// Value holding the object's storage (non-owning).
ir::Value* storage = nullptr;
// Declared type of the object.
std::shared_ptr<ir::Type> declared_type;
bool is_array_param = false;
bool is_global = false;
bool is_const = false;
};
BlockFlow VisitBlockItemResult(SysYParser::BlockItemContext& item);
ir::Value* EvalExpr(SysYParser::ExpContext& expr);
// Top-level generation steps.
void DeclareBuiltins();
void GenGlobals(SysYParser::CompUnitContext& cu);
void GenFunctionDecls(SysYParser::CompUnitContext& cu);
void GenFunctionBodies(SysYParser::CompUnitContext& cu);
// Functions, blocks, declarations and statements.
void GenFuncDef(SysYParser::FuncDefContext& func);
void GenBlock(SysYParser::BlockContext& block);
void GenBlockItem(SysYParser::BlockItemContext& item);
void GenDecl(SysYParser::DeclContext& decl);
void GenConstDecl(SysYParser::ConstDeclContext& decl);
void GenVarDecl(SysYParser::VarDeclContext& decl);
void GenStmt(SysYParser::StmtContext& stmt);
// Expressions; each returns the Value produced for the expression.
ir::Value* GenExpr(SysYParser::ExpContext& expr);
ir::Value* GenAddExpr(SysYParser::AddExpContext& add);
ir::Value* GenMulExpr(SysYParser::MulExpContext& mul);
ir::Value* GenUnaryExpr(SysYParser::UnaryExpContext& unary);
ir::Value* GenPrimary(SysYParser::PrimaryContext& primary);
ir::Value* GenRelExpr(SysYParser::RelExpContext& rel);
ir::Value* GenEqExpr(SysYParser::EqExpContext& eq);
// L-values: address form and value form.
ir::Value* GenLValueAddress(SysYParser::LValContext& lval);
ir::Value* GenLValueValue(SysYParser::LValContext& lval);
// Conditions take the blocks to continue in for the true and false outcome.
void GenCond(SysYParser::CondContext& cond, ir::BasicBlock* true_block,
ir::BasicBlock* false_block);
void GenLOrCond(SysYParser::LOrExpContext& expr, ir::BasicBlock* true_block,
ir::BasicBlock* false_block);
void GenLAndCond(SysYParser::LAndExpContext& expr, ir::BasicBlock* true_block,
ir::BasicBlock* false_block);
// Value conversion helpers.
ir::Value* CastValue(ir::Value* value, const std::shared_ptr<ir::Type>& dst_type);
ir::Value* ToBool(ir::Value* value);
ir::Value* DecayArrayPointer(ir::Value* array_ptr);
// Scope and storage management.
void EnterScope();
void ExitScope();
void EnsureInsertableBlock();
void DeclareLocal(const std::string& name, StorageEntry entry);
StorageEntry* LookupStorage(const std::string& name);
const StorageEntry* LookupStorage(const std::string& name) const;
// Array helpers working on a flattened element index.
size_t CountScalars(const std::shared_ptr<ir::Type>& type) const;
std::vector<int> FlatIndexToIndices(const std::shared_ptr<ir::Type>& type,
size_t flat_index) const;
void EmitArrayStore(ir::Value* base_ptr, const std::shared_ptr<ir::Type>& array_type,
size_t flat_index, ir::Value* value);
void ZeroInitializeLocalArray(ir::Value* base_ptr,
const std::shared_ptr<ir::Type>& array_type);
void EmitLocalArrayInit(ir::Value* base_ptr, const std::shared_ptr<ir::Type>& array_type,
SysYParser::InitValContext& init);
void EmitLocalConstArrayInit(ir::Value* base_ptr,
const std::shared_ptr<ir::Type>& array_type,
SysYParser::ConstInitValContext& init);
// Global initializers are built as constants; `init` is passed by pointer.
ir::ConstantValue* BuildGlobalInitializer(const std::shared_ptr<ir::Type>& type,
SysYParser::InitValContext* init);
ir::ConstantValue* BuildGlobalConstInitializer(
const std::shared_ptr<ir::Type>& type, SysYParser::ConstInitValContext* init);
ir::Module& module_;
const SemanticContext& sema_;
ir::Function* func_;
ir::Function* current_function_ = nullptr;
std::shared_ptr<ir::Type> current_return_type_;
ir::IRBuilder builder_;
// Name binding is Sema's job; IRGen only keeps the code-generation state that
// maps a declaration to its storage slot.
std::unordered_map<SysYParser::VarDefContext*, ir::Value*> storage_map_;
// Stack of local scopes, each mapping a name to its storage entry.
std::vector<std::unordered_map<std::string, StorageEntry>> local_scopes_;
std::unordered_map<std::string, StorageEntry> globals_;
// Branch targets for break / continue (presumably one entry per enclosing
// loop — confirm in IRGenStmt.cpp).
std::vector<ir::BasicBlock*> break_targets_;
std::vector<ir::BasicBlock*> continue_targets_;
std::unordered_map<std::string, ConstantData> global_const_values_;
};
std::unique_ptr<ir::Module> GenerateIR(SysYParser::CompUnitContext& tree,

@ -1,30 +1,77 @@
// 基于语法树的语义检查与名称绑定。
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "SysYParser.h"
#include "ir/IR.h"
// Kind of a symbol: an object (variable/constant) or a function.
enum class SymbolKind { Object, Function };
// Compile-time constant value that is either an int or a float; `kind` tells
// which of the two payload fields is meaningful.
struct ConstantData {
enum class Kind { Int, Float };
Kind kind = Kind::Int;
int int_value = 0;
float float_value = 0.0f;
// Factories for each kind.
static ConstantData FromInt(int value);
static ConstantData FromFloat(float value);
bool IsInt() const { return kind == Kind::Int; }
bool IsFloat() const { return kind == Kind::Float; }
// Accessors; any conversion when the kind differs is defined in the
// implementation file.
int AsInt() const;
float AsFloat() const;
// Returns this constant converted to `dst_type`.
ConstantData CastTo(const std::shared_ptr<ir::Type>& dst_type) const;
// IR type corresponding to this constant.
std::shared_ptr<ir::Type> GetType() const;
};
// Everything semantic analysis records about one symbol.
struct SymbolInfo {
std::string name;
SymbolKind kind = SymbolKind::Object;
std::shared_ptr<ir::Type> type;
// Attribute flags; all default to false.
bool is_const = false;
bool is_global = false;
bool is_parameter = false;
bool is_array_parameter = false;
bool is_builtin = false;
// Defining parse-tree node; each pointer defaults to nullptr.
SysYParser::ConstDefContext* const_def = nullptr;
SysYParser::VarDefContext* var_def = nullptr;
SysYParser::FuncDefContext* func_def = nullptr;
// `const_value` is meaningful only when `has_const_value` is true
// (presumably — confirm against Sema.cpp).
bool has_const_value = false;
ConstantData const_value{};
};
// SemanticContext stores the results of semantic analysis: the symbols it
// owns, the bindings from parse-tree nodes to symbols, and expression types.
// NOTE(review): BindVarUse / ResolveVarUse / var_uses_ (keyed by VarContext)
// look like the old side of the diff interleaved with the newer SymbolInfo
// based API — confirm which members are current.
class SemanticContext {
public:
// Records that the variable use `use` refers to the declaration `decl`.
void BindVarUse(SysYParser::VarContext* use,
SysYParser::VarDefContext* decl) {
var_uses_[use] = decl;
}
// Stores `symbol` and returns a pointer to the stored copy.
SymbolInfo* CreateSymbol(SymbolInfo symbol);
// Bind*: associate a parse-tree node with a symbol.
void BindConstDef(SysYParser::ConstDefContext* node, const SymbolInfo* symbol);
void BindVarDef(SysYParser::VarDefContext* node, const SymbolInfo* symbol);
void BindFuncDef(SysYParser::FuncDefContext* node, const SymbolInfo* symbol);
void BindLVal(SysYParser::LValContext* node, const SymbolInfo* symbol);
void BindCall(SysYParser::UnaryExpContext* node, const SymbolInfo* symbol);
// Records the static type of an expression node, keyed by node address.
void SetExprType(const void* node, std::shared_ptr<ir::Type> type);
// Returns the declaration bound to `use`, or nullptr when none was bound.
SysYParser::VarDefContext* ResolveVarUse(
const SysYParser::VarContext* use) const {
auto it = var_uses_.find(use);
return it == var_uses_.end() ? nullptr : it->second;
}
// Resolve*: look up the symbol bound to a node; the result for an unbound
// node is defined in the implementation file.
const SymbolInfo* ResolveConstDef(const SysYParser::ConstDefContext* node) const;
const SymbolInfo* ResolveVarDef(const SysYParser::VarDefContext* node) const;
const SymbolInfo* ResolveFuncDef(const SysYParser::FuncDefContext* node) const;
const SymbolInfo* ResolveLVal(const SysYParser::LValContext* node) const;
const SymbolInfo* ResolveCall(const SysYParser::UnaryExpContext* node) const;
std::shared_ptr<ir::Type> ResolveExprType(const void* node) const;
private:
std::unordered_map<const SysYParser::VarContext*,
SysYParser::VarDefContext*>
var_uses_;
// Symbols are owned here; the maps below hold non-owning pointers to them.
std::vector<std::unique_ptr<SymbolInfo>> owned_symbols_;
std::unordered_map<const SysYParser::ConstDefContext*, const SymbolInfo*> const_defs_;
std::unordered_map<const SysYParser::VarDefContext*, const SymbolInfo*> var_defs_;
std::unordered_map<const SysYParser::FuncDefContext*, const SymbolInfo*> func_defs_;
std::unordered_map<const SysYParser::LValContext*, const SymbolInfo*> lvals_;
std::unordered_map<const SysYParser::UnaryExpContext*, const SymbolInfo*> calls_;
std::unordered_map<const void*, std::shared_ptr<ir::Type>> expr_types_;
};
// Runs semantic analysis over the compilation unit and returns the collected
// results.
// The original note listed only two checks:
// - variables must be declared before use
// - local variables must not be redefined
// NOTE(review): SemanticContext now also records function, call and
// expression-type information, so that list is probably out of date — confirm
// against Sema.cpp.
SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit);

@ -1,17 +1,20 @@
// 极简符号表:记录局部变量定义点。
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "SysYParser.h"
#include "sem/Sema.h"
// Symbol table used during semantic analysis.
// NOTE(review): the flat Add/Contains/Lookup(VarDefContext*) API with `table_`
// looks like the old side of the diff interleaved with the newer scope-stack
// API (`scopes_`) — confirm which members are current.
class SymbolTable {
public:
void Add(const std::string& name, SysYParser::VarDefContext* decl);
bool Contains(const std::string& name) const;
SysYParser::VarDefContext* Lookup(const std::string& name) const;
// Scope management.
void EnterScope();
void ExitScope();
// Declares `name`; the bool result presumably reports whether the
// declaration was accepted (e.g. false on a duplicate) — confirm in
// SymbolTable.cpp.
bool Declare(const std::string& name, const SymbolInfo* symbol);
// Lookup vs LookupCurrent: the names suggest "all scopes" versus "current
// scope only" — confirm in SymbolTable.cpp.
const SymbolInfo* Lookup(const std::string& name) const;
const SymbolInfo* LookupCurrent(const std::string& name) const;
private:
std::unordered_map<std::string, SysYParser::VarDefContext*> table_;
// Stack of scopes, each mapping a name to its symbol.
std::vector<std::unordered_map<std::string, const SymbolInfo*>> scopes_;
};

@ -0,0 +1,86 @@
#!/bin/bash
# Batch parse test: runs the compiler with --emit-parse-tree on every .sy file
# found under the test directory and reports pass/fail counts.
#
# Environment overrides:
#   COMPILER_PROJECT_ROOT  project root, used when it cannot be auto-detected
#   TEST_DIR               directory searched recursively for *.sy files
#   COMPILER_PATH          compiler binary to run
#
# Exit status: 0 when every test parses; 1 on any failure, on a setup error,
# or when no test file is found.

# Directory containing this script (expected: project root or its scripts/ dir).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Try to locate the project root.
PROJECT_ROOT=""
if [ -f "$SCRIPT_DIR/build/bin/compiler" ]; then
    # Case 1: the script lives in the project root.
    PROJECT_ROOT="$SCRIPT_DIR"
elif [ -f "$SCRIPT_DIR/../build/bin/compiler" ]; then
    # Case 2: the script lives in <root>/scripts/.
    PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
elif [ -n "$COMPILER_PROJECT_ROOT" ]; then
    # Case 3: fall back to the environment variable, if set.
    PROJECT_ROOT="$COMPILER_PROJECT_ROOT"
fi

# The root is only needed to derive default paths. When both TEST_DIR and
# COMPILER_PATH are given explicitly, a missing root is not an error.
if [ -z "$PROJECT_ROOT" ] && { [ -z "$TEST_DIR" ] || [ -z "$COMPILER_PATH" ]; }; then
    echo "错误:无法定位项目根目录"
    echo "请将脚本放在项目根目录或 scripts/ 目录下,"
    echo "或设置环境变量 COMPILER_PROJECT_ROOT"
    exit 1
fi

# Default paths, overridable through the environment.
test_dir="${TEST_DIR:-$PROJECT_ROOT/test/test_case/functional}"
compiler="${COMPILER_PATH:-$PROJECT_ROOT/build/bin/compiler}"

# The compiler must exist ...
if [ ! -f "$compiler" ]; then
    echo "错误:编译器不存在: $compiler"
    echo "请先构建项目,或设置 COMPILER_PATH 环境变量指向编译器"
    exit 1
fi

# ... and be executable; otherwise every single test would be reported as a
# parse failure.
if [ ! -x "$compiler" ]; then
    echo "错误:编译器不可执行: $compiler"
    exit 1
fi

# The test directory must exist.
if [ ! -d "$test_dir" ]; then
    echo "错误:测试目录不存在: $test_dir"
    echo "请设置 TEST_DIR 环境变量指向测试用例目录"
    exit 1
fi

success_count=0
failed_count=0
failed_tests=()

echo "编译器: $compiler"
echo "测试目录: $test_dir"
echo ""
echo "开始测试所有.sy文件的语法解析..."
echo "========================================"

# Iterate over all .sy files in sorted order. Process substitution keeps the
# loop in the current shell so the counters survive it.
while IFS= read -r test_file; do
    echo -n "测试: $(basename "$test_file") ... "

    # Run the parse test; its output is discarded, only the status matters.
    if "$compiler" --emit-parse-tree "$test_file" > /dev/null 2>&1; then
        echo "✓ 成功"
        # Plain arithmetic assignment: unlike ((x++)) it never yields a
        # non-zero status when the old value is 0.
        success_count=$((success_count + 1))
    else
        echo "✗ 失败"
        failed_count=$((failed_count + 1))
        # Store the path relative to the project root (when known) so the
        # failure is easy to locate. The prefix is quoted so that glob
        # characters in the path are matched literally.
        if [ -n "$PROJECT_ROOT" ]; then
            failed_tests+=("${test_file#"$PROJECT_ROOT"/}")
        else
            failed_tests+=("$test_file")
        fi
    fi
done < <(find "$test_dir" -type f -name "*.sy" | sort)

total_count=$((success_count + failed_count))

echo "========================================"

# An empty test set must not be reported as a successful run.
if [ "$total_count" -eq 0 ]; then
    echo "错误:未找到任何 .sy 测试用例: $test_dir"
    exit 1
fi

echo "测试完成!"
echo "总测试数: $total_count"
echo "成功: $success_count"
echo "失败: $failed_count"

if [ "$failed_count" -gt 0 ]; then
    echo ""
    echo "失败的测试用例:"
    for test in "${failed_tests[@]}"; do
        echo "  - $test"
    done
    exit 1
fi

exit 0

@ -37,6 +37,13 @@ if [[ ! -x "$compiler" ]]; then
exit 1
fi
runtime_src="./sylib/sylib.c"
runtime_hdr="./sylib/sylib.h"
if [[ ! -f "$runtime_src" || ! -f "$runtime_hdr" ]]; then
echo "未找到 SysY 运行库: $runtime_src / $runtime_hdr" >&2
exit 1
fi
mkdir -p "$out_dir"
base=$(basename "$input")
stem=${base%.sy}
@ -56,11 +63,13 @@ if [[ "$run_exec" == true ]]; then
exit 1
fi
obj="$out_dir/$stem.o"
runtime_obj="$out_dir/sylib.o"
exe="$out_dir/$stem"
stdout_file="$out_dir/$stem.stdout"
actual_file="$out_dir/$stem.actual.out"
llc -filetype=obj "$out_file" -o "$obj"
clang "$obj" -o "$exe"
clang -c "$runtime_src" -o "$runtime_obj"
clang "$obj" "$runtime_obj" -o "$exe"
echo "运行 $exe ..."
set +e
if [[ -f "$stdin_file" ]]; then
@ -77,11 +86,15 @@ if [[ "$run_exec" == true ]]; then
if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
printf '\n'
fi
printf '%s\n' "$status"
printf '%s' "$status"
} > "$actual_file"
if [[ -f "$expected_file" ]]; then
if diff -u "$expected_file" "$actual_file"; then
expected_cmp="$out_dir/$stem.expected.norm"
actual_cmp="$out_dir/$stem.actual.norm"
perl -0pe 's/\r\n/\n/g; s/\r/\n/g; s/\n\z//' "$expected_file" > "$expected_cmp"
perl -0pe 's/\r\n/\n/g; s/\r/\n/g; s/\n\z//' "$actual_file" > "$actual_cmp"
if diff -u "$expected_cmp" "$actual_cmp"; then
echo "输出匹配: $expected_file"
else
echo "输出不匹配: $expected_file" >&2

@ -0,0 +1,161 @@
# Lab1 修改记录
## 1. 修改文件
- `src/antlr4/SysY.g4`
- `src/main.cpp`
- `include/sem/Sema.h`
- `src/sem/Sema.cpp`
- `include/irgen/IRGen.h`
- `src/irgen/IRGenDriver.cpp`
- `src/irgen/IRGenFunc.cpp`
- `src/irgen/IRGenDecl.cpp`
- `src/irgen/IRGenStmt.cpp`
- `src/irgen/IRGenExp.cpp`
- `solution/Lab1-设计方案.md`
- `solution/Lab1-修改记录.md`
- `solution/RUN.md`
- `solution/run_lab1_batch.sh`
- `test/test_case/negative/missing_semicolon.sy`
- `test/test_case/negative/missing_rparen.sy`
- `test/test_case/negative/unexpected_else.sy`
## 2. 文法扩展
将原来只支持:
- `int main() { ... }`
- 局部 `int` 标量声明
- 简单 `return a + b`
的最小文法,扩展为支持:
- 全局声明与多函数定义
- `const/int/float/void`
- 标量与数组声明
- 花括号初始化列表
- 函数形参、数组形参、函数调用
- `if/else/while/break/continue/return`
- 赋值语句、表达式语句、复合语句
- `+ - * / %`
- 比较与逻辑表达式
- 十进制/八进制/十六进制整数
- 十进制/十六进制浮点常量
## 3. 运行路径调整
修改 `src/main.cpp`:
- 当命令行仅指定 `--emit-parse-tree` 时,打印语法树后直接返回。
这样可以避免:
- 已经通过语法分析的用例
- 因后续 `sema/irgen` 仍是最小子集而失败
这是 Lab1 场景下必要的阶段隔离。
## 4. 新文法下的接口适配
由于 `SysY.g4` 从“单一 `main` 函数”扩展为完整编译单元,ANTLR 生成的 Context 接口发生变化,因此同步调整了:
- `sema` 中的变量使用绑定位置:从旧的最小表达式节点改为 `LValContext`
- `irgen` 中的遍历入口:改为适配 `compUnit/funcDef/block/stmt/exp`
- `irgen` 中的存储槽映射:按单个 `VarDefContext` 维护
- `irgen` 的表达式遍历:适配 `mulExp / unaryExp / primary / lVal`
说明:
- 这些修改的目标是“适配新文法并保持工程可编译”
- 并未把 `sema/irgen` 扩展到完整 SysY 2022
- 当前后续阶段仍主要支持最小 `int` 标量子集
## 5. ANTLR 重新生成
使用以下命令重新生成了 Lexer/Parser:
```bash
mkdir -p build/generated/antlr4
java -jar third_party/antlr-4.13.2-complete.jar \
-Dlanguage=Cpp \
-visitor -no-listener \
-Xexact-output-dir \
-o build/generated/antlr4 \
src/antlr4/SysY.g4
```
## 6. 构建验证
执行:
```bash
cmake --build build -j 4
```
结果:
- 构建成功
## 7. 用例验证
已验证以下代表性样例可输出语法树:
- `test/test_case/functional/simple_add.sy`
- `test/test_case/functional/15_graph_coloring.sy`
- `test/test_case/functional/95_float.sy`
并批量验证:
```bash
./build/bin/compiler --emit-parse-tree test/test_case/functional/*.sy
./build/bin/compiler --emit-parse-tree test/test_case/performance/*.sy
```
结果:
- `test/test_case` 下全部 `.sy` 用例在 `--emit-parse-tree` 模式下通过
## 8. 批量脚本增强
补充更新了 `solution/run_lab1_batch.sh`:
- 默认使用 `COMPILER_PARSE_ONLY=ON` 进行 Lab1 构建
- 新增可选参数 `--save-tree`
- 启用后,会在仓库根目录下创建 `test_tree/`
- 并按照 `functional/`、`performance/` 的目录结构保存每个样例对应的语法树
- 增加正例总数、通过数、失败数统计
- 增加反例总数、通过数、失败数统计
- 增加失败样例列表打印,便于直接汇报“覆盖样例数 + 通过率”
## 9. 反例测试补充
新增目录:
- `test/test_case/negative`
新增负例样例:
- `missing_semicolon.sy`
- `missing_rparen.sy`
- `unexpected_else.sy`
这些反例用于证明:
- 合法程序可以成功解析
- 非法程序会触发 `parse` 错误
- 报错信息包含位置信息,便于定位问题
同时同步更新了:
- `solution/RUN.md`
- `solution/Lab1-设计方案.md`
- `solution/Lab1-修改记录.md`
## 10. 已知边界
当前提交完成的是 Lab1 所需的“语法分析与语法树构建”。以下能力仍属于后续实验范围:
- 完整语义分析
- 完整 IR 生成
- 浮点/数组/控制流的中间表示支持
- 更完整的函数/作用域/类型系统检查

@ -0,0 +1,189 @@
# Lab1 设计方案
## 1. 目标
根据 `sysy2022.pdf` 中的 SysY 语言定义,扩展 `src/antlr4/SysY.g4`,使编译器能够:
1. 识别 SysY 2022 的主要词法单元与语法结构。
2. 通过 `--emit-parse-tree` 输出完整的 ANTLR 语法树。
3. 在 Lab1 仅要求语法树输出时,不被后续尚未完成的语义分析与 IR 生成阶段阻塞。
## 2. 总体方案
本次实现继续沿用“ANTLR 语法树直接输出”的路径,不额外引入 AST 层。整体分三部分:
1. 扩展 `SysY.g4`,覆盖 SysY 2022 所需语法。
2. 保持现有 `SyntaxTreePrinter` 输出格式不变,继续直接打印 ANTLR parse tree。
3. 调整 `main.cpp`:当只指定 `--emit-parse-tree` 时,打印后直接结束,避免进入当前仍是最小子集的 `sema/irgen`。
## 3. 文法设计
### 3.1 顶层结构
采用标准 SysY 编译单元形式:
- `compUnit -> (decl | funcDef)+ EOF`
- 同时支持全局声明和函数定义
这样可以覆盖示例中的:
- 全局变量/常量
- 多函数程序
- `main` 前定义辅助函数
### 3.2 声明
声明分为两类:
- `constDecl`
- `varDecl`
两者都支持:
- 基本类型 `int` / `float`
- 多个定义项以逗号分隔
- 数组维度
- 标量初始化与花括号初始化列表
对应规则核心为:
- `constDef : Ident ('[' constExp ']')* '=' constInitVal`
- `varDef : Ident ('[' constExp ']')* ('=' initVal)?`
### 3.3 函数
函数定义支持:
- 返回类型 `void/int/float`
- 形参列表
- 数组形参
形参数组采用 SysY 常见形式:
- 第一维可省略长度:`int a[]`
- 后续维度显式给出:`int a[][N]`
### 3.4 语句
`stmt` 覆盖以下类型:
- 赋值语句
- 表达式语句/空语句
- 复合语句 `block`
- `if/else`
- `while`
- `break`
- `continue`
- `return`
这样能够覆盖测试用例中的:
- 单行 `if`
- 带 `else` 的分支
- 深层嵌套语句
- 循环控制语句
### 3.5 表达式优先级
表达式分层采用自底向上的优先级结构:
- `primary`
- `unaryExp`
- `mulExp`
- `addExp`
- `relExp`
- `eqExp`
- `lAndExp`
- `lOrExp`
其中:
- `exp` 保持为 `addExp`,与 SysY 中“普通表达式”和“条件表达式”分离的定义一致
- `cond` 使用 `lOrExp`
这样可以保证:
- 算术表达式优先级正确
- 比较与逻辑表达式能用于 `if` / `while`
- 函数实参仍符合 SysY 定义
### 3.6 左值与函数调用
通过:
- `lVal : Ident ('[' exp ']')*`
- `unaryExp : primary | Ident '(' funcRParams? ')' | unaryOp unaryExp`
支持:
- 普通变量使用
- 数组下标访问
- 函数调用
- 一元 `+ - !`
### 3.7 数字字面量
词法层将整数和浮点数统一归为 `Number`,便于当前前端最小实现继续复用已有“数字常量”处理方式,同时在词法规则内覆盖:
- 十进制整数
- 八进制整数
- 十六进制整数
- 十进制浮点数
- 十六进制浮点数
- 指数形式
可解析的典型形式包括:
- `0`
- `077`
- `0xff`
- `5.5`
- `03.1415926`
- `.33E+5`
- `1e-6`
- `0x1.921fb6p+1`
- `0x.AP-3`
## 4. 语法树输出方案
语法树输出继续使用现有 `SyntaxTreePrinter.cpp`:
- 非终结符输出为规则名
- 终结符输出为 `TokenName: text`
- 使用树形 ASCII 缩进
本次不修改输出器,只保证文法规则名和 token 名能稳定反映 SysY 结构。
## 5. 与后续阶段的兼容策略
当前 `sema` 和 `irgen` 只支持极小子集。为避免 Lab1 被后续阶段阻塞,采用以下三项兼容策略:
1. `main.cpp` 在“只输出语法树”时提前返回。
2. 同时把 `sema/irgen` 的接口适配到新文法,使最小子集仍可编译通过。
3. `solution/run_lab1_batch.sh` 默认使用 `COMPILER_PARSE_ONLY=ON` 配置 CMake,确保批量验证只依赖前端解析与语法树打印。
这样既满足 Lab1又不破坏当前工程的构建链路。
## 6. 验证方案
验证分三步:
1. 使用代表性样例检查语法树结构。
2. 批量遍历 `test/test_case/functional/*.sy` 和 `test/test_case/performance/*.sy`,执行 `./build/bin/compiler --emit-parse-tree`。
3. 增加 `test/test_case/negative/*.sy` 反例,验证非法输入会触发 `parse` 错误。
另外补充一个批量自动化脚本 `solution/run_lab1_batch.sh`,用于统一执行:
- ANTLR 文件重新生成
- `parse-only` 模式下的 CMake 配置与编译
- 所有正例 `.sy` 用例的语法树回归
- 所有反例 `.sy` 用例的错误回归
- 在需要时通过 `--save-tree` 将语法树保存到 `test_tree/`
- 输出正例/反例/总体统计信息与失败列表
验证目标是:
- 文法能接受测试目录中的 SysY 程序
- 语法树可稳定输出
- 非法输入能稳定报出 `parse` 错误
- 工程可以重新生成 ANTLR 文件并成功编译

@ -0,0 +1,333 @@
# Lab2 修改记录
## 1. 修改目标
根据 [doc/Lab2-中间表示生成.md](../doc/Lab2-中间表示生成.md) 的要求,完成 SysY 前端到 LLVM 风格 IR 的主链路扩展,使编译器能够:
1. 基于现有 ANTLR parse tree 完成语义分析。
2. 生成可被 `llc` / `clang` 接受的 IR。
3. 通过运行库和验证脚本完成 “生成 IR -> 链接运行 -> 输出比对”。
本次实现继续沿用:
1. `parse tree -> Sema -> IRGen -> IRPrinter`
2. 局部变量采用 `alloca/store/load` 内存模型
3. 不在 Lab2 中引入独立 AST
## 2. 设计修订
在实现前,对 [Lab2-设计方案.md](./Lab2-设计方案.md) 做了以下修订:
1. 明确 SysY 源语言继续只接受 `funcDef`,不额外引入用户自定义函数声明语法。
2. 将“模块级外部函数声明支持”与“源语言语法支持”区分开。
3. 将 `sylib` 运行库补全和 `verify_ir.sh` 自动链接运行库纳入阶段 0 前置。
4. 将 `functional` 和 `performance` 全量通过定义为阶段 C 收口后的总目标,不作为 A1/A2/B 的单阶段硬门槛。
5. 统一错误归因口径:
- `parse`
- `sema`
- `irgen`
- `llvm-link/run`
## 3. 代码改动
### 3.1 IR 层扩展
修改文件:
1. `include/ir/IR.h`
2. `src/ir/Type.cpp`
3. `src/ir/Value.cpp`
4. `src/ir/Context.cpp`
5. `src/ir/GlobalValue.cpp`
6. `src/ir/Function.cpp`
7. `src/ir/Module.cpp`
8. `src/ir/BasicBlock.cpp`
9. `src/ir/Instruction.cpp`
10. `src/ir/IRBuilder.cpp`
11. `src/ir/IRPrinter.cpp`
主要改动:
1. 类型系统从最小 `void/i32/i32*` 扩展到:
- `void`
- `i1`
- `i32`
- `float`
- `pointer`
- `array`
- `function`
2. 值系统新增:
- `ConstantFloat`
- `ConstantArray`
- `Argument`
- `GlobalVariable`
3. 指令系统补齐:
- 整数算术:`add/sub/mul/sdiv/srem`
- 浮点算术:`fadd/fsub/fmul/fdiv`
- 比较:`icmp/fcmp`
- 控制流:`br/condbr`
- 调用:`call`
- 地址计算:`gep`
- 类型转换:`sitofp/fptosi/zext`
- 存储与返回:`alloca/load/store/ret`
4. `IRBuilder` 从按 `i32/i32*` 写死的专用接口改为按 `Type` 驱动的通用接口。
5. `IRPrinter` 输出调整为 LLVM 可接受文本格式。
6. SSA 临时名生成改为 `%t0/%t1/...`,避免 LLVM 对纯数字 SSA 命名的歧义。
7. 浮点常量打印改为 LLVM 可接受的十六进制形式。
8. `alloca` 统一插入函数入口块,避免循环内重复分配导致的栈增长问题。
9. `GEP` 结果类型推导修正,支持数组对象、数组指针和多维数组访问。
### 3.2 Sema 重构
修改文件:
1. `include/sem/Sema.h`
2. `include/sem/SymbolTable.h`
3. `src/sem/SymbolTable.cpp`
4. `src/sem/Sema.cpp`
主要改动:
1. `SemanticContext` 从“变量 use -> decl”扩展为统一语义结果容器,记录:
- 声明绑定
- 函数绑定
- 调用绑定
- 表达式静态类型
2. `SymbolTable` 升级为作用域栈,支持:
- 全局作用域
- 函数作用域
- 块作用域
- 同层去重和内层遮蔽
3. `RunSema` 改为两遍式:
- 第一遍收集顶层对象和函数签名
- 第二遍检查函数体
4. 注入运行库函数签名,包括:
- `getint/getch/getfloat/getarray/getfarray`
- `putint/putch/putfloat/putarray/putfarray`
- `starttime/stoptime`
5. 增加语义检查:
- 函数调用实参数量与类型匹配
- 返回值类型匹配
- 赋值左值合法性
- 数组维度和下标检查
- `break/continue` 循环上下文检查
- 表达式类型推导和 `int/float` 转换规则
6. 常量表达式求值整合到 `Sema.cpp`,用于:
- 数组维度
- `const` 初始化
- 全局初始化
7. 修正常量数组初始化检查,允许花括号内部出现标量叶子表达式。
### 3.3 IRGen 扩展
修改文件:
1. `include/irgen/IRGen.h`
2. `src/irgen/IRGenDriver.cpp`
3. `src/irgen/IRGenFunc.cpp`
4. `src/irgen/IRGenDecl.cpp`
5. `src/irgen/IRGenStmt.cpp`
6. `src/irgen/IRGenExp.cpp`
主要改动:
1. 顶层生成分成两步:
- 先建立函数签名、全局对象和运行库声明
- 再逐函数填充函数体
2. 支持:
- 全局变量与全局常量
- 局部变量与局部常量
- 数组对象与数组初始化
- 数组形参
- 普通函数调用与运行库调用
- `if/else`
- `while`
- `break/continue`
- `return`
3. 表达式生成拆分为:
- `GenExpr`
- `GenLValueAddress`
- `GenCond`
4. 条件表达式和短路逻辑直接降到控制流,不走“先算整型值再判断”的路径。
5. 多维数组访问统一走逐维 `GEP`
6. `int/float` 混合表达式按规则插入 `sitofp/fptosi`
7. 修正一元逻辑非 `!` 的 IR 生成,保证其语义为真正的布尔取反。
### 3.4 运行库与验证脚本
修改文件:
1. `sylib/sylib.h`
2. `sylib/sylib.c`
3. `scripts/verify_ir.sh`
4. `solution/run_lab2_batch.sh`
主要改动:
1. 补全 `sylib` 头文件与 C 实现。
2. `verify_ir.sh` 在链接时自动编译并链接 `sylib/sylib.c`
3. 输出比对增加换行归一化,兼容测试集中的 `CRLF/LF` 差异和末尾换行差异。
4. 新增 `run_lab2_batch.sh`,用于 Lab2 的全量构建、批量回归和结果统计。
## 4. 覆盖的阶段目标
本次实现已覆盖设计方案中的全部阶段目标:
1. 阶段 0:IR 基础设施、运行库、验证链路
2. 阶段 A1:函数、调用、全局 `int`
3. 阶段 A2:控制流、比较、短路、循环跳转
4. 阶段 B:数组、初始化、多维下标、数组运行库
5. 阶段 C:`float`、浮点比较、`int <-> float` 转换、浮点运行库
## 5. 验证记录
### 5.1 构建验证
执行命令:
```bash
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF
cmake --build build -j 4
```
结果:
1. 构建成功。
2. `./build/bin/compiler --emit-ir` 可正常生成 IR。
### 5.2 单样例和阶段样例验证
执行过的阶段代表样例包括:
1. `simple_add.sy`
2. `09_func_defn.sy`
3. `29_break.sy`
4. `36_op_priority2.sy`
5. `if-combine3.sy`
6. `22_matrix_multiply.sy`
7. `15_graph_coloring.sy`
8. `01_mm2.sy`
9. `02_mv3.sy`
10. `03_sort1.sy`
11. `transpose0.sy`
12. `95_float.sy`
13. `large_loop_array_2.sy`
14. `vector_mul3.sy`
结果:
1. `--emit-ir` 可生成合法 IR。
2. `verify_ir.sh --run` 可完成链接、运行与输出比对。
### 5.3 全量正例回归
执行命令:
```bash
for case in $(find test/test_case/functional test/test_case/performance -maxdepth 1 -name '*.sy' | sort); do
./scripts/verify_ir.sh "$case" test/test_result/lab2_ir --run || exit 1
done
```
以及新增批量脚本:
```bash
./solution/run_lab2_batch.sh
```
结果:
1. `functional`:11/11 通过
2. `performance`:10/10 通过
3. 总计:21/21 通过
### 5.4 额外自检
1. 运行库调用自检:
- `putint(42)` 可正常生成 IR、链接运行并输出 `42`
2. 语义错误归因自检:
- `break` 出现在循环外时,能够在 `sema` 阶段报错,而不是落到 `irgen` 或 LLVM 工具链
## 6. 当前边界说明
1. Lab2 的目标是 `--emit-ir` 链路,不是后端汇编链路。
2. MIR/后端没有同步扩展完整功能,只保持了工程可编译。
3. 本次实现未引入独立 AST也未实现 SSA/phi 构造和优化。
## 7. 结论
本次修改后,Lab2 已完成从 SysY 语法树到 LLVM 风格 IR 的主链路扩展,支持函数、控制流、数组、初始化、浮点与运行库调用,并且通过了当前仓库 `functional` 和 `performance` 正例全集的运行验证。
## 附录:2026-04-08 增量修复
本次增量修复补齐了两处会影响 Lab2 语义一致性的缺陷。
### A. 全局数组标量初始化补齐
问题:
1. `Sema` 已允许数组初始化器直接写单个表达式。
2. 局部数组初始化路径也能把该表达式落到首元素,其他元素补零。
3. 但全局数组在 `BuildGlobalInitializer` 中只处理花括号初始化,导致 `int a[3] = 1;` 被错误生成为全零数组。
修复:
1. 在 `src/irgen/IRGenDecl.cpp` 中为数组类型增加 `init->exp()` 分支。
2. 将该表达式求值后写入扁平化初始化列表第 0 个元素,其余元素继续保持零初始化。
结果:
1. `int a[3] = 1;` 现在会生成 `@a = global [3 x i32] [i32 1, i32 0, i32 0]`
2. `float b[2] = 2.5;` 现在会生成首元素为 `2.5`、其余元素为 `0.0`
### B. 常量表达式 `%` 类型约束对齐
问题:
1. 运行时表达式路径已经限制 `%` 仅支持 `int`
2. 但常量求值路径会直接执行 `AsInt() % AsInt()`,从而把 `float` 静默截断后继续通过。
修复:
1. 在 `src/sem/Sema.cpp` 的常量求值路径中加入 `%` 的 `int` 类型检查。
2. 在 `src/irgen/IRGenDecl.cpp` 的全局常量初始化求值路径中加入同样的检查。
结果:
1. 普通表达式和常量表达式对 `%` 的语义约束保持一致。
2. `const int a = 5 % 2.0;` 现在会在 `sema` 阶段直接报错,而不是被静默接受。
### C. 本次新增回归样例
1. `test/test_case/functional/06_global_arr_scalar_init.sy`
2. `test/test_case/functional/06_global_arr_scalar_init.out`
3. `test/test_case/negative/lab2_const_mod_float.sy`
### D. 本次定向验证
执行命令:
```bash
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF
cmake --build build -j 4
./scripts/verify_ir.sh test/test_case/functional/06_global_arr_scalar_init.sy test/test_result/lab2_ir --run
./build/bin/compiler --emit-ir test/test_case/negative/lab2_const_mod_float.sy
```
结果:
1. 正例 `06_global_arr_scalar_init.sy` 成功生成 IR、链接运行并匹配期望退出码 `3`
2. 负例 `lab2_const_mod_float.sy` 按预期报错:`[error] [sema] % 只支持 int`

@ -0,0 +1,437 @@
# Lab2 设计方案(修订版)
## 1. 目标
根据 [doc/Lab2-中间表示生成.md](../doc/Lab2-中间表示生成.md) 的要求,在当前最小编译器框架上扩展 Sema -> IRGen -> IRPrinter 链路,使更多 SysY 语法能够被正确翻译为 LLVM 风格 IR,并通过运行验证完成 IR -> 目标程序 -> 输出比对。
本次 Lab2 采用分阶段、可回归、可归因的推进策略,核心原则如下:
1. 先补基础设施,再补语法覆盖,避免阶段跳步。
2. 每阶段都定义最小样例集与退出条件,避免只做点测。
3. 错误分类保持一致:
- 语法错误归 parse
- 语义错误归 sema
- 生成能力缺口归 irgen
- LLVM 文本、运行库链接或运行结果问题归 llvm-link/run
## 2. 当前实现现状与约束
结合当前代码,现状可概括为:
1. Sema 只覆盖最小名称绑定,范围偏向 main 函数内局部变量。
2. IRGen 只覆盖最小顺序语句流程,核心是局部 int、基础算术、return。
3. IR 类型与指令集合都是教学最小子集,无法直接承载完整 Lab2 功能。
因此,Lab2 不能只改某一个目录,必须协同扩展:
1. IR 层
- include/ir/IR.h
- src/ir
2. 语义层
- include/sem/Sema.h
- include/sem/SymbolTable.h
- src/sem
3. 生成层
- include/irgen/IRGen.h
- src/irgen
## 3. 总体设计原则
### 3.1 保持 parse tree 直连方案
继续基于 ANTLR parse tree,不引入独立 AST。接口仍保持:
1. RunSema(CompUnit) -> SemanticContext
2. GenerateIR(CompUnit, SemanticContext) -> Module
理由:降低结构性重构成本,把精力聚焦在 Lab2 的语义补全与 IR 生成补全。
### 3.2 继续采用内存模型
局部变量和形参默认走 alloca/store/load 模型,不在 Lab2 引入 SSA 构造与 phi 优化。理由:优先保证正确性与可运行性,优化类目标留给后续实验。
### 3.3 分阶段门禁
每阶段必须满足三类门禁:
1. 该阶段目标样例通过。
2. 前阶段样例无回归。
3. 失败能快速归因到 parse/sema/irgen/llvm-link/run。
## 4. 阶段划分(重排后)
### 4.1 阶段 0:基础设施(硬前置)
这是后续所有阶段的阻塞前置阶段,未完成不得进入 A1。
目标:
1. 扩展 IR 类型系统到最小可用集合:
- void
- i1
- i32
- float
- pointer
- array
- function
2. 扩展关键指令集合:
- 算术补齐 sdiv、srem
- 比较补齐 icmp、fcmp
- 控制流补齐 br、condbr
- 调用补齐 call
- 地址计算补齐 gep
- 转换补齐 sitofp、fptosi、zext
3. IRBuilder 与 IRPrinter 同步扩展,避免出现能生成但不能打印、或能打印但 LLVM 不接受。
4. Sema 架构改为两遍式骨架:
- 第一遍收集顶层符号(函数签名、全局对象、运行库函数)
- 第二遍检查函数体(类型、调用、控制流上下文等)
5. SymbolTable 升级为作用域栈,支持全局/函数/块作用域和遮蔽规则。
6. 运行库与验证环境前置补齐:
- 完整提供 `sylib/sylib.h` 和 `sylib/sylib.c`
- `verify_ir.sh` 在链接阶段自动带上运行库
- 运行结果比对需要容忍测试集中的换行风格差异
阶段样例:
1. simple_add
退出条件:
1. simple_add 不回归。
2. 新增 IR 元素可被 llc/clang 接受。
3. parse/sema/irgen 错误分类可区分。
### 4.2 阶段 A1:函数与调用主链路(依赖阶段 0)
目标:
1. 用户函数定义支持,以及 IR/Module 层的外部函数声明支持。
2. 形参与返回类型检查。
3. 函数调用与实参数量/类型检查。
4. 全局 int 标量与全局初始化。
5. 运行库函数声明注册与调用生成。
实现要点:
1. SysY 源语言继续只接受 `funcDef`,不额外引入用户自定义函数声明语法。
2. Module 区分函数声明和函数定义。
3. 运行库函数和其他外部函数通过模块级声明接入,而不是扩展源语言语法。
4. 形参映射为 Argument再按内存模型落地到槽位。
5. Sema 在调用点完成签名匹配,不把类型错误拖到 IRGen。
阶段样例:
1. simple_add
2. 09_func_defn
退出条件:
1. 阶段样例 --emit-ir 成功。
2. 阶段样例 --run 输出与退出码匹配。
3. 无阶段 0 回归。
### 4.3 阶段 A2:控制流与条件主链路(依赖 A1)
目标:
1. 支持赋值语句、表达式语句、块语句。
2. 支持 if/else。
3. 支持 while。
4. 支持 break/continue(含循环嵌套场景)。
5. 支持比较与逻辑条件生成。
实现要点:
1. 明确三类表达式接口职责:
- GenRValue
- GenLValueAddr
- GenCond
2. 控制流模板固定化:
- if:cond -> then -> else(可选) -> merge
- while:cond -> body -> exit
- break 绑定 exit
- continue 绑定 cond
阶段样例:
1. 29_break
2. 36_op_priority2
3. if-combine3
退出条件:
1. 阶段样例 --run 全通过。
2. 短路与循环跳转行为正确。
3. 无 A1 与阶段 0 回归。
### 4.4 阶段 B:数组与初始化(依赖 A2)
目标:
1. 一维/多维数组类型与对象表示。
2. 全局数组与局部数组支持。
3. 数组形参支持。
4. 下标访问通过 GEP 生成。
5. 初始化器递归展开与补零规则落地。
6. getarray/putarray 相关调用与类型检查支持。
实现要点:
1. 数组对象与数组指针区分清晰。
2. 下标访问逐维计算,避免扁平化误用。
3. 局部数组与全局数组初始化路径分离。
阶段样例:
1. 22_matrix_multiply
2. 15_graph_coloring
3. 01_mm2
4. 02_mv3
5. transpose0
6. 03_sort1
退出条件:
1. 数组样例链路通过。
2. 初始化补零行为与预期一致。
3. 无 A2 及之前回归。
### 4.5 阶段 C:float 与混合类型(依赖 B)
目标:
1. float 类型与浮点常量。
2. 浮点运算与浮点比较。
3. int <-> float 隐式转换。
4. getfloat/putfloat/getfarray/putfarray 支持。
实现要点:
1. 明确定义类型提升规则,避免不同模块各自推断。
2. 转换插入策略统一:
- 算术场景的提升
- 赋值场景的收窄/转换
- 调用实参与形参匹配转换
阶段样例:
1. 95_float
2. large_loop_array_2
3. vector_mul3
退出条件:
1. 浮点样例链路通过。
2. 类型错误优先在 sema 阶段暴露。
3. 无 B 及之前回归。
## 5. IR 层详细设计
### 5.1 类型系统
类型至少覆盖:
1. Void
2. Int1
3. Int32
4. Float32
5. Pointer(element_type)
6. Array(element_type, extent)
7. Function(return_type, param_types)
要求:
1. 类型构造和查询接口统一。
2. 现有按 `i32/i32*` 写死的接口需要升级为按 `Type` 驱动的通用实现。
3. IRPrinter 打印格式与 LLVM 文本兼容。
4. 函数签名可完整表达返回值与参数列表。
### 5.2 值与对象系统
至少补齐:
1. ConstantFloat
2. ConstantArray
3. Argument
4. GlobalVariable 或等价全局对象表示
Module 层至少支持:
1. 函数声明集合
2. 函数定义集合
3. 全局变量/常量对象集合
### 5.3 指令与 Builder
Builder 最小接口建议包括:
1. CreateBr
2. CreateCondBr
3. CreateCall
4. CreateICmp
5. CreateFCmp
6. CreateGEP
7. CreateSIToFP
8. CreateFPToSI
9. CreateZExt
10. CreateAlloca(type)
要求:
1. 新增指令必须同步到 IRPrinter。
2. 输出 IR 必须可被 llc/clang 接受。
## 6. Sema 详细设计
### 6.1 SemanticContext 扩展
除变量绑定外,至少包含:
1. 函数绑定信息
2. 表达式静态类型
3. 左值可赋值性
4. 数组维度/退化信息
5. 调用点签名匹配结果
### 6.2 符号表规则
采用作用域栈,支持:
1. Declare同层去重
2. Lookup由内向外
3. EnterScope / ExitScope
覆盖范围:
1. 全局作用域
2. 函数作用域
3. 块作用域
### 6.3 两遍式语义流程
第一遍:
1. 收集顶层函数签名
2. 收集全局变量/常量
3. 注入运行库函数签名
第二遍:
1. 校验函数体
2. 校验 return 与函数返回类型
3. 校验调用参数个数与类型
4. 校验数组下标与维度
5. 校验 break/continue 上下文
6. 计算常量表达式(用于维度与初始化)
## 7. IRGen 详细设计
### 7.1 生成流程
两阶段生成:
1. 顶层扫描,建立函数与全局对象骨架。
2. 逐函数填充基本块和指令。
### 7.2 函数状态
函数级状态建议包括:
1. current_func
2. current_bb
3. return_bb
4. return_slot(非 void 可选)
5. break_targets 栈
6. continue_targets 栈
7. 局部存储槽位环境
### 7.3 表达式与语句职责拆分
表达式:
1. GenRValue
2. GenLValueAddr
3. GenCond
语句:
1. 声明
2. 赋值
3. 表达式语句
4. return
5. if/else
6. while
7. break
8. continue
9. block
## 8. 验证与回归方案
### 8.1 单样例验证
用于快速定位:
1. 编译器生成 IR 是否成功
2. IR 文本是否基本正确
### 8.2 阶段样例回归
每阶段必须执行对应样例集,不得只跑一个样例。
### 8.3 全量回归
阶段内只要求回归相关子集并记录失败样例。
当前仓库 `functional` 和 `performance` 正例全集覆盖,属于阶段 C 完成后的总目标,不作为 A1/A2/B 的单阶段硬门槛。
### 8.4 失败归因矩阵
1. parse 失败:语法规则或词法/语法处理问题。
2. sema 失败:名称绑定、类型检查、上下文约束问题。
3. irgen 失败:语义到 IR 映射未实现或实现错误。
4. llvm-link/run 失败:IR 文本不合法、链接缺失、运行行为错误。
### 8.5 建议验证命令模板
单样例:
```bash
./build/bin/compiler --emit-ir test/test_case/functional/simple_add.sy
./scripts/verify_ir.sh test/test_case/functional/simple_add.sy test/test_result/function/ir --run
```
阶段样例循环回归(示意):
```bash
for f in test/test_case/functional/09_func_defn.sy test/test_case/functional/29_break.sy; do
./scripts/verify_ir.sh "$f" test/test_result/function/ir --run || exit 1
done
```
## 9. 设计取舍
1. 不引入独立 AST。优先保证 Lab2 可落地与可验证,降低重构成本。
2. 继续采用内存模型。减少实现复杂度,先确保正确性。
3. 优先保证 LLVM 可接受性。内部抽象服从外部工具链约束。
4. 分阶段推进。降低单次改动规模,便于调试与协作。
5. 明确排除范围。Lab2 不承担 SSA/phi 构造和优化类目标,相关工作放到后续实验。
## 10. 最终验收目标
Lab2 完成后应达到:
1. Sema 能完成核心名称绑定与类型检查。
2. IRGen 能覆盖 Lab2 目标语法并生成合法 LLVM 风格 IR。
3. 关键样例能通过运行比对。
4. 形成稳定回归流程,支持后续 Lab3 对接。
5. 阶段 C 收口后,当前仓库 `functional` 和 `performance` 正例全集应能完成 IR 生成、链接与运行比对。
在此基础上,Lab3 再继续推进后端相关能力,包括指令选择、栈帧与寄存器分配。

@ -0,0 +1,171 @@
# Lab1 运行说明
## 1. 环境要求
建议环境中具备以下工具:
- `java`
- `cmake`
- `g++` / `clang++`
- `make` 或 `ninja`
可先检查:
```bash
java -version
cmake --version
g++ --version
```
## 2. 手动生成 ANTLR 代码
在仓库根目录执行:
```bash
mkdir -p build/generated/antlr4
java -jar third_party/antlr-4.13.2-complete.jar \
-Dlanguage=Cpp \
-visitor -no-listener \
-Xexact-output-dir \
-o build/generated/antlr4 \
src/antlr4/SysY.g4
```
## 3. 手动配置与编译
在仓库根目录执行:
```bash
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build -j "$(nproc)"
```
编译成功后,可执行文件位于:
```bash
./build/bin/compiler
```
## 4. 单个样例运行
### 4.1 仅输出语法树
```bash
./build/bin/compiler --emit-parse-tree test/test_case/functional/simple_add.sy
```
### 4.2 验证最小 IR 仍可工作
```bash
./build/bin/compiler --emit-ir test/test_case/functional/simple_add.sy
```
## 5. 批量测试
我提供了一个批量测试脚本:
```bash
./solution/run_lab1_batch.sh
```
该脚本默认使用 **parse-only 构建模式**:
```bash
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON
```
这样即使 `sem` / `irgen` / `mir` 还没有完成,Lab1 的语法树验证也不会被后续实验模块阻塞。
如果希望在批量测试时把每个样例的语法树保存到 `test_tree/` 目录,可以加可选项:
```bash
./solution/run_lab1_batch.sh --save-tree
```
该脚本会自动完成:
1. 重新生成 `build/generated/antlr4` 下的 ANTLR 文件
2. 执行 `cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON`
3. 执行 `cmake --build build -j "$(nproc)"`
4. 批量测试 `test/test_case/functional/*.sy`
5. 批量测试 `test/test_case/performance/*.sy`
6. 批量测试 `test/test_case/negative/*.sy`,确认非法输入会触发 `parse` 报错
若使用 `--save-tree`,还会额外:
7. 在仓库根目录下创建 `test_tree/`
8. 将语法树按测试集目录结构保存,例如:
```bash
test_tree/functional/simple_add.tree
test_tree/performance/fft0.tree
```
脚本结束时会输出:
- 正例总数 / 通过数 / 失败数
- 反例总数 / 通过数 / 失败数
- 总覆盖样例数与整体通过情况
- 失败样例列表
若某个用例失败,脚本会打印失败用例名并返回非零退出码。
## 6. 反例测试说明
新增了负例目录:
```bash
test/test_case/negative
```
当前提供了 3 个非法样例:
- `missing_semicolon.sy`
- `missing_rparen.sy`
- `unexpected_else.sy`
这些样例用于验证:
- 合法输入能够成功输出语法树
- 非法输入能够触发 `parse` 报错
- 报错信息带有位置,便于定位问题
## 7. 常用附加命令
### 7.1 查看帮助
```bash
./build/bin/compiler --help
```
### 7.2 指定单个样例文件
```bash
./build/bin/compiler --emit-parse-tree <your_case.sy>
```
### 7.3 重新从零开始构建
```bash
rm -rf build
mkdir -p build/generated/antlr4
java -jar third_party/antlr-4.13.2-complete.jar \
-Dlanguage=Cpp \
-visitor -no-listener \
-Xexact-output-dir \
-o build/generated/antlr4 \
src/antlr4/SysY.g4
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build -j "$(nproc)"
```
## 8. 结果判定
Lab1 主要检查点是:
- 合法 SysY 程序可以被 `SysY.g4` 成功解析
- `--emit-parse-tree` 能输出语法树
- `test/test_case` 下正例可以批量通过语法树模式
- `test/test_case/negative` 下反例会稳定触发 `parse` 报错
本项目当前实现中,Lab1 的重点是“语法分析与语法树构建”,不是完整语义分析和完整 IR/汇编支持。

@ -0,0 +1,145 @@
#!/usr/bin/env bash
set -euo pipefail
shopt -s nullglob
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
BUILD_DIR="$ROOT_DIR/build"
ANTLR_DIR="$BUILD_DIR/generated/antlr4"
JAR_PATH="$ROOT_DIR/third_party/antlr-4.13.2-complete.jar"
GRAMMAR_PATH="$ROOT_DIR/src/antlr4/SysY.g4"
COMPILER="$BUILD_DIR/bin/compiler"
SAVE_TREE=false
TREE_DIR="$ROOT_DIR/test_tree"
POSITIVE_CASES=(
"$ROOT_DIR"/test/test_case/functional/*.sy
"$ROOT_DIR"/test/test_case/performance/*.sy
)
NEGATIVE_CASES=(
"$ROOT_DIR"/test/test_case/negative/*.sy
)
positive_total=0
positive_passed=0
positive_failed=0
negative_total=0
negative_passed=0
negative_failed=0
failed_cases=()
# Print the final report: per-group counters for positive and negative cases,
# the combined totals, and (only when something failed) the failing case files.
# Reads the global *_total / *_passed / *_failed counters and failed_cases.
print_summary() {
  local total=$((positive_total + negative_total))
  local passed=$((positive_passed + negative_passed))
  local failed=$((positive_failed + negative_failed))

  printf '\nSummary:\n'
  printf ' Positive cases: total=%s, passed=%s, failed=%s\n' \
    "$positive_total" "$positive_passed" "$positive_failed"
  printf ' Negative cases: total=%s, passed=%s, failed=%s\n' \
    "$negative_total" "$negative_passed" "$negative_failed"
  printf ' Overall: total=%s, passed=%s, failed=%s\n' \
    "$total" "$passed" "$failed"

  # Nothing more to report when every case passed.
  if [[ ${#failed_cases[@]} -eq 0 ]]; then
    return 0
  fi
  printf 'Failed cases:\n'
  printf ' - %s\n' "${failed_cases[@]}"
}
# Parse command-line options. The only recognised option is --save-tree,
# which turns on writing each parse tree under $TREE_DIR; anything else
# prints a usage line to stderr and exits with status 1.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --save-tree)
      SAVE_TREE=true
      ;;
    *)
      echo "Unknown option: $1" >&2
      echo "Usage: $0 [--save-tree]" >&2
      exit 1
      ;;
  esac
  shift
done
# Step 1: regenerate the C++ parser sources from the grammar into $ANTLR_DIR
# (visitor enabled, listener disabled).
echo "[1/4] Generating ANTLR sources..."
mkdir -p "$ANTLR_DIR"
java -jar "$JAR_PATH" \
  -Dlanguage=Cpp \
  -visitor -no-listener \
  -Xexact-output-dir \
  -o "$ANTLR_DIR" \
  "$GRAMMAR_PATH"
# Step 2: configure with COMPILER_PARSE_ONLY=ON so the build is done in
# parse-only mode.
echo "[2/4] Configuring CMake..."
cmake -S "$ROOT_DIR" -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON
# Step 3: build using one job per CPU reported by nproc.
echo "[3/4] Building project..."
cmake --build "$BUILD_DIR" -j "$(nproc)"
# Step 4 (the test loops) starts here.
echo "[4/4] Running parse-tree tests in parse-only mode..."
# Scratch file for the compiler's stderr. mktemp gives every run its own file,
# so concurrent runs (or several users on one machine) cannot overwrite each
# other's diagnostics the way the former fixed /tmp/lab1_*.err names could.
# The file is removed when the script exits, whatever the exit path.
err_log="$(mktemp "${TMPDIR:-/tmp}/lab1_parse.XXXXXX")"
trap 'rm -f "$err_log"' EXIT

# With --save-tree, start from an empty output tree so stale files from an
# earlier run never survive.
if [[ "$SAVE_TREE" == true ]]; then
  rm -rf "$TREE_DIR"
  mkdir -p "$TREE_DIR"
fi

# Positive cases: the compiler must exit successfully. The parse tree goes to
# <TREE_DIR>/<group>/<stem>.tree when saving, otherwise it is discarded.
for case_file in "${POSITIVE_CASES[@]}"; do
  ((positive_total += 1))
  out_file=/dev/null
  if [[ "$SAVE_TREE" == true ]]; then
    # Mirror the layout below test/test_case/ inside $TREE_DIR.
    rel_path="${case_file#"$ROOT_DIR"/test/test_case/}"
    rel_dir="$(dirname "$rel_path")"
    stem="$(basename "${case_file%.sy}")"
    out_dir="$TREE_DIR/$rel_dir"
    out_file="$out_dir/$stem.tree"
    mkdir -p "$out_dir"
  fi

  if "$COMPILER" --emit-parse-tree "$case_file" >"$out_file" 2>"$err_log"; then
    if [[ "$SAVE_TREE" == true ]]; then
      echo "PASS: $case_file -> $out_file"
    else
      echo "PASS: $case_file"
    fi
    ((positive_passed += 1))
  else
    echo "FAIL: $case_file"
    cat "$err_log"
    # Do not leave a partial tree behind for a case that failed to parse.
    if [[ "$SAVE_TREE" == true ]]; then
      rm -f "$out_file"
    fi
    ((positive_failed += 1))
    failed_cases+=("$case_file")
  fi
done

# Negative cases: the compiler must fail AND report the failure as a parse
# error, i.e. stderr must contain a line starting with "[error] [parse]".
if (( ${#NEGATIVE_CASES[@]} > 0 )); then
  echo
  echo "Running negative parse tests..."
  for case_file in "${NEGATIVE_CASES[@]}"; do
    ((negative_total += 1))
    # stdout is never inspected for negative cases, so it is discarded.
    if "$COMPILER" --emit-parse-tree "$case_file" >/dev/null 2>"$err_log"; then
      echo "FAIL: $case_file (expected parse failure, but parsing succeeded)"
      ((negative_failed += 1))
      failed_cases+=("$case_file")
    elif grep -q '^\[error\] \[parse\]' "$err_log"; then
      echo "PASS: $case_file -> expected parse error"
      ((negative_passed += 1))
    else
      echo "FAIL: $case_file (did not report parse error as expected)"
      cat "$err_log"
      ((negative_failed += 1))
      failed_cases+=("$case_file")
    fi
  done
fi
print_summary
if (( positive_failed + negative_failed > 0 )); then
echo "Batch test finished with failures."
exit 1
fi
echo "Batch test passed."

@ -0,0 +1,173 @@
#!/usr/bin/env bash
set -euo pipefail
shopt -s nullglob
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
BUILD_DIR="$ROOT_DIR/build"
ANTLR_DIR="$BUILD_DIR/generated/antlr4"
JAR_PATH="$ROOT_DIR/third_party/antlr-4.13.2-complete.jar"
GRAMMAR_PATH="$ROOT_DIR/src/antlr4/SysY.g4"
OUT_ROOT="$ROOT_DIR/test/test_result/lab2_ir_batch"
RUN_FUNCTIONAL=true
RUN_PERFORMANCE=true
DO_BUILD=true
functional_total=0
functional_passed=0
functional_failed=0
performance_total=0
performance_passed=0
performance_failed=0
failed_cases=()
# Print the option summary to stdout. The heredoc delimiter is quoted, so the
# text is emitted verbatim with no variable expansion.
usage() {
  cat <<'EOF'
Usage: ./solution/run_lab2_batch.sh [options]
Options:
--no-build Skip ANTLR generation and project rebuild
--functional-only Run only test/test_case/functional/*.sy
--performance-only Run only test/test_case/performance/*.sy
--output-dir <dir> Set output directory for generated IR and logs
--help Show this help message
EOF
}
# Print the final report: per-group counters for the functional and
# performance sets, the combined totals, and (only when something failed) the
# failing case files. Reads the global counters and failed_cases.
print_summary() {
  local total=$((functional_total + performance_total))
  local passed=$((functional_passed + performance_passed))
  local failed=$((functional_failed + performance_failed))

  printf '\nSummary:\n'
  printf ' Functional cases: total=%s, passed=%s, failed=%s\n' \
    "$functional_total" "$functional_passed" "$functional_failed"
  printf ' Performance cases: total=%s, passed=%s, failed=%s\n' \
    "$performance_total" "$performance_passed" "$performance_failed"
  printf ' Overall: total=%s, passed=%s, failed=%s\n' \
    "$total" "$passed" "$failed"

  # Nothing more to report when every case passed.
  if [[ ${#failed_cases[@]} -eq 0 ]]; then
    return 0
  fi
  printf 'Failed cases:\n'
  printf ' - %s\n' "${failed_cases[@]}"
}
# Run one .sy case through scripts/verify_ir.sh and record the outcome.
#   $1  path to the .sy case file (the callers in this script pass absolute
#       paths built from $ROOT_DIR)
#   $2  group name; "functional" updates the functional counters, any other
#       value updates the performance counters
# The verifier's combined stdout/stderr is written to
# <OUT_ROOT>/<group>/<stem>.verify.log and echoed only when the case fails.
run_case() {
  local case_file=$1
  local group=$2
  local stem out_dir log_file
  stem="$(basename "${case_file%.sy}")"
  out_dir="$OUT_ROOT/$group"
  log_file="$out_dir/$stem.verify.log"
  mkdir -p "$out_dir"

  if [[ "$group" == "functional" ]]; then
    ((functional_total += 1))
  else
    ((performance_total += 1))
  fi

  # Run the verifier from the repository root inside a subshell. Previously
  # ./scripts/verify_ir.sh was resolved against the caller's working
  # directory, so the batch script only worked when launched from the repo
  # root. The subshell keeps the cd from leaking into the rest of the script.
  if (cd "$ROOT_DIR" && ./scripts/verify_ir.sh "$case_file" "$out_dir" --run) \
      >"$log_file" 2>&1; then
    echo "PASS: $case_file"
    if [[ "$group" == "functional" ]]; then
      ((functional_passed += 1))
    else
      ((performance_passed += 1))
    fi
  else
    echo "FAIL: $case_file"
    cat "$log_file"
    if [[ "$group" == "functional" ]]; then
      ((functional_failed += 1))
    else
      ((performance_failed += 1))
    fi
    failed_cases+=("$case_file")
  fi
}
# Parse command-line options. Options are processed left to right, so when
# both --functional-only and --performance-only are given the later one wins.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --no-build)
      DO_BUILD=false
      ;;
    --functional-only)
      RUN_FUNCTIONAL=true
      RUN_PERFORMANCE=false
      ;;
    --performance-only)
      RUN_FUNCTIONAL=false
      RUN_PERFORMANCE=true
      ;;
    --output-dir)
      # Consume the option's value; the shift after esac removes it.
      shift
      if [[ $# -eq 0 ]]; then
        echo "Missing value for --output-dir" >&2
        usage
        exit 1
      fi
      # Relative output directories are interpreted relative to the
      # repository root, not the caller's working directory.
      if [[ "$1" = /* ]]; then
        OUT_ROOT="$1"
      else
        OUT_ROOT="$ROOT_DIR/$1"
      fi
      ;;
    --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      exit 1
      ;;
  esac
  shift
done
# NOTE(review): both flags default to true and each *-only option sets one of
# them to true, so with the options currently defined this guard looks
# unreachable; it only matters if a future option can clear both.
if [[ "$RUN_FUNCTIONAL" == false && "$RUN_PERFORMANCE" == false ]]; then
  echo "No test set selected." >&2
  exit 1
fi
# Steps 1-3 (skipped with --no-build): regenerate the ANTLR parser sources,
# configure CMake with COMPILER_PARSE_ONLY=OFF, and build with one job per CPU
# reported by nproc.
if [[ "$DO_BUILD" == true ]]; then
  echo "[1/4] Generating ANTLR sources..."
  mkdir -p "$ANTLR_DIR"
  java -jar "$JAR_PATH" \
    -Dlanguage=Cpp \
    -visitor -no-listener \
    -Xexact-output-dir \
    -o "$ANTLR_DIR" \
    "$GRAMMAR_PATH"
  echo "[2/4] Configuring CMake..."
  cmake -S "$ROOT_DIR" -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF
  echo "[3/4] Building project..."
  cmake --build "$BUILD_DIR" -j "$(nproc)"
fi
# Step 4: run every selected case. nullglob is set at the top of the script,
# so a directory without .sy files simply contributes zero iterations.
echo "[4/4] Running IR batch tests..."
if [[ "$RUN_FUNCTIONAL" == true ]]; then
  for case_file in "$ROOT_DIR"/test/test_case/functional/*.sy; do
    run_case "$case_file" "functional"
  done
fi
if [[ "$RUN_PERFORMANCE" == true ]]; then
  for case_file in "$ROOT_DIR"/test/test_case/performance/*.sy; do
    run_case "$case_file" "performance"
  done
fi
print_summary
# Exit non-zero when any case failed so callers can detect the failure.
if (( functional_failed + performance_failed > 0 )); then
  echo "Batch test finished with failures."
  exit 1
fi
echo "Batch test passed."

@ -1,68 +1,65 @@
// SysY 子集语法:支持形如
// int main() { int a = 1; int b = 2; return a + b; }
// 的最小返回表达式编译。
// 后续需要自行添加
grammar SysY;
/*===-------------------------------------------===*/
/* Lexer rules */
/*===-------------------------------------------===*/
INT: 'int';
RETURN: 'return';
ASSIGN: '=';
ADD: '+';
LPAREN: '(';
RPAREN: ')';
LBRACE: '{';
RBRACE: '}';
SEMICOLON: ';';
ID: [a-zA-Z_][a-zA-Z_0-9]*;
ILITERAL: [0-9]+;
compUnit
: (decl | funcDef)+ EOF
;
WS: [ \t\r\n] -> skip;
LINECOMMENT: '//' ~[\r\n]* -> skip;
BLOCKCOMMENT: '/*' .*? '*/' -> skip;
decl
: constDecl
| varDecl
;
/*===-------------------------------------------===*/
/* Syntax rules */
/*===-------------------------------------------===*/
constDecl
: Const bType constDef (Comma constDef)* Semi
;
compUnit
: funcDef EOF
varDecl
: bType varDef (Comma varDef)* Semi
;
decl
: btype varDef SEMICOLON
bType
: Int
| Float
;
btype
: INT
constDef
: Ident (L_BRACK constExp R_BRACK)* Assign constInitVal
;
varDef
: lValue (ASSIGN initValue)?
: Ident (L_BRACK constExp R_BRACK)* (Assign initVal)?
;
constInitVal
: constExp
| L_BRACE (constInitVal (Comma constInitVal)*)? R_BRACE
;
initValue
initVal
: exp
| L_BRACE (initVal (Comma initVal)*)? R_BRACE
;
funcDef
: funcType ID LPAREN RPAREN blockStmt
: funcType Ident L_PAREN funcFParams? R_PAREN block
;
funcType
: INT
: Void
| Int
| Float
;
funcFParams
: funcFParam (Comma funcFParam)*
;
funcFParam
: bType Ident (L_BRACK R_BRACK (L_BRACK exp R_BRACK)*)?
;
blockStmt
: LBRACE blockItem* RBRACE
block
: L_BRACE blockItem* R_BRACE
;
blockItem
@ -71,28 +68,231 @@ blockItem
;
stmt
: returnStmt
: assignStmt
| expStmt
| block
| ifStmt
| whileStmt
| breakStmt
| continueStmt
| returnStmt
;
assignStmt
: lVal Assign exp Semi
;
expStmt
: exp? Semi
;
ifStmt
: If L_PAREN cond R_PAREN stmt (Else stmt)?
;
whileStmt
: While L_PAREN cond R_PAREN stmt
;
breakStmt
: Break Semi
;
continueStmt
: Continue Semi
;
returnStmt
: RETURN exp SEMICOLON
: Return exp? Semi
;
exp
: LPAREN exp RPAREN # parenExp
| var # varExp
| number # numberExp
| exp ADD exp # additiveExp
: addExp
;
cond
: lOrExp
;
lVal
: Ident (L_BRACK exp R_BRACK)*
;
primary
: Number
| lVal
| L_PAREN exp R_PAREN
;
unaryExp
: primary
| Ident L_PAREN funcRParams? R_PAREN
| unaryOp unaryExp
;
unaryOp
: Add
| Sub
| Not
;
funcRParams
: exp (Comma exp)*
;
// Binary-operator ladder. Each rule is a flat list of operands taken from the
// rule directly above it, separated by that level's operators, so rules that
// appear earlier here bind tighter than the ones that follow.

// Multiplicative level: * / % over unary expressions.
mulExp
: unaryExp ((Mul | Div | Mod) unaryExp)*
;
// Additive level: + - over multiplicative expressions.
addExp
: mulExp ((Add | Sub) mulExp)*
;
// Relational level: < > <= >= over additive expressions.
relExp
: addExp ((Lt | Gt | Le | Ge) addExp)*
;
// Equality level: == != over relational expressions.
eqExp
: relExp ((Eq | Ne) relExp)*
;
// Logical AND over equality expressions.
lAndExp
: eqExp (And eqExp)*
;
// Logical OR over logical-AND expressions; the loosest-binding level.
lOrExp
: lAndExp (Or lAndExp)*
;
constExp
: addExp
;
Const : 'const';
Int : 'int';
Float : 'float';
Void : 'void';
If : 'if';
Else : 'else';
While : 'while';
Break : 'break';
Continue : 'continue';
Return : 'return';
Add : '+';
Sub : '-';
Mul : '*';
Div : '/';
Mod : '%';
Assign : '=';
Eq : '==';
Ne : '!=';
Lt : '<';
Gt : '>';
Le : '<=';
Ge : '>=';
Not : '!';
And : '&&';
Or : '||';
Comma : ',';
Semi : ';';
L_PAREN : '(';
R_PAREN : ')';
L_BRACE : '{';
R_BRACE : '}';
L_BRACK : '[';
R_BRACK : ']';
Ident
: IdentifierNondigit IdentifierChar*
;
Number
: HexFloatConst
| DecFloatConst
| HexIntConst
| OctIntConst
| DecIntConst
;
WS
: [ \t\r\n]+ -> skip
;
COMMENT
: '//' ~[\r\n]* -> skip
;
BLOCK_COMMENT
: '/*' .*? '*/' -> skip
;
fragment IdentifierNondigit
: [a-zA-Z_]
;
fragment IdentifierChar
: IdentifierNondigit
| [0-9]
;
fragment DecIntConst
: '0'
| [1-9] [0-9]*
;
fragment OctIntConst
: '0' [0-7]+
;
fragment HexIntConst
: HexPrefix HexDigit+
;
fragment DecFloatConst
: FractionalConst ExponentPart?
| DigitSequence ExponentPart
;
fragment HexFloatConst
: HexPrefix HexFractionalConst BinaryExponentPart
| HexPrefix HexDigit+ BinaryExponentPart
;
fragment FractionalConst
: DigitSequence? Dot DigitSequence
| DigitSequence Dot
;
fragment HexFractionalConst
: HexDigit* Dot HexDigit+
| HexDigit+ Dot
;
fragment ExponentPart
: [eE] Sign? DigitSequence
;
fragment BinaryExponentPart
: [pP] Sign? DigitSequence
;
fragment Sign
: [+-]
;
fragment HexPrefix
: '0' [xX]
;
var
: ID
fragment DigitSequence
: [0-9]+
;
lValue
: ID
fragment HexDigit
: [0-9a-fA-F]
;
number
: ILITERAL
fragment Dot
: '.'
;

@ -1,19 +1,10 @@
// IR 基本块:
// - 保存指令序列
// - 为后续 CFG 分析预留前驱/后继接口
//
// 当前仍是最小实现:
// - BasicBlock 已纳入 Value 体系,但类型先用 void 占位;
// - 指令追加与 terminator 约束主要在头文件中的 Append 模板里处理;
// - 前驱/后继容器已经预留,但当前项目里还没有分支指令与自动维护逻辑。
#include "ir/IR.h"
#include <algorithm>
#include <utility>
namespace ir {
// 当前 BasicBlock 还没有专门的 label type因此先用 void 作为占位类型。
BasicBlock::BasicBlock(std::string name)
: Value(Type::GetVoidType(), std::move(name)) {}
@ -21,19 +12,29 @@ Function* BasicBlock::GetParent() const { return parent_; }
void BasicBlock::SetParent(Function* parent) { parent_ = parent; }
// Returns true when the block is non-empty and its last instruction reports
// IsTerminator(); an empty block has no terminator.
bool BasicBlock::HasTerminator() const {
  return !instructions_.empty() && instructions_.back()->IsTerminator();
}
// 按插入顺序返回块内指令序列。
// Records the CFG edge this -> succ in both directions: succ is added to this
// block's successor list and this block to succ's predecessor list. Each list
// is only extended when the entry is not already present, so calling this
// repeatedly for the same edge leaves the lists unchanged. A null succ is
// ignored.
void BasicBlock::AddSuccessor(BasicBlock* succ) {
  if (succ == nullptr) {
    return;
  }
  const auto append_unique = [](auto& edges, BasicBlock* block) {
    if (std::find(edges.begin(), edges.end(), block) == edges.end()) {
      edges.push_back(block);
    }
  };
  append_unique(successors_, succ);
  append_unique(succ->predecessors_, this);
}
// Read-only view of the block's instruction list; the block keeps ownership
// of the instructions.
const std::vector<std::unique_ptr<Instruction>>& BasicBlock::GetInstructions()
    const {
  return instructions_;
}
// 前驱/后继接口先保留给后续 CFG 扩展使用。
// 当前最小 IR 中还没有 branch 指令,因此这些列表通常为空。
// Read-only view of the predecessor list (non-owning pointers).
const std::vector<BasicBlock*>& BasicBlock::GetPredecessors() const {
  return predecessors_;
}

@ -1,6 +1,6 @@
// 管理基础类型、整型常量池和临时名生成。
#include "ir/IR.h"
#include <cstring>
#include <sstream>
namespace ir {
@ -9,15 +9,38 @@ Context::~Context() = default;
ConstantInt* Context::GetConstInt(int v) {
auto it = const_ints_.find(v);
if (it != const_ints_.end()) return it->second.get();
if (it != const_ints_.end()) {
return it->second.get();
}
auto inserted =
const_ints_.emplace(v, std::make_unique<ConstantInt>(Type::GetInt32Type(), v)).first;
const_ints_.emplace(v, std::make_unique<ConstantInt>(Type::GetInt32Type(), v))
.first;
return inserted->second.get();
}
// Returns the interned float constant for v, creating it on first request.
// Constants are keyed by the raw 32-bit pattern of the float (copied out with
// memcpy) rather than by value, so the lookup never relies on floating-point
// comparison.
ConstantFloat* Context::GetConstFloat(float v) {
  uint32_t key = 0;
  std::memcpy(&key, &v, sizeof(key));

  const auto found = const_floats_.find(key);
  if (found == const_floats_.end()) {
    // First request for this bit pattern: create the constant and cache it.
    return const_floats_
        .emplace(key, std::make_unique<ConstantFloat>(Type::GetFloatType(), v))
        .first->second.get();
  }
  return found->second.get();
}
std::string Context::NextTemp() {
std::ostringstream oss;
oss << "%" << ++temp_index_;
oss << "%t" << ++temp_index_;
return oss.str();
}
// Produces a fresh block name of the form "<prefix>.<n>". The counter is
// incremented before it is used, and it is shared by all prefixes.
std::string Context::NextBlock(const std::string& prefix) {
  std::ostringstream oss;
  oss << prefix << "." << ++block_index_;
  return oss.str();
}

@ -1,16 +1,39 @@
// IR Function
// - 保存参数列表、基本块列表
// - 记录函数属性/元信息(按需要扩展)
#include "ir/IR.h"
#include <stdexcept>
namespace ir {
Function::Function(std::string name, std::shared_ptr<Type> ret_type)
: Value(std::move(ret_type), std::move(name)) {
entry_ = CreateBlock("entry");
Function::Function(std::string name, std::shared_ptr<Type> function_type,
bool is_declaration)
: GlobalValue(std::move(function_type), std::move(name)),
is_declaration_(is_declaration) {
if (!type_ || !type_->IsFunction()) {
throw std::runtime_error("Function 需要 function type");
}
}
// The function's own type; the constructor rejects anything that is not a
// function type.
const std::shared_ptr<Type>& Function::GetFunctionType() const { return type_; }
// Return type taken from the function type.
// NOTE(review): this hands back a reference to whatever
// Type::GetReturnType() yields; that is only safe if that accessor itself
// returns a reference to storage that outlives the call — confirm in IR.h.
const std::shared_ptr<Type>& Function::GetReturnType() const {
  return type_->GetReturnType();
}
// Read-only view of the formal arguments in the order they were added.
const std::vector<std::unique_ptr<Argument>>& Function::GetArguments() const {
  return arguments_;
}
// Appends a formal argument of type ty named name. The argument's index is
// its position in the list at the time it is added, and its parent is this
// function. The function owns the argument; the returned pointer is
// non-owning.
Argument* Function::AddArgument(std::shared_ptr<Type> ty, const std::string& name) {
  const size_t position = arguments_.size();
  arguments_.push_back(
      std::make_unique<Argument>(std::move(ty), name, position, this));
  return arguments_.back().get();
}
BasicBlock* Function::CreateBlock(const std::string& name) {
if (is_declaration_) {
throw std::runtime_error("声明函数不能创建基本块");
}
auto block = std::make_unique<BasicBlock>(name);
auto* ptr = block.get();
ptr->SetParent(this);

@ -1,11 +1,19 @@
// GlobalValue 占位实现:
// - 具体的全局初始化器、打印和链接语义需要自行补全
#include "ir/IR.h"
namespace ir {
GlobalValue::GlobalValue(std::shared_ptr<Type> ty, std::string name)
: User(std::move(ty), std::move(name)) {}
: Value(std::move(ty), std::move(name)) {}
// A global variable's own type is "pointer to value_type"; the pointee type
// is stored separately in value_type_. The initializer pointer is stored
// as-is.
GlobalVariable::GlobalVariable(std::string name, std::shared_ptr<Type> value_type,
                               ConstantValue* initializer, bool is_constant)
    // value_type is copied into GetPointerType() here and only moved from in
    // the value_type_ initializer below; the base class is initialized first,
    // so the move cannot affect the pointer type.
    : GlobalValue(Type::GetPointerType(value_type), std::move(name)),
      value_type_(std::move(value_type)),
      initializer_(initializer),
      is_constant_(is_constant) {}
// A formal parameter: records its position in the parameter list and a
// non-owning pointer to the function it belongs to.
Argument::Argument(std::shared_ptr<Type> ty, std::string name, size_t index,
                   Function* parent)
    : Value(std::move(ty), std::move(name)), index_(index), parent_(parent) {}
} // namespace ir

@ -1,89 +1,178 @@
// IR 构建工具:
// - 管理插入点(当前基本块/位置)
// - 提供创建各类指令的便捷接口,降低 IRGen 复杂度
#include "ir/IR.h"
#include <stdexcept>
#include "utils/Log.h"
namespace ir {
IRBuilder::IRBuilder(Context& ctx, BasicBlock* bb)
: ctx_(ctx), insert_block_(bb) {}
namespace {

// Throws std::runtime_error when the builder has no current insertion block.
void RequireInsertBlock(BasicBlock* bb) {
  if (!bb) {
    throw std::runtime_error("IRBuilder 未设置插入点");
  }
}

// Returns the pointee type of ptr, i.e. the type a load through ptr produces.
// Throws std::runtime_error when ptr is null, has no type, or is not of
// pointer type.
std::shared_ptr<Type> InferLoadType(Value* ptr) {
  if (!ptr || !ptr->GetType() || !ptr->GetType()->IsPointer()) {
    throw std::runtime_error("CreateLoad 需要指针");
  }
  return ptr->GetType()->GetElementType();
}

// Computes the result type of a GEP on base_ptr with the given indices.
//
// Starting from base_ptr's pointee type, the first index leaves the type
// unchanged and every later index replaces the current type with its element
// type, as long as the current type is an array or a pointer. The result is a
// pointer to the type reached.
//
// Only the number of indices matters here; their values are not inspected.
// Throws std::runtime_error when base_ptr is null, untyped, or not a pointer,
// or when a null type is reached while indices remain.
//
// NOTE(review): indices left over after reaching a non-array, non-pointer
// type are silently ignored instead of being reported — confirm that callers
// never pass more indices than the type has levels.
std::shared_ptr<Type> InferGEPResultType(Value* base_ptr,
                                         const std::vector<Value*>& indices) {
  if (!base_ptr || !base_ptr->GetType() || !base_ptr->GetType()->IsPointer()) {
    throw std::runtime_error("CreateGEP 需要指针基址");
  }
  auto current = base_ptr->GetType()->GetElementType();
  for (size_t i = 0; i < indices.size(); ++i) {
    if (!current) {
      throw std::runtime_error("CreateGEP 遇到空类型");
    }
    if (i == 0) {
      // The leading index steps over the base pointer itself.
      continue;
    }
    if (!current->IsArray() && !current->IsPointer()) {
      break;
    }
    current = current->GetElementType();
  }
  return Type::GetPointerType(current);
}

}  // namespace
IRBuilder::IRBuilder(Context& ctx, BasicBlock* bb) : ctx_(ctx), insert_block_(bb) {}
void IRBuilder::SetInsertPoint(BasicBlock* bb) { insert_block_ = bb; }
BasicBlock* IRBuilder::GetInsertBlock() const { return insert_block_; }
ConstantInt* IRBuilder::CreateConstInt(int v) {
// 常量不需要挂在基本块里,由 Context 负责去重与生命周期。
return ctx_.GetConstInt(v);
ConstantInt* IRBuilder::CreateConstInt(int v) { return ctx_.GetConstInt(v); }
// Returns the float constant for `v` obtained from the Context; no instruction is inserted.
ConstantFloat* IRBuilder::CreateConstFloat(float v) { return ctx_.GetConstFloat(v); }
// Returns a zero constant for `type`.
//  - i1 / i32: the integer constant 0 from CreateConstInt
//  - float:    the float constant 0.0 from CreateConstFloat
//  - other:    a ConstantZero of `type` created through the Context
// Throws std::runtime_error when `type` is null.
// NOTE(review): the i1 case reuses CreateConstInt(0); confirm that constant's
// type is acceptable wherever an i1 zero is expected.
ConstantValue* IRBuilder::CreateZero(std::shared_ptr<Type> type) {
  if (type == nullptr) {
    throw std::runtime_error("CreateZero 缺少类型");
  }

  const bool wants_integer = type->IsInt1() || type->IsInt32();
  if (wants_integer) {
    return CreateConstInt(0);
  }

  const bool wants_float = type->IsFloat32();
  if (wants_float) {
    return CreateConstFloat(0.0f);
  }

  return ctx_.CreateOwnedConstant<ConstantZero>(type);
}
BinaryInst* IRBuilder::CreateBinary(Opcode op, Value* lhs, Value* rhs,
const std::string& name) {
if (!insert_block_) {
throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点"));
}
if (!lhs) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateBinary 缺少 lhs"));
}
if (!rhs) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateBinary 缺少 rhs"));
RequireInsertBlock(insert_block_);
if (!lhs || !rhs) {
throw std::runtime_error("CreateBinary 缺少操作数");
}
return insert_block_->Append<BinaryInst>(op, lhs->GetType(), lhs, rhs, name);
}
BinaryInst* IRBuilder::CreateAdd(Value* lhs, Value* rhs,
const std::string& name) {
return CreateBinary(Opcode::Add, lhs, rhs, name);
AllocaInst* IRBuilder::CreateAlloca(std::shared_ptr<Type> allocated_type,
const std::string& name) {
RequireInsertBlock(insert_block_);
auto* parent = insert_block_->GetParent();
if (!parent || !parent->GetEntry()) {
throw std::runtime_error("CreateAlloca 需要所在函数入口块");
}
return parent->GetEntry()->Append<AllocaInst>(std::move(allocated_type), name);
}
AllocaInst* IRBuilder::CreateAllocaI32(const std::string& name) {
if (!insert_block_) {
throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点"));
}
return insert_block_->Append<AllocaInst>(Type::GetPtrInt32Type(), name);
return CreateAlloca(Type::GetInt32Type(), name);
}
LoadInst* IRBuilder::CreateLoad(Value* ptr, const std::string& name) {
if (!insert_block_) {
throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点"));
}
if (!ptr) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateLoad 缺少 ptr"));
}
return insert_block_->Append<LoadInst>(Type::GetInt32Type(), ptr, name);
RequireInsertBlock(insert_block_);
return insert_block_->Append<LoadInst>(ptr, InferLoadType(ptr), name);
}
StoreInst* IRBuilder::CreateStore(Value* val, Value* ptr) {
if (!insert_block_) {
throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点"));
}
if (!val) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateStore 缺少 val"));
}
if (!ptr) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateStore 缺少 ptr"));
}
return insert_block_->Append<StoreInst>(Type::GetVoidType(), val, ptr);
RequireInsertBlock(insert_block_);
return insert_block_->Append<StoreInst>(val, ptr);
}
ReturnInst* IRBuilder::CreateRet(Value* v) {
if (!insert_block_) {
throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点"));
}
if (!v) {
throw std::runtime_error(
FormatError("ir", "IRBuilder::CreateRet 缺少返回值"));
CompareInst* IRBuilder::CreateICmp(ICmpPred pred, Value* lhs, Value* rhs,
const std::string& name) {
RequireInsertBlock(insert_block_);
return insert_block_->Append<CompareInst>(pred, lhs, rhs, name);
}
// Appends a float comparison with predicate `pred` to the insertion block and
// returns the new instruction.
// Throws std::runtime_error when no insertion block is set.
CompareInst* IRBuilder::CreateFCmp(FCmpPred pred, Value* lhs, Value* rhs,
                                   const std::string& name) {
  RequireInsertBlock(insert_block_);
  CompareInst* const compare =
      insert_block_->Append<CompareInst>(pred, lhs, rhs, name);
  return compare;
}
// Appends an unconditional branch to `target` at the insertion block.
// Throws std::runtime_error when no insertion block is set.
BranchInst* IRBuilder::CreateBr(BasicBlock* target) {
  RequireInsertBlock(insert_block_);
  BranchInst* const branch = insert_block_->Append<BranchInst>(target);
  return branch;
}
// Appends a conditional branch on `cond` to the insertion block, going to
// `true_block` or `false_block`.
// Throws std::runtime_error when no insertion block is set.
CondBranchInst* IRBuilder::CreateCondBr(Value* cond, BasicBlock* true_block,
                                        BasicBlock* false_block) {
  RequireInsertBlock(insert_block_);
  CondBranchInst* const branch =
      insert_block_->Append<CondBranchInst>(cond, true_block, false_block);
  return branch;
}
CallInst* IRBuilder::CreateCall(Function* callee, const std::vector<Value*>& args,
const std::string& name) {
RequireInsertBlock(insert_block_);
std::string actual_name = name;
if (callee && callee->GetReturnType()->IsVoid()) {
actual_name.clear();
}
return insert_block_->Append<ReturnInst>(Type::GetVoidType(), v);
return insert_block_->Append<CallInst>(callee, args, actual_name);
}
// Appends a getelementptr over `base_ptr` with `indices` to the insertion block.
// The result type is derived with InferGEPResultType.
// Throws std::runtime_error when no insertion block is set, or when the result
// type cannot be inferred.
GetElementPtrInst* IRBuilder::CreateGEP(Value* base_ptr,
                                        const std::vector<Value*>& indices,
                                        const std::string& name) {
  RequireInsertBlock(insert_block_);
  std::shared_ptr<Type> result_type = InferGEPResultType(base_ptr, indices);
  return insert_block_->Append<GetElementPtrInst>(base_ptr, indices,
                                                  std::move(result_type), name);
}
// Appends an SIToFP cast of `value` to the float type at the insertion block.
// Throws std::runtime_error when no insertion block is set.
CastInst* IRBuilder::CreateSIToFP(Value* value, const std::string& name) {
  RequireInsertBlock(insert_block_);
  CastInst* const cast = insert_block_->Append<CastInst>(
      Opcode::SIToFP, value, Type::GetFloatType(), name);
  return cast;
}
// Appends an FPToSI cast of `value` to the i32 type at the insertion block.
// Throws std::runtime_error when no insertion block is set.
CastInst* IRBuilder::CreateFPToSI(Value* value, const std::string& name) {
  RequireInsertBlock(insert_block_);
  CastInst* const cast = insert_block_->Append<CastInst>(
      Opcode::FPToSI, value, Type::GetInt32Type(), name);
  return cast;
}
// Appends a ZExt cast of `value` to `dst_type` at the insertion block.
// Throws std::runtime_error when no insertion block is set.
CastInst* IRBuilder::CreateZExt(Value* value, std::shared_ptr<Type> dst_type,
                                const std::string& name) {
  RequireInsertBlock(insert_block_);
  CastInst* const cast = insert_block_->Append<CastInst>(
      Opcode::ZExt, value, std::move(dst_type), name);
  return cast;
}
// Appends a return to the insertion block: `ret <value>` when `value` is
// non-null, otherwise a return without an operand.
// Throws std::runtime_error when no insertion block is set.
ReturnInst* IRBuilder::CreateRet(Value* value) {
  RequireInsertBlock(insert_block_);
  if (value == nullptr) {
    return insert_block_->Append<ReturnInst>();
  }
  return insert_block_->Append<ReturnInst>(value);
}
// Appends a return without an operand to the insertion block.
// Throws std::runtime_error when no insertion block is set.
ReturnInst* IRBuilder::CreateRetVoid() {
  RequireInsertBlock(insert_block_);
  ReturnInst* const ret = insert_block_->Append<ReturnInst>();
  return ret;
}
} // namespace ir

@ -1,30 +1,127 @@
// IR 文本输出:
// - 将 IR 打印为 .ll 风格的文本
// - 支撑调试与测试对比(diff)
#include "ir/IR.h"
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include "utils/Log.h"
namespace ir {
namespace {
std::string TypeToString(const std::shared_ptr<Type>& ty);
std::string ConstantToString(const ConstantValue* value);
static const char* TypeToString(const Type& ty) {
switch (ty.GetKind()) {
std::string TypeToString(const std::shared_ptr<Type>& ty) {
if (!ty) {
throw std::runtime_error("空类型无法打印");
}
switch (ty->GetKind()) {
case Type::Kind::Void:
return "void";
case Type::Kind::Int1:
return "i1";
case Type::Kind::Int32:
return "i32";
case Type::Kind::PtrInt32:
return "i32*";
case Type::Kind::Float32:
return "float";
case Type::Kind::Pointer:
return TypeToString(ty->GetElementType()) + "*";
case Type::Kind::Array: {
std::ostringstream oss;
oss << "[" << ty->GetArraySize() << " x "
<< TypeToString(ty->GetElementType()) << "]";
return oss.str();
}
case Type::Kind::Function: {
std::ostringstream oss;
oss << TypeToString(ty->GetReturnType()) << " (";
const auto& params = ty->GetParamTypes();
for (size_t i = 0; i < params.size(); ++i) {
if (i != 0) {
oss << ", ";
}
oss << TypeToString(params[i]);
}
oss << ")";
return oss.str();
}
}
throw std::runtime_error(FormatError("ir", "未知类型"));
throw std::runtime_error("未知类型");
}
static const char* OpcodeToString(Opcode op) {
// Formats `value` as "0x" followed by 16 upper-case hex digits: the bit
// pattern of the value after widening it to double.
std::string FloatLiteral(float value) {
  const double widened = value;
  std::uint64_t pattern = 0;
  std::memcpy(&pattern, &widened, sizeof(pattern));

  static const char kHexDigits[] = "0123456789ABCDEF";
  std::string text = "0x";
  // Emit the nibbles from most to least significant, zero-padded to 16 digits.
  for (int shift = 60; shift >= 0; shift -= 4) {
    text.push_back(kHexDigits[(pattern >> shift) & 0xFu]);
  }
  return text;
}
std::string ValueRef(const Value* value) {
if (!value) {
return "<null>";
}
if (auto* ci = dynamic_cast<const ConstantInt*>(value)) {
return std::to_string(ci->GetValue());
}
if (auto* cf = dynamic_cast<const ConstantFloat*>(value)) {
return FloatLiteral(cf->GetValue());
}
if (auto* cz = dynamic_cast<const ConstantZero*>(value)) {
if (cz->GetType()->IsFloat32()) {
return FloatLiteral(0.0f);
}
return "0";
}
if (dynamic_cast<const Function*>(value) != nullptr ||
dynamic_cast<const GlobalVariable*>(value) != nullptr) {
return "@" + value->GetName();
}
return value->GetName();
}
std::string ConstantToString(const ConstantValue* value) {
if (!value) {
throw std::runtime_error("空常量无法打印");
}
if (auto* ci = dynamic_cast<const ConstantInt*>(value)) {
return std::to_string(ci->GetValue());
}
if (auto* cf = dynamic_cast<const ConstantFloat*>(value)) {
return FloatLiteral(cf->GetValue());
}
if (auto* cz = dynamic_cast<const ConstantZero*>(value)) {
if (cz->GetType()->IsScalar()) {
return ValueRef(cz);
}
return "zeroinitializer";
}
if (auto* array = dynamic_cast<const ConstantArray*>(value)) {
if (array->IsZeroValue()) {
return "zeroinitializer";
}
std::ostringstream oss;
oss << "[";
const auto& elements = array->GetElements();
for (size_t i = 0; i < elements.size(); ++i) {
if (i != 0) {
oss << ", ";
}
oss << TypeToString(elements[i]->GetType()) << " "
<< ConstantToString(elements[i]);
}
oss << "]";
return oss.str();
}
throw std::runtime_error("未知常量类型");
}
const char* BinaryOpcodeName(Opcode op) {
switch (op) {
case Opcode::Add:
return "add";
@ -32,69 +129,241 @@ static const char* OpcodeToString(Opcode op) {
return "sub";
case Opcode::Mul:
return "mul";
case Opcode::Alloca:
return "alloca";
case Opcode::Load:
return "load";
case Opcode::Store:
return "store";
case Opcode::Ret:
return "ret";
}
return "?";
case Opcode::SDiv:
return "sdiv";
case Opcode::SRem:
return "srem";
case Opcode::FAdd:
return "fadd";
case Opcode::FSub:
return "fsub";
case Opcode::FMul:
return "fmul";
case Opcode::FDiv:
return "fdiv";
default:
throw std::runtime_error("不是二元算术 opcode");
}
}
static std::string ValueToString(const Value* v) {
if (auto* ci = dynamic_cast<const ConstantInt*>(v)) {
return std::to_string(ci->GetValue());
const char* ICmpPredName(ICmpPred pred) {
switch (pred) {
case ICmpPred::Eq:
return "eq";
case ICmpPred::Ne:
return "ne";
case ICmpPred::Slt:
return "slt";
case ICmpPred::Sle:
return "sle";
case ICmpPred::Sgt:
return "sgt";
case ICmpPred::Sge:
return "sge";
}
return v ? v->GetName() : "<null>";
throw std::runtime_error("未知 ICmp 谓词");
}
// Maps a float-compare predicate to its textual mnemonic.
// Throws std::runtime_error for a value outside the six handled predicates.
const char* FCmpPredName(FCmpPred pred) {
  if (pred == FCmpPred::Oeq) {
    return "oeq";
  }
  if (pred == FCmpPred::One) {
    return "one";
  }
  if (pred == FCmpPred::Olt) {
    return "olt";
  }
  if (pred == FCmpPred::Ole) {
    return "ole";
  }
  if (pred == FCmpPred::Ogt) {
    return "ogt";
  }
  if (pred == FCmpPred::Oge) {
    return "oge";
  }
  throw std::runtime_error("未知 FCmp 谓词");
}
// Writes a function header to `os`:
//   define  <ret> @<name>(<type> <argname>, ...)   when `define` is true
//   declare <ret> @<name>(<type>, ...)             when `define` is false
// Parameter types come from the function type; argument names (definitions
// only) come from the function's Argument list at the same index.
//
// Throws std::runtime_error when a definition has no Argument object for one
// of its parameter types. Previously that case indexed past the end of the
// argument list or dereferenced a null entry.
void PrintFunctionHeader(const Function& func, std::ostream& os, bool define) {
  os << (define ? "define " : "declare ")
     << TypeToString(func.GetReturnType()) << " @" << func.GetName() << "(";
  const auto& args = func.GetArguments();
  const auto& params = func.GetFunctionType()->GetParamTypes();
  for (size_t i = 0; i < params.size(); ++i) {
    if (i != 0) {
      os << ", ";
    }
    os << TypeToString(params[i]);
    if (!define) {
      continue;
    }
    // Guard the lookup: the argument list is indexed by parameter position.
    if (i >= args.size() || !args[i]) {
      throw std::runtime_error("函数定义缺少形参");
    }
    os << " " << args[i]->GetName();
  }
  os << ")";
}
} // namespace
void IRPrinter::Print(const Module& module, std::ostream& os) {
for (const auto& global : module.GetGlobals()) {
if (!global) {
continue;
}
os << "@" << global->GetName() << " = "
<< (global->IsConstant() ? "constant " : "global ")
<< TypeToString(global->GetValueType()) << " ";
auto* init = global->GetInitializer();
if (!init) {
ConstantZero zero(global->GetValueType());
os << ConstantToString(&zero);
} else {
os << ConstantToString(init);
}
os << "\n";
}
if (!module.GetGlobals().empty() && !module.GetFunctions().empty()) {
os << "\n";
}
bool first_function = true;
for (const auto& func : module.GetFunctions()) {
os << "define " << TypeToString(*func->GetType()) << " @" << func->GetName()
<< "() {\n";
if (!func) {
continue;
}
if (!first_function) {
os << "\n";
}
first_function = false;
if (func->IsDeclaration()) {
PrintFunctionHeader(*func, os, false);
os << "\n";
continue;
}
PrintFunctionHeader(*func, os, true);
os << " {\n";
for (const auto& bb : func->GetBlocks()) {
if (!bb) {
continue;
}
os << bb->GetName() << ":\n";
for (const auto& instPtr : bb->GetInstructions()) {
const auto* inst = instPtr.get();
for (const auto& inst_ptr : bb->GetInstructions()) {
const auto* inst = inst_ptr.get();
switch (inst->GetOpcode()) {
case Opcode::Add:
case Opcode::Sub:
case Opcode::Mul: {
auto* bin = static_cast<const BinaryInst*>(inst);
os << " " << bin->GetName() << " = "
<< OpcodeToString(bin->GetOpcode()) << " "
<< TypeToString(*bin->GetLhs()->GetType()) << " "
<< ValueToString(bin->GetLhs()) << ", "
<< ValueToString(bin->GetRhs()) << "\n";
case Opcode::Mul:
case Opcode::SDiv:
case Opcode::SRem:
case Opcode::FAdd:
case Opcode::FSub:
case Opcode::FMul:
case Opcode::FDiv: {
const auto* bin = static_cast<const BinaryInst*>(inst);
os << " " << bin->GetName() << " = " << BinaryOpcodeName(inst->GetOpcode())
<< " " << TypeToString(bin->GetType()) << " "
<< ValueRef(bin->GetLhs()) << ", " << ValueRef(bin->GetRhs())
<< "\n";
break;
}
case Opcode::Alloca: {
auto* alloca = static_cast<const AllocaInst*>(inst);
os << " " << alloca->GetName() << " = alloca i32\n";
const auto* alloca = static_cast<const AllocaInst*>(inst);
os << " " << alloca->GetName() << " = alloca "
<< TypeToString(alloca->GetAllocatedType()) << "\n";
break;
}
case Opcode::Load: {
auto* load = static_cast<const LoadInst*>(inst);
os << " " << load->GetName() << " = load i32, i32* "
<< ValueToString(load->GetPtr()) << "\n";
const auto* load = static_cast<const LoadInst*>(inst);
os << " " << load->GetName() << " = load "
<< TypeToString(load->GetType()) << ", "
<< TypeToString(load->GetPtr()->GetType()) << " "
<< ValueRef(load->GetPtr()) << "\n";
break;
}
case Opcode::Store: {
auto* store = static_cast<const StoreInst*>(inst);
os << " store i32 " << ValueToString(store->GetValue())
<< ", i32* " << ValueToString(store->GetPtr()) << "\n";
const auto* store = static_cast<const StoreInst*>(inst);
os << " store " << TypeToString(store->GetValue()->GetType()) << " "
<< ValueRef(store->GetValue()) << ", "
<< TypeToString(store->GetPtr()->GetType()) << " "
<< ValueRef(store->GetPtr()) << "\n";
break;
}
case Opcode::ICmp:
case Opcode::FCmp: {
const auto* cmp = static_cast<const CompareInst*>(inst);
os << " " << cmp->GetName() << " = "
<< (cmp->IsFloatCompare() ? "fcmp " : "icmp ")
<< (cmp->IsFloatCompare() ? FCmpPredName(cmp->GetFCmpPred())
: ICmpPredName(cmp->GetICmpPred()))
<< " " << TypeToString(cmp->GetLhs()->GetType()) << " "
<< ValueRef(cmp->GetLhs()) << ", " << ValueRef(cmp->GetRhs())
<< "\n";
break;
}
case Opcode::Br: {
const auto* br = static_cast<const BranchInst*>(inst);
os << " br label %" << br->GetTarget()->GetName() << "\n";
break;
}
case Opcode::CondBr: {
const auto* br = static_cast<const CondBranchInst*>(inst);
os << " br i1 " << ValueRef(br->GetCond()) << ", label %"
<< br->GetTrueBlock()->GetName() << ", label %"
<< br->GetFalseBlock()->GetName() << "\n";
break;
}
case Opcode::Call: {
const auto* call = static_cast<const CallInst*>(inst);
if (!call->GetType()->IsVoid()) {
os << " " << call->GetName() << " = ";
} else {
os << " ";
}
os << "call " << TypeToString(call->GetCallee()->GetReturnType())
<< " @" << call->GetCallee()->GetName() << "(";
auto args = call->GetArgs();
for (size_t i = 0; i < args.size(); ++i) {
if (i != 0) {
os << ", ";
}
os << TypeToString(args[i]->GetType()) << " " << ValueRef(args[i]);
}
os << ")\n";
break;
}
case Opcode::GEP: {
const auto* gep = static_cast<const GetElementPtrInst*>(inst);
os << " " << gep->GetName() << " = getelementptr "
<< TypeToString(gep->GetSourceElementType()) << ", "
<< TypeToString(gep->GetBasePtr()->GetType()) << " "
<< ValueRef(gep->GetBasePtr());
for (auto* index : gep->GetIndices()) {
os << ", " << TypeToString(index->GetType()) << " " << ValueRef(index);
}
os << "\n";
break;
}
case Opcode::SIToFP:
case Opcode::FPToSI:
case Opcode::ZExt: {
const auto* cast = static_cast<const CastInst*>(inst);
const char* opname = inst->GetOpcode() == Opcode::SIToFP
? "sitofp"
: inst->GetOpcode() == Opcode::FPToSI ? "fptosi"
: "zext";
os << " " << cast->GetName() << " = " << opname << " "
<< TypeToString(cast->GetValue()->GetType()) << " "
<< ValueRef(cast->GetValue()) << " to "
<< TypeToString(cast->GetType()) << "\n";
break;
}
case Opcode::Ret: {
auto* ret = static_cast<const ReturnInst*>(inst);
os << " ret " << TypeToString(*ret->GetValue()->GetType()) << " "
<< ValueToString(ret->GetValue()) << "\n";
const auto* ret = static_cast<const ReturnInst*>(inst);
if (auto* value = ret->GetValue()) {
os << " ret " << TypeToString(value->GetType()) << " "
<< ValueRef(value) << "\n";
} else {
os << " ret void\n";
}
break;
}
}

@ -1,13 +1,27 @@
// IR 指令体系:
// - 二元运算/比较、load/store、call、br/condbr、ret、phi、alloca 等
// - 指令操作数与结果类型管理,支持打印与优化
#include "ir/IR.h"
#include <stdexcept>
#include "utils/Log.h"
namespace ir {
namespace {
// Invariant check used by the instruction constructors.
// Throws std::runtime_error carrying `message` when `condition` is false.
void Require(bool condition, const std::string& message) {
  if (condition) {
    return;
  }
  throw std::runtime_error(message);
}
// True only when both types are non-null and compare equal via Type::Equals.
bool SameType(const std::shared_ptr<Type>& lhs, const std::shared_ptr<Type>& rhs) {
  if (!lhs || !rhs) {
    return false;
  }
  return lhs->Equals(*rhs);
}
// Returns the element type of the pointer-typed value `ptr`.
// Throws std::runtime_error (through Require) when `ptr` is null, untyped, or
// not of pointer type.
std::shared_ptr<Type> GetPointeeType(Value* ptr) {
  const bool is_pointer_value =
      ptr != nullptr && ptr->GetType() && ptr->GetType()->IsPointer();
  Require(is_pointer_value, "期望指针类型");
  return ptr->GetType()->GetElementType();
}
} // namespace
// Constructs a User with result type `ty` and name `name`; it starts without
// operands (none are added here).
User::User(std::shared_ptr<Type> ty, std::string name)
: Value(std::move(ty), std::move(name)) {}
@ -24,9 +38,7 @@ void User::SetOperand(size_t index, Value* value) {
if (index >= operands_.size()) {
throw std::out_of_range("User operand index out of range");
}
if (!value) {
throw std::runtime_error(FormatError("ir", "User operand 不能为空"));
}
Require(value != nullptr, "User operand 不能为空");
auto* old = operands_[index];
if (old == value) {
return;
@ -39,10 +51,8 @@ void User::SetOperand(size_t index, Value* value) {
}
void User::AddOperand(Value* value) {
if (!value) {
throw std::runtime_error(FormatError("ir", "User operand 不能为空"));
}
size_t operand_index = operands_.size();
Require(value != nullptr, "User operand 不能为空");
const size_t operand_index = operands_.size();
operands_.push_back(value);
value->AddUse(this, operand_index);
}
@ -52,30 +62,49 @@ Instruction::Instruction(Opcode op, std::shared_ptr<Type> ty, std::string name)
Opcode Instruction::GetOpcode() const { return opcode_; }
bool Instruction::IsTerminator() const { return opcode_ == Opcode::Ret; }
// True for the opcodes that end a basic block: Ret, Br and CondBr.
bool Instruction::IsTerminator() const {
  switch (opcode_) {
    case Opcode::Ret:
    case Opcode::Br:
    case Opcode::CondBr:
      return true;
    default:
      return false;
  }
}
BasicBlock* Instruction::GetParent() const { return parent_; }
void Instruction::SetParent(BasicBlock* parent) { parent_ = parent; }
BinaryInst::BinaryInst(Opcode op, std::shared_ptr<Type> ty, Value* lhs,
Value* rhs, std::string name)
: Instruction(op, std::move(ty), std::move(name)) {
if (op != Opcode::Add) {
throw std::runtime_error(FormatError("ir", "BinaryInst 当前只支持 Add"));
}
if (!lhs || !rhs) {
throw std::runtime_error(FormatError("ir", "BinaryInst 缺少操作数"));
}
if (!type_ || !lhs->GetType() || !rhs->GetType()) {
throw std::runtime_error(FormatError("ir", "BinaryInst 缺少类型信息"));
void Instruction::SetParent(BasicBlock* parent) {
parent_ = parent;
if (!parent_) {
return;
}
if (lhs->GetType()->GetKind() != rhs->GetType()->GetKind() ||
type_->GetKind() != lhs->GetType()->GetKind()) {
throw std::runtime_error(FormatError("ir", "BinaryInst 类型不匹配"));
if (auto* br = dynamic_cast<BranchInst*>(this)) {
parent_->AddSuccessor(br->GetTarget());
} else if (auto* cond = dynamic_cast<CondBranchInst*>(this)) {
parent_->AddSuccessor(cond->GetTrueBlock());
parent_->AddSuccessor(cond->GetFalseBlock());
}
if (!type_->IsInt32()) {
throw std::runtime_error(FormatError("ir", "BinaryInst 当前只支持 i32"));
}
BinaryInst::BinaryInst(Opcode op, std::shared_ptr<Type> ty, Value* lhs, Value* rhs,
std::string name)
: Instruction(op, std::move(ty), std::move(name)) {
Require(lhs && rhs, "BinaryInst 缺少操作数");
Require(type_ && lhs->GetType() && rhs->GetType(), "BinaryInst 缺少类型信息");
Require(SameType(lhs->GetType(), rhs->GetType()), "BinaryInst 操作数类型不匹配");
Require(SameType(type_, lhs->GetType()), "BinaryInst 结果类型不匹配");
switch (op) {
case Opcode::Add:
case Opcode::Sub:
case Opcode::Mul:
case Opcode::SDiv:
case Opcode::SRem:
Require(type_->IsInt32(), "整数 BinaryInst 只支持 i32");
break;
case Opcode::FAdd:
case Opcode::FSub:
case Opcode::FMul:
case Opcode::FDiv:
Require(type_->IsFloat32(), "浮点 BinaryInst 只支持 float");
break;
default:
throw std::runtime_error("BinaryInst 不支持该 opcode");
}
AddOperand(lhs);
AddOperand(rhs);
@ -85,67 +114,189 @@ Value* BinaryInst::GetLhs() const { return GetOperand(0); }
Value* BinaryInst::GetRhs() const { return GetOperand(1); }
ReturnInst::ReturnInst(std::shared_ptr<Type> void_ty, Value* val)
: Instruction(Opcode::Ret, std::move(void_ty), "") {
if (!val) {
throw std::runtime_error(FormatError("ir", "ReturnInst 缺少返回值"));
}
if (!type_ || !type_->IsVoid()) {
throw std::runtime_error(FormatError("ir", "ReturnInst 返回类型必须为 void"));
}
AddOperand(val);
CompareInst::CompareInst(ICmpPred pred, Value* lhs, Value* rhs, std::string name)
: Instruction(Opcode::ICmp, Type::GetInt1Type(), std::move(name)),
icmp_pred_(pred) {
Require(lhs && rhs, "ICmp 缺少操作数");
Require(lhs->GetType() && rhs->GetType(), "ICmp 缺少类型信息");
Require(lhs->GetType()->IsInt32() && rhs->GetType()->IsInt32(),
"ICmp 只支持 i32");
AddOperand(lhs);
AddOperand(rhs);
}
// Builds a float comparison (Opcode::FCmp) of `lhs` and `rhs` under `pred`.
// The instruction's result type is i1.
// Throws std::runtime_error when an operand is null, lacks a type, or is not
// of float type.
CompareInst::CompareInst(FCmpPred pred, Value* lhs, Value* rhs, std::string name)
: Instruction(Opcode::FCmp, Type::GetInt1Type(), std::move(name)),
is_float_compare_(true),
fcmp_pred_(pred) {
// All checks run before any operand is attached, so a rejected instruction
// never registers a use on its operands.
Require(lhs && rhs, "FCmp 缺少操作数");
Require(lhs->GetType() && rhs->GetType(), "FCmp 缺少类型信息");
Require(lhs->GetType()->IsFloat32() && rhs->GetType()->IsFloat32(),
"FCmp 只支持 float");
// Operand 0 is lhs, operand 1 is rhs.
AddOperand(lhs);
AddOperand(rhs);
}
Value* ReturnInst::GetValue() const { return GetOperand(0); }
Value* CompareInst::GetLhs() const { return GetOperand(0); }
AllocaInst::AllocaInst(std::shared_ptr<Type> ptr_ty, std::string name)
: Instruction(Opcode::Alloca, std::move(ptr_ty), std::move(name)) {
if (!type_ || !type_->IsPtrInt32()) {
throw std::runtime_error(FormatError("ir", "AllocaInst 当前只支持 i32*"));
}
Value* CompareInst::GetRhs() const { return GetOperand(1); }
// Builds a return that carries `value` as its single operand.
// The instruction itself is typed void and has an empty name.
// Throws std::runtime_error when `value` is null.
ReturnInst::ReturnInst(Value* value)
: Instruction(Opcode::Ret, Type::GetVoidType(), "") {
Require(value != nullptr, "ret 缺少返回值");
AddOperand(value);
}
LoadInst::LoadInst(std::shared_ptr<Type> val_ty, Value* ptr, std::string name)
: Instruction(Opcode::Load, std::move(val_ty), std::move(name)) {
if (!ptr) {
throw std::runtime_error(FormatError("ir", "LoadInst 缺少 ptr"));
}
if (!type_ || !type_->IsInt32()) {
throw std::runtime_error(FormatError("ir", "LoadInst 当前只支持加载 i32"));
}
if (!ptr->GetType() || !ptr->GetType()->IsPtrInt32()) {
throw std::runtime_error(
FormatError("ir", "LoadInst 当前只支持从 i32* 加载"));
}
ReturnInst::ReturnInst() : Instruction(Opcode::Ret, Type::GetVoidType(), "") {}
// Returns the returned value, or null for a return without an operand.
Value* ReturnInst::GetValue() const {
  if (GetNumOperands() == 0) {
    return nullptr;
  }
  return GetOperand(0);
}
// Builds an alloca for a slot of `allocated_type`; the instruction's own type
// is a pointer to `allocated_type`.
AllocaInst::AllocaInst(std::shared_ptr<Type> allocated_type, std::string name)
// GetPointerType receives a copy, so allocated_type can still be moved into
// the member below.
: Instruction(Opcode::Alloca, Type::GetPointerType(allocated_type),
std::move(name)),
allocated_type_(std::move(allocated_type)) {
// NOTE(review): Type::GetPointerType already throws on a null element type
// while the base class is being initialised, so this check appears
// unreachable for a null `allocated_type` - confirm.
Require(allocated_type_ != nullptr, "alloca 缺少目标类型");
}
// Builds a load of `value_type` through `ptr`; `ptr` becomes operand 0.
// Throws std::runtime_error when `ptr` or `value_type` is null, when `ptr` is
// not pointer-typed, or when the pointee type differs from `value_type`.
LoadInst::LoadInst(Value* ptr, std::shared_ptr<Type> value_type, std::string name)
: Instruction(Opcode::Load, std::move(value_type), std::move(name)) {
Require(ptr != nullptr, "load 缺少 ptr");
// value_type was moved into the base class; type_ is checked from here on.
Require(type_ != nullptr, "load 缺少 value type");
Require(ptr->GetType() && ptr->GetType()->IsPointer(), "load 需要指针操作数");
Require(SameType(GetPointeeType(ptr), type_), "load 类型不匹配");
// Attached only after every check has passed.
AddOperand(ptr);
}
Value* LoadInst::GetPtr() const { return GetOperand(0); }
StoreInst::StoreInst(std::shared_ptr<Type> void_ty, Value* val, Value* ptr)
: Instruction(Opcode::Store, std::move(void_ty), "") {
if (!val) {
throw std::runtime_error(FormatError("ir", "StoreInst 缺少 value"));
}
if (!ptr) {
throw std::runtime_error(FormatError("ir", "StoreInst 缺少 ptr"));
StoreInst::StoreInst(Value* value, Value* ptr)
: Instruction(Opcode::Store, Type::GetVoidType(), "") {
Require(value != nullptr, "store 缺少 value");
Require(ptr != nullptr, "store 缺少 ptr");
Require(ptr->GetType() && ptr->GetType()->IsPointer(), "store 需要指针操作数");
Require(SameType(value->GetType(), GetPointeeType(ptr)), "store 类型不匹配");
AddOperand(value);
AddOperand(ptr);
}
Value* StoreInst::GetValue() const { return GetOperand(0); }
Value* StoreInst::GetPtr() const { return GetOperand(1); }
// Builds an unconditional branch; `target` becomes operand 0.
// The instruction is typed void and has an empty name.
// Throws std::runtime_error when `target` is null.
BranchInst::BranchInst(BasicBlock* target)
: Instruction(Opcode::Br, Type::GetVoidType(), "") {
Require(target != nullptr, "br 缺少目标块");
AddOperand(target);
}
// Returns the branch target (operand 0).
// The downcast is unchecked; the constructor stores a BasicBlock there.
// NOTE(review): assumes operand 0 is never replaced by a non-block value.
BasicBlock* BranchInst::GetTarget() const {
return static_cast<BasicBlock*>(GetOperand(0));
}
// Builds a conditional branch on `cond`.
// Operand layout: 0 = condition, 1 = true target, 2 = false target.
// Throws std::runtime_error when `cond` is null or not of type i1, or when
// either target block is null.
CondBranchInst::CondBranchInst(Value* cond, BasicBlock* true_block,
BasicBlock* false_block)
: Instruction(Opcode::CondBr, Type::GetVoidType(), "") {
// All checks run before any operand is attached.
Require(cond != nullptr, "condbr 缺少条件");
Require(cond->GetType() && cond->GetType()->IsInt1(), "condbr 条件必须为 i1");
Require(true_block != nullptr && false_block != nullptr,
"condbr 缺少目标块");
AddOperand(cond);
AddOperand(true_block);
AddOperand(false_block);
}
Value* CondBranchInst::GetCond() const { return GetOperand(0); }
BasicBlock* CondBranchInst::GetTrueBlock() const {
return static_cast<BasicBlock*>(GetOperand(1));
}
BasicBlock* CondBranchInst::GetFalseBlock() const {
return static_cast<BasicBlock*>(GetOperand(2));
}
CallInst::CallInst(Function* callee, std::vector<Value*> args, std::string name)
: Instruction(Opcode::Call, callee ? callee->GetReturnType() : Type::GetVoidType(),
std::move(name)) {
Require(callee != nullptr, "call 缺少 callee");
AddOperand(callee);
const auto& params = callee->GetFunctionType()->GetParamTypes();
Require(params.size() == args.size(), "call 参数个数不匹配");
for (size_t i = 0; i < args.size(); ++i) {
Require(args[i] != nullptr, "call 缺少实参");
Require(SameType(params[i], args[i]->GetType()), "call 参数类型不匹配");
AddOperand(args[i]);
}
if (!type_ || !type_->IsVoid()) {
throw std::runtime_error(FormatError("ir", "StoreInst 返回类型必须为 void"));
}
// Returns the called function, read back from operand 0 via an unchecked downcast.
// NOTE(review): assumes operand 0 always holds the Function the call was built with.
Function* CallInst::GetCallee() const {
return static_cast<Function*>(GetOperand(0));
}
std::vector<Value*> CallInst::GetArgs() const {
std::vector<Value*> args;
for (size_t i = 1; i < GetNumOperands(); ++i) {
args.push_back(GetOperand(i));
}
if (!val->GetType() || !val->GetType()->IsInt32()) {
throw std::runtime_error(FormatError("ir", "StoreInst 当前只支持存储 i32"));
return args;
}
GetElementPtrInst::GetElementPtrInst(Value* base_ptr, std::vector<Value*> indices,
std::shared_ptr<Type> result_type,
std::string name)
: Instruction(Opcode::GEP, std::move(result_type), std::move(name)) {
Require(base_ptr != nullptr, "gep 缺少 base_ptr");
Require(base_ptr->GetType() && base_ptr->GetType()->IsPointer(),
"gep 需要指针基址");
Require(type_ != nullptr && type_->IsPointer(), "gep 结果必须是指针");
AddOperand(base_ptr);
for (auto* index : indices) {
Require(index != nullptr, "gep 缺少索引");
Require(index->GetType() && index->GetType()->IsInt32(), "gep 索引必须为 i32");
AddOperand(index);
}
if (!ptr->GetType() || !ptr->GetType()->IsPtrInt32()) {
throw std::runtime_error(
FormatError("ir", "StoreInst 当前只支持写入 i32*"));
}
Value* GetElementPtrInst::GetBasePtr() const { return GetOperand(0); }
std::vector<Value*> GetElementPtrInst::GetIndices() const {
std::vector<Value*> indices;
for (size_t i = 1; i < GetNumOperands(); ++i) {
indices.push_back(GetOperand(i));
}
AddOperand(val);
AddOperand(ptr);
return indices;
}
Value* StoreInst::GetValue() const { return GetOperand(0); }
// Returns the element type of the base pointer's type, i.e. the type the
// indices are applied to. No null checks are performed here.
std::shared_ptr<Type> GetElementPtrInst::GetSourceElementType() const {
return GetBasePtr()->GetType()->GetElementType();
}
Value* StoreInst::GetPtr() const { return GetOperand(1); }
// Builds a conversion of `value` to `dst_type`; `value` becomes operand 0.
// Accepted combinations:
//   SIToFP: i32   -> float
//   FPToSI: float -> i32
//   ZExt:   i1    -> i32
// Throws std::runtime_error when `value` or `dst_type` is null, when the
// source/destination types do not match the opcode, or when `op` is not one
// of the three cast opcodes.
CastInst::CastInst(Opcode op, Value* value, std::shared_ptr<Type> dst_type,
                   std::string name)
    : Instruction(op, std::move(dst_type), std::move(name)) {
  Require(value != nullptr, "cast 缺少 value");
  Require(type_ != nullptr, "cast 缺少目标类型");

  const std::shared_ptr<Type>& src_type = value->GetType();
  if (op == Opcode::SIToFP) {
    Require(src_type && src_type->IsInt32() && type_->IsFloat32(),
            "sitofp 需要 i32 -> float");
  } else if (op == Opcode::FPToSI) {
    Require(src_type && src_type->IsFloat32() && type_->IsInt32(),
            "fptosi 需要 float -> i32");
  } else if (op == Opcode::ZExt) {
    Require(src_type && src_type->IsInt1() && type_->IsInt32(),
            "zext 需要 i1 -> i32");
  } else {
    throw std::runtime_error("不支持的 cast opcode");
  }

  // Attach the operand only once the cast has been validated.
  AddOperand(value);
}
Value* CastInst::GetValue() const { return GetOperand(0); }
} // namespace ir

@ -1,5 +1,3 @@
// 保存函数列表并提供模块级上下文访问。
#include "ir/IR.h"
namespace ir {
@ -8,12 +6,45 @@ Context& Module::GetContext() { return context_; }
const Context& Module::GetContext() const { return context_; }
// Creates a global variable owned by this module and returns a non-owning
// pointer to it. The new global is appended to the module's global list.
GlobalVariable* Module::CreateGlobal(std::string name,
                                     std::shared_ptr<Type> value_type,
                                     ConstantValue* initializer,
                                     bool is_constant) {
  auto owned = std::make_unique<GlobalVariable>(
      std::move(name), std::move(value_type), initializer, is_constant);
  GlobalVariable* const created = owned.get();
  globals_.push_back(std::move(owned));
  return created;
}
Function* Module::CreateFunction(const std::string& name,
std::shared_ptr<Type> ret_type) {
functions_.push_back(std::make_unique<Function>(name, std::move(ret_type)));
std::shared_ptr<Type> function_type,
bool is_declaration) {
functions_.push_back(
std::make_unique<Function>(name, std::move(function_type), is_declaration));
return functions_.back().get();
}
// Returns the first function in this module whose name equals `name`, or null
// when there is none. Null entries in the list are skipped.
Function* Module::FindFunction(const std::string& name) const {
  for (auto it = functions_.begin(); it != functions_.end(); ++it) {
    Function* const candidate = it->get();
    if (candidate != nullptr && candidate->GetName() == name) {
      return candidate;
    }
  }
  return nullptr;
}
// Returns the first global in this module whose name equals `name`, or null
// when there is none. Null entries in the list are skipped.
GlobalVariable* Module::FindGlobal(const std::string& name) const {
  for (auto it = globals_.begin(); it != globals_.end(); ++it) {
    GlobalVariable* const candidate = it->get();
    if (candidate != nullptr && candidate->GetName() == name) {
      return candidate;
    }
  }
  return nullptr;
}
const std::vector<std::unique_ptr<GlobalVariable>>& Module::GetGlobals() const {
return globals_;
}
const std::vector<std::unique_ptr<Function>>& Module::GetFunctions() const {
return functions_;
}

@ -1,31 +1,141 @@
// 当前仅支持 void、i32 和 i32*。
#include "ir/IR.h"
#include <stdexcept>
namespace ir {
Type::Type(Kind k) : kind_(k) {}
Type::Type(Kind kind) : kind_(kind) {}
Type::Type(Kind kind, std::shared_ptr<Type> element_type)
: kind_(kind), element_type_(std::move(element_type)) {}
Type::Type(Kind kind, std::shared_ptr<Type> element_type, size_t array_size)
: kind_(kind),
element_type_(std::move(element_type)),
array_size_(array_size) {}
Type::Type(std::shared_ptr<Type> return_type,
std::vector<std::shared_ptr<Type>> params)
: kind_(Kind::Function),
return_type_(std::move(return_type)),
param_types_(std::move(params)) {}
const std::shared_ptr<Type>& Type::GetVoidType() {
static const std::shared_ptr<Type> type = std::make_shared<Type>(Kind::Void);
static const auto type = std::make_shared<Type>(Kind::Void);
return type;
}
// Returns the shared i1 type. The instance is a function-local static created
// on first call, so every call yields a reference to the same object.
const std::shared_ptr<Type>& Type::GetInt1Type() {
static const auto type = std::make_shared<Type>(Kind::Int1);
return type;
}
const std::shared_ptr<Type>& Type::GetInt32Type() {
static const std::shared_ptr<Type> type = std::make_shared<Type>(Kind::Int32);
static const auto type = std::make_shared<Type>(Kind::Int32);
return type;
}
// Returns the shared float (Float32) type. The instance is a function-local
// static created on first call, so every call yields the same object.
const std::shared_ptr<Type>& Type::GetFloatType() {
static const auto type = std::make_shared<Type>(Kind::Float32);
return type;
}
std::shared_ptr<Type> Type::GetPointerType(std::shared_ptr<Type> element_type) {
if (!element_type) {
throw std::runtime_error("GetPointerType 缺少 element_type");
}
return std::make_shared<Type>(Kind::Pointer, std::move(element_type));
}
std::shared_ptr<Type> Type::GetArrayType(std::shared_ptr<Type> element_type,
size_t array_size) {
if (!element_type) {
throw std::runtime_error("GetArrayType 缺少 element_type");
}
return std::make_shared<Type>(Kind::Array, std::move(element_type), array_size);
}
std::shared_ptr<Type> Type::GetFunctionType(
std::shared_ptr<Type> return_type,
std::vector<std::shared_ptr<Type>> param_types) {
if (!return_type) {
throw std::runtime_error("GetFunctionType 缺少 return_type");
}
return std::make_shared<Type>(std::move(return_type), std::move(param_types));
}
const std::shared_ptr<Type>& Type::GetPtrInt32Type() {
static const std::shared_ptr<Type> type = std::make_shared<Type>(Kind::PtrInt32);
static const auto type = GetPointerType(GetInt32Type());
return type;
}
// Kind tag of this type.
Type::Kind Type::GetKind() const {
  return kind_;
}

// Stored element type handle (the one supplied to the pointer/array factories).
const std::shared_ptr<Type>& Type::GetElementType() const {
  return element_type_;
}

// Stored array element count.
size_t Type::GetArraySize() const {
  return array_size_;
}

// Stored return type handle.
const std::shared_ptr<Type>& Type::GetReturnType() const {
  return return_type_;
}

// Stored parameter type list.
const std::vector<std::shared_ptr<Type>>& Type::GetParamTypes() const {
  return param_types_;
}
// Kind predicates. Each one compares the stored kind tag, or combines other
// predicates.
bool Type::IsVoid() const { return kind_ == Kind::Void; }
bool Type::IsInt1() const { return kind_ == Kind::Int1; }
bool Type::IsInt32() const { return kind_ == Kind::Int32; }
bool Type::IsFloat32() const { return kind_ == Kind::Float32; }
bool Type::IsPointer() const { return kind_ == Kind::Pointer; }
bool Type::IsArray() const { return kind_ == Kind::Array; }
bool Type::IsFunction() const { return kind_ == Kind::Function; }

// Scalar = i1, i32 or float.
bool Type::IsScalar() const { return IsInt1() || IsInt32() || IsFloat32(); }
// Integer = i1 or i32.
bool Type::IsInteger() const { return IsInt1() || IsInt32(); }
// Numeric = any integer or float.
bool Type::IsNumeric() const { return IsInteger() || IsFloat32(); }

// True for a pointer whose element type is i32. This is the only definition:
// the stale one comparing against Kind::PtrInt32 was removed because it
// defined the same member function a second time.
bool Type::IsPtrInt32() const {
  return IsPointer() && element_type_ && element_type_->IsInt32();
}
bool Type::Equals(const Type& other) const {
if (kind_ != other.kind_) {
return false;
}
switch (kind_) {
case Kind::Void:
case Kind::Int1:
case Kind::Int32:
case Kind::Float32:
return true;
case Kind::Pointer:
return element_type_ && other.element_type_ &&
element_type_->Equals(*other.element_type_);
case Kind::Array:
return array_size_ == other.array_size_ && element_type_ &&
other.element_type_ && element_type_->Equals(*other.element_type_);
case Kind::Function:
if (!return_type_ || !other.return_type_ ||
!return_type_->Equals(*other.return_type_) ||
param_types_.size() != other.param_types_.size()) {
return false;
}
for (size_t i = 0; i < param_types_.size(); ++i) {
if (!param_types_[i] || !other.param_types_[i] ||
!param_types_[i]->Equals(*other.param_types_[i])) {
return false;
}
}
return true;
}
return false;
}
} // namespace ir

@ -1,9 +1,7 @@
// SSA 值体系抽象:
// - 常量、参数、指令结果等统一为 Value
// - 提供类型信息与使用/被使用关系(按需要实现)
#include "ir/IR.h"
#include <algorithm>
#include <stdexcept>
namespace ir {
@ -14,12 +12,22 @@ const std::shared_ptr<Type>& Value::GetType() const { return type_; }
// Name of this value as stored in `name_`.
const std::string& Value::GetName() const { return name_; }

// Replaces the stored name. Only one definition is kept: the stale pre-rename
// copy (parameter `n`) defined the same member function a second time.
void Value::SetName(std::string name) { name_ = std::move(name); }
// Type predicates forwarded to the value's type. Every predicate is false
// when the value has no type attached.
bool Value::IsVoid() const {
  return type_ != nullptr && type_->IsVoid();
}
bool Value::IsInt1() const {
  return type_ != nullptr && type_->IsInt1();
}
bool Value::IsInt32() const {
  return type_ != nullptr && type_->IsInt32();
}
bool Value::IsFloat32() const {
  return type_ != nullptr && type_->IsFloat32();
}
bool Value::IsPointer() const {
  return type_ != nullptr && type_->IsPointer();
}
bool Value::IsArray() const {
  return type_ != nullptr && type_->IsArray();
}
// True when the value's *type* is a function type (contrast with
// Value::IsFunction, which tests the dynamic class of the object).
bool Value::IsFunctionValue() const {
  return type_ != nullptr && type_->IsFunction();
}
bool Value::IsPtrInt32() const {
  return type_ != nullptr && type_->IsPtrInt32();
}
bool Value::IsConstant() const {
@ -30,27 +38,34 @@ bool Value::IsInstruction() const {
return dynamic_cast<const Instruction*>(this) != nullptr;
}
// True when this object is (a subclass of) User. Kept as a single definition;
// the stale multi-line copy defined the same member function twice.
bool Value::IsUser() const { return dynamic_cast<const User*>(this) != nullptr; }
// True when this object's dynamic type is (derived from) Function.
bool Value::IsFunction() const {
  const auto* as_function = dynamic_cast<const Function*>(this);
  return as_function != nullptr;
}

// True when this object's dynamic type is (derived from) GlobalVariable.
bool Value::IsGlobalVariable() const {
  const auto* as_global = dynamic_cast<const GlobalVariable*>(this);
  return as_global != nullptr;
}

// True when this object's dynamic type is (derived from) Argument.
bool Value::IsArgument() const {
  const auto* as_argument = dynamic_cast<const Argument*>(this);
  return as_argument != nullptr;
}
// Records that operand `operand_index` of `user` refers to this value.
// A null `user` is ignored. The use is appended exactly once; the stale
// pre-change lines would have pushed a second copy of the same use.
void Value::AddUse(User* user, size_t operand_index) {
  if (!user) {
    return;
  }
  uses_.emplace_back(this, user, operand_index);
}
// Drops every recorded use made by `user` at operand slot `operand_index`.
// The erase is performed once; the duplicated stale statement was removed.
void Value::RemoveUse(User* user, size_t operand_index) {
  uses_.erase(std::remove_if(uses_.begin(), uses_.end(),
                             [&](const Use& use) {
                               return use.GetUser() == user &&
                                      use.GetOperandIndex() == operand_index;
                             }),
              uses_.end());
}

// All recorded uses of this value.
const std::vector<Use>& Value::GetUses() const { return uses_; }
@ -62,22 +77,39 @@ void Value::ReplaceAllUsesWith(Value* new_value) {
if (new_value == this) {
return;
}
auto uses = uses_;
for (const auto& use : uses) {
auto snapshot = uses_;
for (const auto& use : snapshot) {
auto* user = use.GetUser();
if (!user) continue;
size_t operand_index = use.GetOperandIndex();
if (user->GetOperand(operand_index) == this) {
user->SetOperand(operand_index, new_value);
if (!user) {
continue;
}
user->SetOperand(use.GetOperandIndex(), new_value);
}
}
// Base constructor: forwards type and name to Value.
ConstantValue::ConstantValue(std::shared_ptr<Type> ty, std::string name)
    : Value(std::move(ty), std::move(name)) {}

// Integer constant; constants are created with an empty name.
// (Single definition: the stale copy with parameter `v` was removed.)
ConstantInt::ConstantInt(std::shared_ptr<Type> ty, int value)
    : ConstantValue(std::move(ty), ""), value_(value) {}

// Float constant; created with an empty name.
ConstantFloat::ConstantFloat(std::shared_ptr<Type> ty, float value)
    : ConstantValue(std::move(ty), ""), value_(value) {}

// Zero constant of type `ty`; carries no payload beyond its type.
ConstantZero::ConstantZero(std::shared_ptr<Type> ty)
    : ConstantValue(std::move(ty), "") {}

// Aggregate constant holding one constant per element (non-owning pointers).
ConstantArray::ConstantArray(std::shared_ptr<Type> ty,
                             std::vector<ConstantValue*> elements)
    : ConstantValue(std::move(ty), ""), elements_(std::move(elements)) {}
bool ConstantArray::IsZeroValue() const {
for (auto* element : elements_) {
if (!element || !element->IsZeroValue()) {
return false;
}
}
return true;
}
} // namespace ir

@ -1,107 +1,486 @@
#include "irgen/IRGen.h"
#include <cstdlib>
#include <functional>
#include <stdexcept>
#include "SysYParser.h"
#include "ir/IR.h"
#include "utils/Log.h"
namespace {
std::string GetLValueName(SysYParser::LValueContext& lvalue) {
if (!lvalue.ID()) {
throw std::runtime_error(FormatError("irgen", "非法左值"));
using ir::Type;
size_t ScalarCount(const std::shared_ptr<Type>& type) {
return type->IsArray() ? type->GetArraySize() * ScalarCount(type->GetElementType()) : 1;
}
// Strips every array level from `type` and returns the innermost element
// type (returns `type` itself when it is not an array).
// The stale `return lvalue.ID()->getText();` line, left over from the removed
// GetLValueName helper, was dropped: it referenced an undeclared name and
// made the real return unreachable.
std::shared_ptr<Type> ScalarLeafType(const std::shared_ptr<Type>& type) {
  auto current = type;
  while (current->IsArray()) {
    current = current->GetElementType();
  }
  return current;
}
} // namespace
// Zero constant matching `type`: 0.0f for float, integer 0 for anything else.
ConstantData ZeroForType(const std::shared_ptr<Type>& type) {
  if (type->IsFloat32()) {
    return ConstantData::FromFloat(0.0f);
  }
  return ConstantData::FromInt(0);
}
// Parses a numeric literal token into a compile-time constant.
//
// Classification:
//   - hexadecimal text ("0x"/"0X" prefix) is a float only when it contains a
//     '.' or a binary exponent marker 'p'/'P';
//   - any other text is a float only when it contains '.' or 'e'/'E'.
// The prefix must be checked first because 'e'/'E' are ordinary hex digits:
// treating them as exponent markers would turn integer literals such as 0xE
// or 0x1e into floats.
//
// Integers are parsed with base auto-detection (decimal / octal / hex) and
// narrowed to int, so out-of-range text wraps rather than being rejected.
ConstantData ParseNumberValue(const std::string& text) {
  const bool is_hex =
      text.size() > 1 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X');
  const char* float_markers = is_hex ? ".pP" : ".eE";
  if (text.find_first_of(float_markers) == std::string::npos) {
    return ConstantData::FromInt(
        static_cast<int>(std::strtoll(text.c_str(), nullptr, 0)));
  }
  return ConstantData::FromFloat(std::strtof(text.c_str(), nullptr));
}
// Null-safe structural type equality: false when either handle is empty.
bool SameType(const std::shared_ptr<Type>& lhs, const std::shared_ptr<Type>& rhs) {
  if (!lhs || !rhs) {
    return false;
  }
  return lhs->Equals(*rhs);
}
ConstantData EvalGlobalConstAddExp(
SysYParser::AddExpContext& add,
const std::unordered_map<std::string, ConstantData>& const_values);
// Evaluates a primary expression inside a global initializer.
// Handles, in order: a numeric literal, a parenthesised expression, and a
// plain (un-indexed) identifier that names a known constant in
// `const_values`. Throws std::runtime_error for an identifier that is not a
// known constant and for every other form (e.g. an indexed l-value).
ConstantData EvalGlobalConstPrimary(
    SysYParser::PrimaryContext& primary,
    const std::unordered_map<std::string, ConstantData>& const_values) {
  if (auto* literal = primary.Number()) {
    return ParseNumberValue(literal->getText());
  }
  if (auto* nested = primary.exp()) {
    return EvalGlobalConstAddExp(*nested->addExp(), const_values);
  }
  auto* lval = primary.lVal();
  if (lval && lval->Ident() && lval->exp().empty()) {
    const std::string name = lval->Ident()->getText();
    const auto entry = const_values.find(name);
    if (entry != const_values.end()) {
      return entry->second;
    }
    throw std::runtime_error(
        FormatError("irgen", "全局初始化器引用了非常量符号: " + name));
  }
  throw std::runtime_error(
      FormatError("irgen", "全局初始化器暂不支持该常量表达式"));
}
// Evaluates a unary expression inside a global initializer.
// A primary is delegated to EvalGlobalConstPrimary; `+`, `-` and `!` are
// folded on the recursively evaluated operand (`!` always yields an int 0/1).
// Everything else — notably a function call — throws std::runtime_error.
ConstantData EvalGlobalConstUnaryExp(
    SysYParser::UnaryExpContext& unary,
    const std::unordered_map<std::string, ConstantData>& const_values) {
  if (auto* primary = unary.primary()) {
    return EvalGlobalConstPrimary(*primary, const_values);
  }
  if (auto* operand = unary.unaryExp()) {
    ConstantData value = EvalGlobalConstUnaryExp(*operand, const_values);
    const std::string op = unary.unaryOp()->getText();
    if (op == "+") {
      return value;
    }
    if (op == "-") {
      if (value.IsFloat()) {
        return ConstantData::FromFloat(-value.AsFloat());
      }
      return ConstantData::FromInt(-value.AsInt());
    }
    if (op == "!") {
      const bool is_zero =
          value.IsFloat() ? (value.AsFloat() == 0.0f) : (value.AsInt() == 0);
      return ConstantData::FromInt(is_zero);
    }
  }
  throw std::runtime_error(FormatError("irgen", "全局初始化器不支持函数调用"));
}
// Folds a chain of `*`, `/` and `%` inside a global initializer, left to
// right.
//
// - `%` requires both operands to be int; otherwise std::runtime_error.
// - `*` and `/` promote both operands to float when either is float, else
//   both are evaluated as int.
// - Integer `/` or `%` with a zero divisor throws std::runtime_error instead
//   of executing undefined behaviour inside the compiler.
// - A divisor of -1 is special-cased because INT_MIN / -1 and INT_MIN % -1
//   overflow: the remainder is always 0 and the quotient is the wrapped
//   negation.
// Float division is left untouched (no integer trap is involved).
ConstantData EvalGlobalConstMulExp(
    SysYParser::MulExpContext& mul,
    const std::unordered_map<std::string, ConstantData>& const_values) {
  ConstantData acc = EvalGlobalConstUnaryExp(*mul.unaryExp(0), const_values);
  for (size_t i = 1; i < mul.unaryExp().size(); ++i) {
    ConstantData rhs = EvalGlobalConstUnaryExp(*mul.unaryExp(i), const_values);
    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const std::string op = mul.children[2 * i - 1]->getText();
    if (op == "%") {
      if (!acc.GetType()->IsInt32() || !rhs.GetType()->IsInt32()) {
        throw std::runtime_error(FormatError("irgen", "% 只支持 int"));
      }
      const int divisor = rhs.AsInt();
      if (divisor == 0) {
        throw std::runtime_error(
            FormatError("irgen", "常量表达式中 % 的除数为 0"));
      }
      acc = ConstantData::FromInt(divisor == -1 ? 0 : acc.AsInt() % divisor);
      continue;
    }
    auto result_type =
        (acc.GetType()->IsFloat32() || rhs.GetType()->IsFloat32())
            ? Type::GetFloatType()
            : Type::GetInt32Type();
    acc = acc.CastTo(result_type);
    rhs = rhs.CastTo(result_type);
    if (result_type->IsFloat32()) {
      float value = op == "*" ? acc.AsFloat() * rhs.AsFloat()
                              : acc.AsFloat() / rhs.AsFloat();
      acc = ConstantData::FromFloat(value);
    } else if (op == "*") {
      acc = ConstantData::FromInt(acc.AsInt() * rhs.AsInt());
    } else {
      const int dividend = acc.AsInt();
      const int divisor = rhs.AsInt();
      if (divisor == 0) {
        throw std::runtime_error(
            FormatError("irgen", "常量表达式中 / 的除数为 0"));
      }
      // x / -1 == -x; negate in unsigned arithmetic so INT_MIN wraps instead
      // of overflowing.
      const int quotient =
          divisor == -1 ? static_cast<int>(0u - static_cast<unsigned>(dividend))
                        : dividend / divisor;
      acc = ConstantData::FromInt(quotient);
    }
  }
  return acc;
}
// Folds a chain of `+` / `-` inside a global initializer, left to right.
// Both operands are promoted to float when either is float, otherwise both
// are evaluated as int.
ConstantData EvalGlobalConstAddExp(
    SysYParser::AddExpContext& add,
    const std::unordered_map<std::string, ConstantData>& const_values) {
  ConstantData total = EvalGlobalConstMulExp(*add.mulExp(0), const_values);
  for (size_t i = 1; i < add.mulExp().size(); ++i) {
    ConstantData term = EvalGlobalConstMulExp(*add.mulExp(i), const_values);
    const bool any_float =
        total.GetType()->IsFloat32() || term.GetType()->IsFloat32();
    auto common = any_float ? Type::GetFloatType() : Type::GetInt32Type();
    total = total.CastTo(common);
    term = term.CastTo(common);
    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const bool is_plus = add.children[2 * i - 1]->getText() == "+";
    if (common->IsFloat32()) {
      const float lhs = total.AsFloat();
      const float rhs = term.AsFloat();
      total = ConstantData::FromFloat(is_plus ? lhs + rhs : lhs - rhs);
    } else {
      const int lhs = total.AsInt();
      const int rhs = term.AsInt();
      total = ConstantData::FromInt(is_plus ? lhs + rhs : lhs - rhs);
    }
  }
  return total;
}
std::any IRGenImpl::visitBlockStmt(SysYParser::BlockStmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少语句块"));
void FlattenInitValue(const std::shared_ptr<Type>& type, SysYParser::InitValContext& init,
std::vector<SysYParser::InitValContext*>& leaves,
size_t& cursor, size_t start) {
if (!type->IsArray()) {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = &init;
return;
}
if (init.exp()) {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = &init;
return;
}
auto elem_type = type->GetElementType();
const size_t elem_span = ScalarCount(elem_type);
for (auto* child : init.initVal()) {
if (!child) {
continue;
}
if (child->L_BRACE()) {
size_t rel = cursor - start;
if (rel % elem_span != 0) {
cursor += elem_span - (rel % elem_span);
}
size_t child_start = cursor;
FlattenInitValue(elem_type, *child, leaves, cursor, child_start);
cursor = child_start + elem_span;
} else {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = child;
}
}
}
void FlattenConstInitValue(const std::shared_ptr<Type>& type,
SysYParser::ConstInitValContext& init,
std::vector<SysYParser::ConstInitValContext*>& leaves,
size_t& cursor, size_t start) {
if (!type->IsArray()) {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = &init;
return;
}
for (auto* item : ctx->blockItem()) {
if (item) {
if (VisitBlockItemResult(*item) == BlockFlow::Terminated) {
// 当前语法要求 return 为块内最后一条语句;命中后可停止生成。
break;
if (init.constExp()) {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = &init;
return;
}
auto elem_type = type->GetElementType();
const size_t elem_span = ScalarCount(elem_type);
for (auto* child : init.constInitVal()) {
if (!child) {
continue;
}
if (child->L_BRACE()) {
size_t rel = cursor - start;
if (rel % elem_span != 0) {
cursor += elem_span - (rel % elem_span);
}
size_t child_start = cursor;
FlattenConstInitValue(elem_type, *child, leaves, cursor, child_start);
cursor = child_start + elem_span;
} else {
if (cursor >= leaves.size()) {
throw std::runtime_error(FormatError("irgen", "初始化器过长"));
}
leaves[cursor++] = child;
}
}
return {};
}
IRGenImpl::BlockFlow IRGenImpl::VisitBlockItemResult(
SysYParser::BlockItemContext& item) {
return std::any_cast<BlockFlow>(item.accept(this));
} // namespace
// Emits one module-level global for every top-level const/var definition.
//
// For each definition the semantic symbol is resolved, a global is created
// with a constant initializer, and the global is registered in `globals_`.
// Const symbols that carry a folded value are also recorded in
// `global_const_values_` so later global initializers can reference them.
//
// Throws std::runtime_error when a definition has no semantic binding. The
// local-declaration paths (GenConstDecl / GenVarDecl) already check this;
// without the check here a missing binding would be a null dereference.
void IRGenImpl::GenGlobals(SysYParser::CompUnitContext& cu) {
  for (auto* decl : cu.decl()) {
    if (!decl) {
      continue;
    }
    if (decl->constDecl()) {
      for (auto* def : decl->constDecl()->constDef()) {
        auto* symbol = sema_.ResolveConstDef(def);
        if (!symbol) {
          throw std::runtime_error(
              FormatError("irgen", "const 声明缺少语义绑定"));
        }
        auto* global = module_.CreateGlobal(
            symbol->name, symbol->type,
            BuildGlobalConstInitializer(symbol->type, def->constInitVal()), true);
        globals_[symbol->name] = {global, symbol->type, false, true, true};
        if (symbol->has_const_value) {
          global_const_values_[symbol->name] = symbol->const_value;
        }
      }
    } else if (decl->varDecl()) {
      for (auto* def : decl->varDecl()->varDef()) {
        auto* symbol = sema_.ResolveVarDef(def);
        if (!symbol) {
          throw std::runtime_error(
              FormatError("irgen", "变量声明缺少语义绑定"));
        }
        auto* global =
            module_.CreateGlobal(symbol->name, symbol->type,
                                 BuildGlobalInitializer(symbol->type, def->initVal()),
                                 false);
        globals_[symbol->name] = {global, symbol->type, false, true, false};
      }
    }
  }
}
std::any IRGenImpl::visitBlockItem(SysYParser::BlockItemContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少块内项"));
// Dispatches a local declaration to the const or variable generator.
// Throws std::runtime_error when the declaration is neither.
//
// The stale lines from the removed visitBlockItem (`ctx->decl()` /
// `ctx->stmt()` branches) were dropped: they referenced an undeclared `ctx`
// and returned values from a void function.
void IRGenImpl::GenDecl(SysYParser::DeclContext& decl) {
  if (decl.constDecl()) {
    GenConstDecl(*decl.constDecl());
    return;
  }
  if (decl.varDecl()) {
    GenVarDecl(*decl.varDecl());
    return;
  }
  throw std::runtime_error(FormatError("irgen", "未知声明类型"));
}
// Emits storage and initialization for every definition in a local `const`
// declaration: an alloca per symbol, then either an element-wise array
// initialization or a single store of the (cast) initializer value. The
// symbol is finally registered as a local.
// Throws std::runtime_error when a definition has no semantic binding.
void IRGenImpl::GenConstDecl(SysYParser::ConstDeclContext& decl) {
  for (auto* const_def : decl.constDef()) {
    auto* sym = sema_.ResolveConstDef(const_def);
    if (sym == nullptr) {
      throw std::runtime_error(FormatError("irgen", "const 声明缺少语义绑定"));
    }
    auto* storage =
        builder_.CreateAlloca(sym->type, module_.GetContext().NextTemp());
    if (!sym->type->IsArray()) {
      auto* init_expr = const_def->constInitVal()->constExp()->addExp();
      ir::Value* initial = CastValue(GenAddExpr(*init_expr), sym->type);
      builder_.CreateStore(initial, storage);
    } else {
      EmitLocalConstArrayInit(storage, sym->type, *const_def->constInitVal());
    }
    DeclareLocal(sym->name, {storage, sym->type, false, false, true});
  }
}
// Emits storage and initialization for every definition in a local variable
// declaration.
//   - Arrays: initialized element-wise only when an initializer is present.
//   - Scalars: always stored to; the stored value is the cast initializer, or
//     a zero of the matching type when there is no initializer.
// Throws std::runtime_error when a definition has no semantic binding.
//
// The stale unconditional `throw` left over from the removed visitBlockItem
// was dropped; it made every call fail after processing its definitions.
void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) {
  for (auto* def : decl.varDef()) {
    auto* symbol = sema_.ResolveVarDef(def);
    if (!symbol) {
      throw std::runtime_error(FormatError("irgen", "变量声明缺少语义绑定"));
    }
    auto* slot =
        builder_.CreateAlloca(symbol->type, module_.GetContext().NextTemp());
    if (symbol->type->IsArray()) {
      if (def->initVal()) {
        EmitLocalArrayInit(slot, symbol->type, *def->initVal());
      }
    } else {
      ir::Value* init = symbol->type->IsFloat32()
                            ? static_cast<ir::Value*>(builder_.CreateConstFloat(0.0f))
                            : static_cast<ir::Value*>(builder_.CreateConstInt(0));
      if (auto* init_val = def->initVal()) {
        init = GenExpr(*init_val->exp());
        init = CastValue(init, symbol->type);
      }
      builder_.CreateStore(init, slot);
    }
    DeclareLocal(symbol->name, {slot, symbol->type, false, false, false});
  }
}
// 变量声明的 IR 生成目前也是最小实现:
// - 先检查声明的基础类型,当前仅支持局部 int
// - 再把 Decl 中的变量定义交给 visitVarDef 继续处理。
//
// 和更完整的版本相比,这里还没有:
// - 一个 Decl 中多个变量定义的顺序处理;
// - const、数组、全局变量等不同声明形态
// - 更丰富的类型系统。
std::any IRGenImpl::visitDecl(SysYParser::DeclContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少变量声明"));
// Stores `value` into the scalar at flattened position `flat_index` of the
// array behind `base_ptr`.
// The flat index is converted to per-dimension indices, prefixed with a
// leading constant 0, and used for a GEP; `value` is cast to the element type
// of the resulting address before the store.
//
// The stale `ctx->btype()` check from the removed visitDecl was dropped; it
// referenced an undeclared `ctx`.
void IRGenImpl::EmitArrayStore(ir::Value* base_ptr,
                               const std::shared_ptr<Type>& array_type,
                               size_t flat_index, ir::Value* value) {
  auto indices = FlatIndexToIndices(array_type, flat_index);
  std::vector<ir::Value*> gep_indices;
  gep_indices.push_back(builder_.CreateConstInt(0));
  for (int index : indices) {
    gep_indices.push_back(builder_.CreateConstInt(index));
  }
  auto* addr =
      builder_.CreateGEP(base_ptr, gep_indices, module_.GetContext().NextTemp());
  builder_.CreateStore(CastValue(value, addr->GetType()->GetElementType()), addr);
}
// Emits one zero store for every scalar element of a local array (float zero
// for float arrays, integer zero otherwise).
//
// Changes: the element count is computed once instead of on every loop
// iteration, and the stale `ctx->varDef()` lines from the removed visitDecl
// (which referenced an undeclared `ctx`) were dropped.
void IRGenImpl::ZeroInitializeLocalArray(ir::Value* base_ptr,
                                         const std::shared_ptr<Type>& array_type) {
  const auto scalar_type = ScalarLeafType(array_type);
  const size_t scalar_count = CountScalars(array_type);
  for (size_t i = 0; i < scalar_count; ++i) {
    ir::Value* zero = scalar_type->IsFloat32()
                          ? static_cast<ir::Value*>(builder_.CreateConstFloat(0.0f))
                          : static_cast<ir::Value*>(builder_.CreateConstInt(0));
    EmitArrayStore(base_ptr, array_type, i, zero);
  }
}
// Initializes a local array from `init`: the whole array is zeroed first,
// then every flattened slot that received an expression initializer is
// overwritten with the generated value. Slots without an expression keep the
// zero.
//
// The stale `var_def->accept(this); return {};` lines from the removed
// visitDecl were dropped; they referenced an undeclared name and returned a
// value from a void function.
void IRGenImpl::EmitLocalArrayInit(ir::Value* base_ptr,
                                   const std::shared_ptr<Type>& array_type,
                                   SysYParser::InitValContext& init) {
  ZeroInitializeLocalArray(base_ptr, array_type);
  std::vector<SysYParser::InitValContext*> leaves(CountScalars(array_type), nullptr);
  size_t cursor = 0;
  FlattenInitValue(array_type, init, leaves, cursor, 0);
  for (size_t i = 0; i < leaves.size(); ++i) {
    if (!leaves[i] || !leaves[i]->exp()) {
      continue;
    }
    EmitArrayStore(base_ptr, array_type, i, GenExpr(*leaves[i]->exp()));
  }
}
// Initializes a local const array from `init`: zero the whole array, then
// overwrite each flattened slot that received a constant-expression
// initializer with its generated value.
void IRGenImpl::EmitLocalConstArrayInit(ir::Value* base_ptr,
                                        const std::shared_ptr<Type>& array_type,
                                        SysYParser::ConstInitValContext& init) {
  ZeroInitializeLocalArray(base_ptr, array_type);

  std::vector<SysYParser::ConstInitValContext*> slots(CountScalars(array_type),
                                                      nullptr);
  size_t next_slot = 0;
  FlattenConstInitValue(array_type, init, slots, next_slot, 0);

  size_t flat_index = 0;
  for (auto* leaf : slots) {
    const size_t position = flat_index++;
    if (leaf && leaf->constExp()) {
      EmitArrayStore(base_ptr, array_type, position,
                     GenAddExpr(*leaf->constExp()->addExp()));
    }
  }
}
// 当前仍是教学用的最小版本,因此这里只支持:
// - 局部 int 变量;
// - 标量初始化;
// - 一个 VarDef 对应一个槽位。
std::any IRGenImpl::visitVarDef(SysYParser::VarDefContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少变量定义"));
// Builds the constant initializer for a global variable of type `type`.
//
//   - No initializer: a zero constant of `type`.
//   - Scalar: the initializer expression is folded at compile time and cast
//     to `type`.
//   - Array: all scalars start as zero; an expression initializer fills slot
//     0, a braced initializer is flattened and each provided leaf is folded
//     into its slot. The flat values are then re-nested into constants that
//     mirror the array shape, collapsing any all-zero sub-array into a
//     ConstantZero.
//
// The stale lines from the removed visitVarDef (storage_map_ / alloca / store
// logic referencing an undeclared `ctx`) were dropped.
ir::ConstantValue* IRGenImpl::BuildGlobalInitializer(const std::shared_ptr<Type>& type,
                                                     SysYParser::InitValContext* init) {
  if (!init) {
    return builder_.CreateZero(type);
  }
  if (!type->IsArray()) {
    auto value = EvalGlobalConstAddExp(*init->exp()->addExp(), global_const_values_)
                     .CastTo(type);
    return type->IsFloat32()
               ? static_cast<ir::ConstantValue*>(
                     module_.GetContext().GetConstFloat(value.AsFloat()))
               : static_cast<ir::ConstantValue*>(
                     module_.GetContext().GetConstInt(value.AsInt()));
  }

  const auto scalar_type = ScalarLeafType(type);
  std::vector<ConstantData> flat(CountScalars(type), ZeroForType(scalar_type));
  if (init->exp()) {
    flat[0] =
        EvalGlobalConstAddExp(*init->exp()->addExp(), global_const_values_)
            .CastTo(scalar_type);
  } else if (init->L_BRACE()) {
    std::vector<SysYParser::InitValContext*> leaves(flat.size(), nullptr);
    size_t cursor = 0;
    FlattenInitValue(type, *init, leaves, cursor, 0);
    for (size_t i = 0; i < leaves.size(); ++i) {
      if (leaves[i] && leaves[i]->exp()) {
        flat[i] = EvalGlobalConstAddExp(*leaves[i]->exp()->addExp(), global_const_values_)
                      .CastTo(scalar_type);
      }
    }
  }

  // Re-nest the flat scalars depth-first; `offset` walks `flat` in order.
  size_t offset = 0;
  std::function<ir::ConstantValue*(const std::shared_ptr<Type>&)> build =
      [&](const std::shared_ptr<Type>& current) -> ir::ConstantValue* {
    if (!current->IsArray()) {
      ConstantData value = flat[offset++].CastTo(current);
      return current->IsFloat32()
                 ? static_cast<ir::ConstantValue*>(
                       module_.GetContext().GetConstFloat(value.AsFloat()))
                 : static_cast<ir::ConstantValue*>(
                       module_.GetContext().GetConstInt(value.AsInt()));
    }
    std::vector<ir::ConstantValue*> elements;
    elements.reserve(current->GetArraySize());
    bool all_zero = true;
    for (size_t i = 0; i < current->GetArraySize(); ++i) {
      auto* child = build(current->GetElementType());
      all_zero = all_zero && child->IsZeroValue();
      elements.push_back(child);
    }
    if (all_zero) {
      return module_.GetContext().CreateOwnedConstant<ir::ConstantZero>(current);
    }
    return module_.GetContext().CreateOwnedConstant<ir::ConstantArray>(current,
                                                                       elements);
  };
  return build(type);
}
// Builds the constant initializer for a global `const` of type `type`.
//
//   - Scalar: the constant expression is folded and cast to `type`.
//   - Array: all scalars start as zero, the initializer is flattened and each
//     provided leaf is folded into its slot; the flat values are then
//     re-nested into constants mirroring the array shape, collapsing any
//     all-zero sub-array into a ConstantZero.
//
// NOTE(review): `init` is dereferenced without a null check — presumably the
// grammar guarantees a const definition always has an initializer; confirm.
//
// The stale `builder_.CreateStore(init, slot); return {};` lines from the
// removed visitVarDef were dropped; they referenced an undeclared `slot`.
ir::ConstantValue* IRGenImpl::BuildGlobalConstInitializer(
    const std::shared_ptr<Type>& type, SysYParser::ConstInitValContext* init) {
  if (!type->IsArray()) {
    auto value =
        EvalGlobalConstAddExp(*init->constExp()->addExp(), global_const_values_)
            .CastTo(type);
    return type->IsFloat32()
               ? static_cast<ir::ConstantValue*>(
                     module_.GetContext().GetConstFloat(value.AsFloat()))
               : static_cast<ir::ConstantValue*>(
                     module_.GetContext().GetConstInt(value.AsInt()));
  }

  const auto scalar_type = ScalarLeafType(type);
  std::vector<ConstantData> flat(CountScalars(type), ZeroForType(scalar_type));
  std::vector<SysYParser::ConstInitValContext*> leaves(flat.size(), nullptr);
  size_t cursor = 0;
  FlattenConstInitValue(type, *init, leaves, cursor, 0);
  for (size_t i = 0; i < leaves.size(); ++i) {
    if (leaves[i] && leaves[i]->constExp()) {
      flat[i] =
          EvalGlobalConstAddExp(*leaves[i]->constExp()->addExp(), global_const_values_)
              .CastTo(scalar_type);
    }
  }

  // Re-nest the flat scalars depth-first; `offset` walks `flat` in order.
  size_t offset = 0;
  std::function<ir::ConstantValue*(const std::shared_ptr<Type>&)> build =
      [&](const std::shared_ptr<Type>& current) -> ir::ConstantValue* {
    if (!current->IsArray()) {
      ConstantData value = flat[offset++].CastTo(current);
      return current->IsFloat32()
                 ? static_cast<ir::ConstantValue*>(
                       module_.GetContext().GetConstFloat(value.AsFloat()))
                 : static_cast<ir::ConstantValue*>(
                       module_.GetContext().GetConstInt(value.AsInt()));
    }
    std::vector<ir::ConstantValue*> elements;
    elements.reserve(current->GetArraySize());
    bool all_zero = true;
    for (size_t i = 0; i < current->GetArraySize(); ++i) {
      auto* child = build(current->GetElementType());
      all_zero = all_zero && child->IsZeroValue();
      elements.push_back(child);
    }
    if (all_zero) {
      return module_.GetContext().CreateOwnedConstant<ir::ConstantZero>(current);
    }
    return module_.GetContext().CreateOwnedConstant<ir::ConstantArray>(current,
                                                                       elements);
  };
  return build(type);
}

@ -2,14 +2,10 @@
#include <memory>
#include "SysYParser.h"
#include "ir/IR.h"
#include "utils/Log.h"
// Entry point of IR generation: creates an empty module, runs the generator
// over the parse tree with the semantic results, and returns the module.
//
// Generation is driven once through IRGenImpl::Gen; the stale
// `tree.accept(&gen)` call from the visitor-based version was removed so the
// tree is not processed through two different entry points.
std::unique_ptr<ir::Module> GenerateIR(SysYParser::CompUnitContext& tree,
                                       const SemanticContext& sema) {
  auto module = std::make_unique<ir::Module>();
  IRGenImpl gen(*module, sema);
  gen.Gen(tree);
  return module;
}

@ -1,80 +1,293 @@
#include "irgen/IRGen.h"
#include <cstdlib>
#include <stdexcept>
#include "SysYParser.h"
#include "ir/IR.h"
#include "utils/Log.h"
// 表达式生成当前也只实现了很小的一个子集。
// 目前支持:
// - 整数字面量
// - 普通局部变量读取
// - 括号表达式
// - 二元加法
//
// 还未支持:
// - 减乘除与一元运算
// - 赋值表达式
// - 函数调用
// - 数组、指针、下标访问
// - 条件与比较表达式
// - ...
ir::Value* IRGenImpl::EvalExpr(SysYParser::ExpContext& expr) {
return std::any_cast<ir::Value*>(expr.accept(this));
namespace {
using ir::FCmpPred;
using ir::ICmpPred;
using ir::Opcode;
using ir::Type;
// Null-safe structural type equality: false when either handle is empty.
bool SameType(const std::shared_ptr<Type>& lhs, const std::shared_ptr<Type>& rhs) {
  if (!lhs || !rhs) {
    return false;
  }
  return lhs->Equals(*rhs);
}
// Common type for a binary arithmetic operation: float when either operand
// is float, otherwise i32.
std::shared_ptr<Type> ArithmeticType(const std::shared_ptr<Type>& lhs,
                                     const std::shared_ptr<Type>& rhs) {
  const bool any_float = lhs->IsFloat32() || rhs->IsFloat32();
  if (any_float) {
    return Type::GetFloatType();
  }
  return Type::GetInt32Type();
}
} // namespace
// Converts `value` to `dst_type`, emitting a conversion instruction when one
// is needed.
// Supported conversions: identical types (returned unchanged), i1 -> i32
// (zero extension), i32 -> float, float -> i32.
// Throws std::runtime_error for a null argument or any other type pair.
ir::Value* IRGenImpl::CastValue(ir::Value* value,
                                const std::shared_ptr<ir::Type>& dst_type) {
  if (value == nullptr || dst_type == nullptr) {
    throw std::runtime_error(FormatError("irgen", "CastValue 缺少参数"));
  }
  const auto& src_type = value->GetType();
  if (SameType(src_type, dst_type)) {
    return value;
  }

  const bool to_i32 = dst_type->IsInt32();
  if (to_i32 && src_type->IsInt1()) {
    return builder_.CreateZExt(value, dst_type, module_.GetContext().NextTemp());
  }
  if (to_i32 && src_type->IsFloat32()) {
    return builder_.CreateFPToSI(value, module_.GetContext().NextTemp());
  }
  if (src_type->IsInt32() && dst_type->IsFloat32()) {
    return builder_.CreateSIToFP(value, module_.GetContext().NextTemp());
  }
  throw std::runtime_error(FormatError("irgen", "不支持的类型转换"));
}
// Converts `value` to an i1 truth value.
// An i1 is returned unchanged; an i32 is compared `!= 0`; a float is compared
// against 0.0f with FCmpPred::One.
// Throws std::runtime_error for a null value or any other type.
//
// NOTE(review): FCmpPred::One is presumably the "ordered not-equal"
// predicate, in which case a NaN operand would evaluate to false here —
// confirm this is the intended truthiness for NaN.
ir::Value* IRGenImpl::ToBool(ir::Value* value) {
  if (value == nullptr) {
    throw std::runtime_error(FormatError("irgen", "ToBool 缺少 value"));
  }
  const auto& type = value->GetType();
  if (type->IsInt1()) {
    return value;
  }
  if (type->IsInt32()) {
    auto* zero = builder_.CreateConstInt(0);
    return builder_.CreateICmp(ICmpPred::Ne, value, zero,
                               module_.GetContext().NextTemp());
  }
  if (type->IsFloat32()) {
    auto* zero = builder_.CreateConstFloat(0.0f);
    return builder_.CreateFCmp(FCmpPred::One, value, zero,
                               module_.GetContext().NextTemp());
  }
  throw std::runtime_error(FormatError("irgen", "条件表达式只支持 int/float"));
}
// Emits a GEP on `array_ptr` with two constant-zero indices and returns the
// resulting address.
ir::Value* IRGenImpl::DecayArrayPointer(ir::Value* array_ptr) {
  auto* first_zero = builder_.CreateConstInt(0);
  auto* second_zero = builder_.CreateConstInt(0);
  return builder_.CreateGEP(array_ptr, {first_zero, second_zero},
                            module_.GetContext().NextTemp());
}
// Generates IR for an expression by delegating to its additive
// sub-expression.
ir::Value* IRGenImpl::GenExpr(SysYParser::ExpContext& expr) {
  auto* additive = expr.addExp();
  return GenAddExpr(*additive);
}
// Generates IR for a chain of `+` / `-`, left to right.
// Before each operation both operands are cast to the common arithmetic type
// (float if either is float, else i32); the float or integer opcode is chosen
// accordingly.
ir::Value* IRGenImpl::GenAddExpr(SysYParser::AddExpContext& add) {
  ir::Value* result = GenMulExpr(*add.mulExp(0));
  for (size_t i = 1; i < add.mulExp().size(); ++i) {
    ir::Value* operand = GenMulExpr(*add.mulExp(i));
    auto common = ArithmeticType(result->GetType(), operand->GetType());
    result = CastValue(result, common);
    operand = CastValue(operand, common);

    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const bool is_plus = add.children[2 * i - 1]->getText() == "+";
    Opcode opcode;
    if (common->IsFloat32()) {
      opcode = is_plus ? Opcode::FAdd : Opcode::FSub;
    } else {
      opcode = is_plus ? Opcode::Add : Opcode::Sub;
    }
    result = builder_.CreateBinary(opcode, result, operand,
                                   module_.GetContext().NextTemp());
  }
  return result;
}
std::any IRGenImpl::visitParenExp(SysYParser::ParenExpContext* ctx) {
if (!ctx || !ctx->exp()) {
throw std::runtime_error(FormatError("irgen", "非法括号表达式"));
// Generates IR for a chain of `*`, `/` and `%`, left to right.
//   - `%`: both operands are cast to i32 and a signed remainder is emitted.
//   - `*`, `/`: both operands are cast to the common arithmetic type (float
//     if either is float, else i32) and the matching opcode is emitted.
//
// The stale `return EvalExpr(*ctx->exp());` from the removed visitParenExp
// was dropped: it referenced an undeclared `ctx` and made `return acc;`
// unreachable.
ir::Value* IRGenImpl::GenMulExpr(SysYParser::MulExpContext& mul) {
  ir::Value* acc = GenUnaryExpr(*mul.unaryExp(0));
  for (size_t i = 1; i < mul.unaryExp().size(); ++i) {
    ir::Value* rhs = GenUnaryExpr(*mul.unaryExp(i));
    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const std::string op = mul.children[2 * i - 1]->getText();
    if (op == "%") {
      acc = CastValue(acc, Type::GetInt32Type());
      rhs = CastValue(rhs, Type::GetInt32Type());
      acc = builder_.CreateBinary(Opcode::SRem, acc, rhs,
                                  module_.GetContext().NextTemp());
      continue;
    }
    auto result_type = ArithmeticType(acc->GetType(), rhs->GetType());
    acc = CastValue(acc, result_type);
    rhs = CastValue(rhs, result_type);
    Opcode opcode = Opcode::Mul;
    if (result_type->IsFloat32()) {
      opcode = op == "*" ? Opcode::FMul : Opcode::FDiv;
    } else {
      opcode = op == "*" ? Opcode::Mul : Opcode::SDiv;
    }
    acc = builder_.CreateBinary(opcode, acc, rhs, module_.GetContext().NextTemp());
  }
  return acc;
}
// Generates IR for a unary expression: a primary, a function call, or a
// prefix operator applied to another unary expression.
//
// Function call: the callee is looked up through the semantic binding; each
// argument is generated and cast to the corresponding parameter type. The
// call result is named only when the callee does not return void.
//
// Prefix operators:
//   +  returns the operand unchanged;
//   -  emits `0 - x` (float subtraction for float operands, otherwise the
//      operand is cast to i32 first);
//   !  converts the operand to i1, then yields an i32 that is 1 when the
//      operand was false and 0 otherwise.
//
// Throws std::runtime_error when the callee cannot be found, when a call
// supplies more arguments than the callee has parameters, or when the
// expression has none of the recognised forms.
ir::Value* IRGenImpl::GenUnaryExpr(SysYParser::UnaryExpContext& unary) {
  if (unary.primary()) {
    return GenPrimary(*unary.primary());
  }
  if (unary.Ident()) {
    auto* symbol = sema_.ResolveCall(&unary);
    auto* callee = symbol ? module_.FindFunction(symbol->name) : nullptr;
    if (!callee) {
      throw std::runtime_error(FormatError("irgen", "函数声明缺失"));
    }
    std::vector<ir::Value*> args;
    const auto& params = callee->GetFunctionType()->GetParamTypes();
    if (unary.funcRParams()) {
      // Fetch the argument list once rather than on every loop iteration.
      const auto actuals = unary.funcRParams()->exp();
      // Guard the params[i] lookup below: indexing past the parameter list
      // would be an out-of-bounds read.
      if (actuals.size() > params.size()) {
        throw std::runtime_error(FormatError(
            "irgen", "函数实参数量超过形参数量: " + symbol->name));
      }
      args.reserve(actuals.size());
      for (size_t i = 0; i < actuals.size(); ++i) {
        auto* value = GenExpr(*actuals[i]);
        args.push_back(CastValue(value, params[i]));
      }
    }
    std::string name;
    if (!callee->GetReturnType()->IsVoid()) {
      name = module_.GetContext().NextTemp();
    }
    return builder_.CreateCall(callee, args, name);
  }
  if (unary.unaryExp()) {
    const std::string op = unary.unaryOp()->getText();
    auto* value = GenUnaryExpr(*unary.unaryExp());
    if (op == "+") {
      return value;
    }
    if (op == "-") {
      if (value->GetType()->IsFloat32()) {
        return builder_.CreateBinary(Opcode::FSub, builder_.CreateConstFloat(0.0f),
                                     value, module_.GetContext().NextTemp());
      }
      value = CastValue(value, Type::GetInt32Type());
      return builder_.CreateBinary(Opcode::Sub, builder_.CreateConstInt(0), value,
                                   module_.GetContext().NextTemp());
    }
    if (op == "!") {
      auto* bool_value = ToBool(value);
      auto* as_i32 = builder_.CreateZExt(bool_value, Type::GetInt32Type(),
                                         module_.GetContext().NextTemp());
      auto* is_zero = builder_.CreateICmp(ICmpPred::Eq, as_i32,
                                          builder_.CreateConstInt(0),
                                          module_.GetContext().NextTemp());
      return builder_.CreateZExt(is_zero, Type::GetInt32Type(),
                                 module_.GetContext().NextTemp());
    }
  }
  throw std::runtime_error(FormatError("irgen", "非法一元表达式"));
}
std::any IRGenImpl::visitNumberExp(SysYParser::NumberExpContext* ctx) {
if (!ctx || !ctx->number() || !ctx->number()->ILITERAL()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持整数字面量"));
// Generates IR for a primary expression: a numeric literal, a parenthesised
// expression, or an l-value read.
// Throws std::runtime_error when the node has none of those forms.
//
// Literal classification:
//   - hexadecimal text ("0x"/"0X" prefix) is a float only when it contains a
//     '.' or a binary exponent marker 'p'/'P';
//   - any other text is a float only when it contains '.' or 'e'/'E'.
// The prefix is checked first because 'e'/'E' are ordinary hex digits;
// treating them as exponent markers turned integer literals such as 0xE or
// 0x1e into floats.
//
// The stale `CreateConstInt(std::stoi(ctx->number()->getText()))` return from
// the removed visitNumberExp was dropped; it referenced an undeclared `ctx`.
ir::Value* IRGenImpl::GenPrimary(SysYParser::PrimaryContext& primary) {
  if (primary.Number()) {
    const std::string text = primary.Number()->getText();
    const bool is_hex =
        text.size() > 1 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X');
    const char* float_markers = is_hex ? ".pP" : ".eE";
    if (text.find_first_of(float_markers) == std::string::npos) {
      return builder_.CreateConstInt(
          static_cast<int>(std::strtoll(text.c_str(), nullptr, 0)));
    }
    return builder_.CreateConstFloat(std::strtof(text.c_str(), nullptr));
  }
  if (primary.exp()) {
    return GenExpr(*primary.exp());
  }
  if (primary.lVal()) {
    return GenLValueValue(*primary.lVal());
  }
  throw std::runtime_error(FormatError("irgen", "非法 primary 表达式"));
}
// 变量使用的处理流程:
// 1. 先通过语义分析结果把变量使用绑定回声明;
// 2. 再通过 storage_map_ 找到该声明对应的栈槽位;
// 3. 最后生成 load把内存中的值读出来。
//
// 因此当前 IRGen 自己不再做名字查找,而是直接消费 Sema 的绑定结果。
std::any IRGenImpl::visitVarExp(SysYParser::VarExpContext* ctx) {
if (!ctx || !ctx->var() || !ctx->var()->ID()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持普通整型变量"));
}
auto* decl = sema_.ResolveVarUse(ctx->var());
if (!decl) {
throw std::runtime_error(
FormatError("irgen",
"变量使用缺少语义绑定: " + ctx->var()->ID()->getText()));
}
auto it = storage_map_.find(decl);
if (it == storage_map_.end()) {
throw std::runtime_error(
FormatError("irgen",
"变量声明缺少存储槽位: " + ctx->var()->ID()->getText()));
}
return static_cast<ir::Value*>(
builder_.CreateLoad(it->second, module_.GetContext().NextTemp()));
// Generates IR for a chain of relational operators (<, <=, >, >=), left to
// right. A single operand is returned unchanged. Each comparison is done in
// float when either side is float, otherwise in i32, and its i1 result is
// zero-extended to i32 so it can feed the next comparison.
ir::Value* IRGenImpl::GenRelExpr(SysYParser::RelExpContext& rel) {
  // Operator text -> predicate; anything unrecognised maps to "less than".
  const auto float_pred = [](const std::string& op) {
    if (op == "<=") return FCmpPred::Ole;
    if (op == ">") return FCmpPred::Ogt;
    if (op == ">=") return FCmpPred::Oge;
    return FCmpPred::Olt;
  };
  const auto int_pred = [](const std::string& op) {
    if (op == "<=") return ICmpPred::Sle;
    if (op == ">") return ICmpPred::Sgt;
    if (op == ">=") return ICmpPred::Sge;
    return ICmpPred::Slt;
  };

  ir::Value* result = GenAddExpr(*rel.addExp(0));
  for (size_t i = 1; i < rel.addExp().size(); ++i) {
    ir::Value* operand = GenAddExpr(*rel.addExp(i));
    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const std::string op = rel.children[2 * i - 1]->getText();
    const bool any_float =
        result->GetType()->IsFloat32() || operand->GetType()->IsFloat32();

    ir::Value* flag = nullptr;
    if (any_float) {
      result = CastValue(result, Type::GetFloatType());
      operand = CastValue(operand, Type::GetFloatType());
      flag = builder_.CreateFCmp(float_pred(op), result, operand,
                                 module_.GetContext().NextTemp());
    } else {
      result = CastValue(result, Type::GetInt32Type());
      operand = CastValue(operand, Type::GetInt32Type());
      flag = builder_.CreateICmp(int_pred(op), result, operand,
                                 module_.GetContext().NextTemp());
    }
    result = builder_.CreateZExt(flag, Type::GetInt32Type(),
                                 module_.GetContext().NextTemp());
  }
  return result;
}
// Generates IR for a chain of equality operators (==, !=), left to right.
// A single operand is returned unchanged. Each comparison is done in float
// when either side is float, otherwise in i32, and its i1 result is
// zero-extended to i32.
ir::Value* IRGenImpl::GenEqExpr(SysYParser::EqExpContext& eq) {
  ir::Value* result = GenRelExpr(*eq.relExp(0));
  for (size_t i = 1; i < eq.relExp().size(); ++i) {
    ir::Value* operand = GenRelExpr(*eq.relExp(i));
    // Operators sit between operands in the child list: op_i is child 2*i-1.
    const bool is_equal = eq.children[2 * i - 1]->getText() == "==";
    const bool any_float =
        result->GetType()->IsFloat32() || operand->GetType()->IsFloat32();

    ir::Value* flag = nullptr;
    if (any_float) {
      result = CastValue(result, Type::GetFloatType());
      operand = CastValue(operand, Type::GetFloatType());
      const FCmpPred pred = is_equal ? FCmpPred::Oeq : FCmpPred::One;
      flag = builder_.CreateFCmp(pred, result, operand,
                                 module_.GetContext().NextTemp());
    } else {
      result = CastValue(result, Type::GetInt32Type());
      operand = CastValue(operand, Type::GetInt32Type());
      const ICmpPred pred = is_equal ? ICmpPred::Eq : ICmpPred::Ne;
      flag = builder_.CreateICmp(pred, result, operand,
                                 module_.GetContext().NextTemp());
    }
    result = builder_.CreateZExt(flag, Type::GetInt32Type(),
                                 module_.GetContext().NextTemp());
  }
  return result;
}
ir::Value* IRGenImpl::GenLValueAddress(SysYParser::LValContext& lval) {
auto* symbol = sema_.ResolveLVal(&lval);
if (!symbol) {
throw std::runtime_error(FormatError("irgen", "左值缺少语义绑定"));
}
auto* entry = LookupStorage(symbol->name);
if (!entry || !entry->storage) {
throw std::runtime_error(FormatError("irgen", "找不到变量存储: " + symbol->name));
}
auto current_type = entry->declared_type;
ir::Value* current_ptr = entry->storage;
if (entry->is_array_param) {
current_ptr = builder_.CreateLoad(entry->storage, module_.GetContext().NextTemp());
}
std::any IRGenImpl::visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) {
if (!ctx || !ctx->exp(0) || !ctx->exp(1)) {
throw std::runtime_error(FormatError("irgen", "非法加法表达式"));
for (auto* index_expr : lval.exp()) {
auto* index = CastValue(GenExpr(*index_expr), Type::GetInt32Type());
if (current_type->IsArray()) {
current_ptr = builder_.CreateGEP(
current_ptr, {builder_.CreateConstInt(0), index},
module_.GetContext().NextTemp());
current_type = current_type->GetElementType();
continue;
}
if (current_type->IsPointer()) {
current_ptr =
builder_.CreateGEP(current_ptr, {index}, module_.GetContext().NextTemp());
current_type = current_type->GetElementType();
continue;
}
throw std::runtime_error(FormatError("irgen", "非法下标访问"));
}
return current_ptr;
}
ir::Value* IRGenImpl::GenLValueValue(SysYParser::LValContext& lval) {
auto result_type = sema_.ResolveExprType(&lval);
auto* addr = GenLValueAddress(lval);
if (!result_type) {
throw std::runtime_error(FormatError("irgen", "左值缺少结果类型"));
}
if (result_type->IsPointer()) {
if (SameType(addr->GetType(), result_type)) {
return addr;
}
if (addr->GetType()->GetElementType()->IsArray()) {
return DecayArrayPointer(addr);
}
}
ir::Value* lhs = EvalExpr(*ctx->exp(0));
ir::Value* rhs = EvalExpr(*ctx->exp(1));
return static_cast<ir::Value*>(
builder_.CreateBinary(ir::Opcode::Add, lhs, rhs,
module_.GetContext().NextTemp()));
return builder_.CreateLoad(addr, module_.GetContext().NextTemp());
}

@ -2,21 +2,19 @@
#include <stdexcept>
#include "SysYParser.h"
#include "ir/IR.h"
#include "utils/Log.h"
namespace {
void VerifyFunctionStructure(const ir::Function& func) {
// 当前 IRGen 仍是单入口、顺序生成;这里在生成结束后补一层块终结校验。
for (const auto& bb : func.GetBlocks()) {
if (!bb || !bb->HasTerminator()) {
throw std::runtime_error(
FormatError("irgen", "基本块未正确终结: " +
(bb ? bb->GetName() : std::string("<null>"))));
}
}
using ir::Type;
std::shared_ptr<Type> BuiltinFn(std::shared_ptr<Type> ret,
std::vector<std::shared_ptr<Type>> params) {
return Type::GetFunctionType(std::move(ret), std::move(params));
}
// Returns true only when both handles are non-null and lhs->Equals(*rhs)
// holds. A null handle on either side never compares equal.
bool SameType(const std::shared_ptr<Type>& lhs, const std::shared_ptr<Type>& rhs) {
  if (!lhs || !rhs) {
    return false;
  }
  return lhs->Equals(*rhs);
}
} // namespace
@ -24,64 +22,187 @@ void VerifyFunctionStructure(const ir::Function& func) {
IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema)
: module_(module),
sema_(sema),
func_(nullptr),
current_return_type_(Type::GetVoidType()),
builder_(module.GetContext(), nullptr) {}
// 编译单元的 IR 生成当前只实现了最小功能:
// - Module 已在 GenerateIR 中创建,这里只负责继续生成其中的内容;
// - 当前会读取编译单元中的函数定义,并交给 visitFuncDef 生成函数 IR
//
// 当前还没有实现:
// - 多个函数定义的遍历与生成;
// - 全局变量、全局常量的 IR 生成。
std::any IRGenImpl::visitCompUnit(SysYParser::CompUnitContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少编译单元"));
// Entry point for generating IR for a whole compilation unit.
// The phases run in a fixed order: builtin functions are declared first, then
// globals are generated, then every function is declared, and only afterwards
// are function bodies generated. GenFuncDef looks functions up by name in the
// module, so the declaration pass has to precede the body pass.
void IRGenImpl::Gen(SysYParser::CompUnitContext& cu) {
DeclareBuiltins();
GenGlobals(cu);
GenFunctionDecls(cu);
GenFunctionBodies(cu);
}
void IRGenImpl::DeclareBuiltins() {
const auto i32 = Type::GetInt32Type();
const auto f32 = Type::GetFloatType();
const auto void_ty = Type::GetVoidType();
const struct {
const char* name;
std::shared_ptr<Type> type;
} builtins[] = {
{"getint", BuiltinFn(i32, {})},
{"getch", BuiltinFn(i32, {})},
{"getfloat", BuiltinFn(f32, {})},
{"getarray", BuiltinFn(i32, {Type::GetPointerType(i32)})},
{"getfarray", BuiltinFn(i32, {Type::GetPointerType(f32)})},
{"putint", BuiltinFn(void_ty, {i32})},
{"putch", BuiltinFn(void_ty, {i32})},
{"putfloat", BuiltinFn(void_ty, {f32})},
{"putarray", BuiltinFn(void_ty, {i32, Type::GetPointerType(i32)})},
{"putfarray", BuiltinFn(void_ty, {i32, Type::GetPointerType(f32)})},
{"starttime", BuiltinFn(void_ty, {})},
{"stoptime", BuiltinFn(void_ty, {})},
};
for (const auto& builtin : builtins) {
if (!module_.FindFunction(builtin.name)) {
module_.CreateFunction(builtin.name, builtin.type, true);
}
}
}
// Declaration pass: creates an IR function, with its formal arguments, for
// every function definition in the compilation unit. No body is generated.
//
// Definitions that are null or have no identifier are skipped, and so are
// definitions whose name already exists in the module. Arguments are named
// "%arg0", "%arg1", ... in parameter order.
// Throws std::runtime_error when a definition has no semantic symbol.
void IRGenImpl::GenFunctionDecls(SysYParser::CompUnitContext& cu) {
  for (auto* def : cu.funcDef()) {
    if (def == nullptr || def->Ident() == nullptr) {
      continue;
    }
    auto* info = sema_.ResolveFuncDef(def);
    if (info == nullptr) {
      throw std::runtime_error(FormatError("irgen", "缺少函数语义信息"));
    }
    if (module_.FindFunction(info->name) != nullptr) {
      continue;  // a function with this name is already in the module
    }

    auto* declared = module_.CreateFunction(info->name, info->type, false);
    const auto& param_types = info->type->GetParamTypes();
    size_t position = 0;
    for (const auto& param_type : param_types) {
      declared->AddArgument(param_type, "%arg" + std::to_string(position));
      ++position;
    }
  }
}
// Body pass: generates IR for each non-null function definition of the
// compilation unit, in source order, by handing it to GenFuncDef.
void IRGenImpl::GenFunctionBodies(SysYParser::CompUnitContext& cu) {
  for (auto* def : cu.funcDef()) {
    if (def == nullptr) {
      continue;
    }
    GenFuncDef(*def);
  }
}
void IRGenImpl::GenFuncDef(SysYParser::FuncDefContext& func) {
auto* symbol = sema_.ResolveFuncDef(&func);
if (!symbol) {
throw std::runtime_error(FormatError("irgen", "函数缺少语义绑定"));
}
current_function_ = module_.FindFunction(symbol->name);
if (!current_function_) {
throw std::runtime_error(FormatError("irgen", "函数声明缺失: " + symbol->name));
}
auto* func = ctx->funcDef();
if (!func) {
throw std::runtime_error(FormatError("irgen", "缺少函数定义"));
current_return_type_ = symbol->type->GetReturnType();
auto* entry = current_function_->CreateBlock("entry");
auto* body = current_function_->CreateBlock("entry.body");
builder_.SetInsertPoint(body);
local_scopes_.clear();
break_targets_.clear();
continue_targets_.clear();
EnterScope();
if (auto* params = func.funcFParams()) {
const auto& args = current_function_->GetArguments();
for (size_t i = 0; i < params->funcFParam().size(); ++i) {
auto* param = params->funcFParam(i);
const auto* arg = args.at(i).get();
const std::string name = param->Ident()->getText();
auto* slot = builder_.CreateAlloca(arg->GetType(), module_.GetContext().NextTemp());
builder_.CreateStore(const_cast<ir::Argument*>(arg), slot);
DeclareLocal(name, {slot, arg->GetType(), !param->L_BRACK().empty(), false, false});
}
}
GenBlock(*func.block());
ExitScope();
ir::IRBuilder entry_builder(module_.GetContext(), entry);
entry_builder.CreateBr(body);
if (builder_.GetInsertBlock() && !builder_.GetInsertBlock()->HasTerminator()) {
if (current_return_type_->IsVoid()) {
builder_.CreateRetVoid();
} else if (current_return_type_->IsFloat32()) {
builder_.CreateRet(builder_.CreateConstFloat(0.0f));
} else {
builder_.CreateRet(builder_.CreateConstInt(0));
}
}
func->accept(this);
return {};
}
// 函数 IR 生成当前实现了:
// 1. 获取函数名;
// 2. 检查函数返回类型;
// 3. 在 Module 中创建 Function
// 4. 将 builder 插入点设置到入口基本块;
// 5. 继续生成函数体。
//
// 当前还没有实现:
// - 通用函数返回类型处理;
// - 形参列表遍历与参数类型收集;
// - FunctionType 这样的函数类型对象;
// - Argument/形式参数 IR 对象;
// - 入口块中的参数初始化逻辑。
// ...
// 因此这里目前只支持最小的“无参 int 函数”生成。
std::any IRGenImpl::visitFuncDef(SysYParser::FuncDefContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少函数定义"));
// Opens a new, empty innermost local scope on top of the scope stack.
void IRGenImpl::EnterScope() { local_scopes_.emplace_back(); }
void IRGenImpl::ExitScope() {
if (!local_scopes_.empty()) {
local_scopes_.pop_back();
}
if (!ctx->blockStmt()) {
throw std::runtime_error(FormatError("irgen", "函数体为空"));
}
void IRGenImpl::EnsureInsertableBlock() {
if (!builder_.GetInsertBlock()) {
auto* block = current_function_->CreateBlock(module_.GetContext().NextBlock("dead"));
builder_.SetInsertPoint(block);
return;
}
if (!ctx->ID()) {
throw std::runtime_error(FormatError("irgen", "缺少函数名"));
if (builder_.GetInsertBlock()->HasTerminator()) {
auto* block = current_function_->CreateBlock(module_.GetContext().NextBlock("dead"));
builder_.SetInsertPoint(block);
}
if (!ctx->funcType() || !ctx->funcType()->INT()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持无参 int 函数"));
}
// Binds `name` to `entry` in the innermost local scope, replacing any binding
// that scope already holds for the same name. If no scope is open yet, one is
// opened first.
void IRGenImpl::DeclareLocal(const std::string& name, StorageEntry entry) {
  if (local_scopes_.empty()) {
    EnterScope();
  }
  auto& innermost = local_scopes_.back();
  innermost.insert_or_assign(name, std::move(entry));
}
// Mutable lookup of the storage entry bound to `name`.
// Searches local scopes from innermost to outermost and then the globals;
// returns nullptr when the name is not bound anywhere.
IRGenImpl::StorageEntry* IRGenImpl::LookupStorage(const std::string& name) {
  // Reuse the const overload for the search itself. Dropping const from the
  // result is safe because `*this` is not const in this overload.
  const IRGenImpl* self = this;
  return const_cast<StorageEntry*>(self->LookupStorage(name));
}
// Read-only lookup of the storage entry bound to `name`.
// Local scopes are searched from innermost to outermost, so an inner binding
// shadows an outer one; globals are consulted last. Returns nullptr when the
// name is not bound anywhere.
const IRGenImpl::StorageEntry* IRGenImpl::LookupStorage(const std::string& name) const {
  auto scope = local_scopes_.rbegin();
  while (scope != local_scopes_.rend()) {
    const auto hit = scope->find(name);
    if (hit != scope->end()) {
      return &hit->second;
    }
    ++scope;
  }

  const auto global_hit = globals_.find(name);
  if (global_hit == globals_.end()) {
    return nullptr;
  }
  return &global_hit->second;
}
func_ = module_.CreateFunction(ctx->ID()->getText(), ir::Type::GetInt32Type());
builder_.SetInsertPoint(func_->GetEntry());
storage_map_.clear();
size_t IRGenImpl::CountScalars(const std::shared_ptr<Type>& type) const {
if (!type->IsArray()) {
return 1;
}
return type->GetArraySize() * CountScalars(type->GetElementType());
}
ctx->blockStmt()->accept(this);
// 语义正确性主要由 sema 保证,这里只兜底检查 IR 结构是否合法。
VerifyFunctionStructure(*func_);
return {};
std::vector<int> IRGenImpl::FlatIndexToIndices(const std::shared_ptr<Type>& type,
size_t flat_index) const {
if (!type->IsArray()) {
return {};
}
size_t inner = CountScalars(type->GetElementType());
int current = static_cast<int>(flat_index / inner);
auto tail = FlatIndexToIndices(type->GetElementType(), flat_index % inner);
tail.insert(tail.begin(), current);
return tail;
}

@ -2,38 +2,164 @@
#include <stdexcept>
#include "SysYParser.h"
#include "ir/IR.h"
#include "utils/Log.h"
// 语句生成当前只实现了最小子集。
// 目前支持:
// - return <exp>;
//
// 还未支持:
// - 赋值语句
// - if / while 等控制流
// - 空语句、块语句嵌套分发之外的更多语句形态
// Generates IR for a `{ ... }` block inside its own local scope.
// Before each non-null item, EnsureInsertableBlock() is called so that the
// builder has a block it can append to even after an earlier item ended the
// current one with a terminator.
void IRGenImpl::GenBlock(SysYParser::BlockContext& block) {
  EnterScope();
  for (auto* entry : block.blockItem()) {
    if (entry != nullptr) {
      EnsureInsertableBlock();
      GenBlockItem(*entry);
    }
  }
  ExitScope();
}
// Dispatches a block item to declaration or statement generation.
// A declaration takes precedence when both are present.
// Throws std::runtime_error when the item is neither.
void IRGenImpl::GenBlockItem(SysYParser::BlockItemContext& item) {
  if (auto* declaration = item.decl()) {
    GenDecl(*declaration);
  } else if (auto* statement = item.stmt()) {
    GenStmt(*statement);
  } else {
    throw std::runtime_error(FormatError("irgen", "未知 block item"));
  }
}
std::any IRGenImpl::visitStmt(SysYParser::StmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少语句"));
void IRGenImpl::GenStmt(SysYParser::StmtContext& stmt) {
if (stmt.assignStmt()) {
auto* assign = stmt.assignStmt();
auto* addr = GenLValueAddress(*assign->lVal());
auto* value = CastValue(GenExpr(*assign->exp()), addr->GetType()->GetElementType());
builder_.CreateStore(value, addr);
return;
}
if (ctx->returnStmt()) {
return ctx->returnStmt()->accept(this);
if (stmt.expStmt()) {
if (stmt.expStmt()->exp()) {
(void)GenExpr(*stmt.expStmt()->exp());
}
return;
}
if (stmt.block()) {
GenBlock(*stmt.block());
return;
}
if (stmt.ifStmt()) {
auto* then_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("if.then"));
ir::BasicBlock* else_block = nullptr;
ir::BasicBlock* merge_block = nullptr;
if (stmt.ifStmt()->Else()) {
else_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("if.else"));
merge_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("if.end"));
GenCond(*stmt.ifStmt()->cond(), then_block, else_block);
builder_.SetInsertPoint(then_block);
GenStmt(*stmt.ifStmt()->stmt(0));
if (!builder_.GetInsertBlock()->HasTerminator()) {
builder_.CreateBr(merge_block);
}
builder_.SetInsertPoint(else_block);
GenStmt(*stmt.ifStmt()->stmt(1));
if (!builder_.GetInsertBlock()->HasTerminator()) {
builder_.CreateBr(merge_block);
}
builder_.SetInsertPoint(merge_block);
} else {
merge_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("if.end"));
GenCond(*stmt.ifStmt()->cond(), then_block, merge_block);
builder_.SetInsertPoint(then_block);
GenStmt(*stmt.ifStmt()->stmt(0));
if (!builder_.GetInsertBlock()->HasTerminator()) {
builder_.CreateBr(merge_block);
}
builder_.SetInsertPoint(merge_block);
}
return;
}
if (stmt.whileStmt()) {
auto* cond_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("while.cond"));
auto* body_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("while.body"));
auto* exit_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("while.end"));
builder_.CreateBr(cond_block);
builder_.SetInsertPoint(cond_block);
GenCond(*stmt.whileStmt()->cond(), body_block, exit_block);
break_targets_.push_back(exit_block);
continue_targets_.push_back(cond_block);
builder_.SetInsertPoint(body_block);
GenStmt(*stmt.whileStmt()->stmt());
if (!builder_.GetInsertBlock()->HasTerminator()) {
builder_.CreateBr(cond_block);
}
break_targets_.pop_back();
continue_targets_.pop_back();
builder_.SetInsertPoint(exit_block);
return;
}
if (stmt.breakStmt()) {
builder_.CreateBr(break_targets_.back());
return;
}
if (stmt.continueStmt()) {
builder_.CreateBr(continue_targets_.back());
return;
}
if (stmt.returnStmt()) {
if (!stmt.returnStmt()->exp()) {
builder_.CreateRetVoid();
return;
}
auto* value = GenExpr(*stmt.returnStmt()->exp());
builder_.CreateRet(CastValue(value, current_return_type_));
return;
}
throw std::runtime_error(FormatError("irgen", "暂不支持的语句类型"));
}
// Generates control flow for a condition by forwarding its logical-or
// expression, together with the two branch targets, to GenLOrCond.
//   true_block  - target passed on as the "condition holds" destination
//   false_block - target passed on as the "condition fails" destination
void IRGenImpl::GenCond(SysYParser::CondContext& cond, ir::BasicBlock* true_block,
ir::BasicBlock* false_block) {
GenLOrCond(*cond.lOrExp(), true_block, false_block);
}
std::any IRGenImpl::visitReturnStmt(SysYParser::ReturnStmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少 return 语句"));
void IRGenImpl::GenLOrCond(SysYParser::LOrExpContext& expr,
ir::BasicBlock* true_block,
ir::BasicBlock* false_block) {
const auto& terms = expr.lAndExp();
for (size_t i = 0; i + 1 < terms.size(); ++i) {
auto* next_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("lor.rhs"));
GenLAndCond(*terms[i], true_block, next_block);
builder_.SetInsertPoint(next_block);
}
if (!ctx->exp()) {
throw std::runtime_error(FormatError("irgen", "return 缺少表达式"));
GenLAndCond(*terms.back(), true_block, false_block);
}
void IRGenImpl::GenLAndCond(SysYParser::LAndExpContext& expr,
ir::BasicBlock* true_block,
ir::BasicBlock* false_block) {
const auto& terms = expr.eqExp();
for (size_t i = 0; i + 1 < terms.size(); ++i) {
auto* next_block =
current_function_->CreateBlock(module_.GetContext().NextBlock("land.rhs"));
auto* value = ToBool(GenEqExpr(*terms[i]));
builder_.CreateCondBr(value, next_block, false_block);
builder_.SetInsertPoint(next_block);
}
ir::Value* v = EvalExpr(*ctx->exp());
builder_.CreateRet(v);
return BlockFlow::Terminated;
auto* value = ToBool(GenEqExpr(*terms.back()));
builder_.CreateCondBr(value, true_block, false_block);
}

@ -29,6 +29,9 @@ int main(int argc, char** argv) {
}
#if !COMPILER_PARSE_ONLY
if (!opts.emit_ir && !opts.emit_asm) {
return 0;
}
auto* comp_unit = dynamic_cast<SysYParser::CompUnitContext*>(antlr.tree);
if (!comp_unit) {
throw std::runtime_error(FormatError("main", "语法树根节点不是 compUnit"));

File diff suppressed because it is too large Load Diff

@ -1,17 +1,38 @@
// 维护局部变量声明的注册与查找。
#include "sem/SymbolTable.h"
void SymbolTable::Add(const std::string& name,
SysYParser::VarDefContext* decl) {
table_[name] = decl;
#include <stdexcept>
// Pushes a new, empty innermost scope onto the scope stack.
void SymbolTable::EnterScope() { scopes_.emplace_back(); }
// Pops the innermost scope.
// Throws std::runtime_error when there is no scope left to pop.
void SymbolTable::ExitScope() {
  if (!scopes_.empty()) {
    scopes_.pop_back();
    return;
  }
  throw std::runtime_error("作用域栈为空,无法退出");
}
// Registers `symbol` under `name` in the innermost scope, opening a scope
// first if none exists.
// Returns true when the name was newly added, false when the innermost scope
// already had a binding for it (the existing binding is kept).
bool SymbolTable::Declare(const std::string& name, const SymbolInfo* symbol) {
  if (scopes_.empty()) {
    EnterScope();
  }
  const auto outcome = scopes_.back().emplace(name, symbol);
  const bool newly_added = outcome.second;
  return newly_added;
}
bool SymbolTable::Contains(const std::string& name) const {
return table_.find(name) != table_.end();
// Resolves `name` by searching scopes from innermost to outermost, so an
// inner declaration shadows an outer one.
// Returns nullptr when no scope declares the name.
const SymbolInfo* SymbolTable::Lookup(const std::string& name) const {
  auto scope = scopes_.rbegin();
  while (scope != scopes_.rend()) {
    const auto hit = scope->find(name);
    if (hit != scope->end()) {
      return hit->second;
    }
    ++scope;
  }
  return nullptr;
}
SysYParser::VarDefContext* SymbolTable::Lookup(const std::string& name) const {
auto it = table_.find(name);
return it == table_.end() ? nullptr : it->second;
// Resolves `name` in the innermost scope only; outer scopes are ignored.
// Returns nullptr when there is no scope or the innermost one lacks the name.
const SymbolInfo* SymbolTable::LookupCurrent(const std::string& name) const {
  if (scopes_.empty()) {
    return nullptr;
  }
  const auto& innermost = scopes_.back();
  const auto hit = innermost.find(name);
  if (hit != innermost.end()) {
    return hit->second;
  }
  return nullptr;
}

@ -1,4 +1,66 @@
// SysY 运行库实现:
// - 按实验/评测规范提供 I/O 等函数实现
// - 与编译器生成的目标代码链接,支撑运行时行为
#include "sylib.h"
#include <stdio.h>
#include <stdlib.h>
/* Reads one whitespace-delimited token (at most 127 characters) from stdin
 * and converts it with strtof, so whatever notations strtof accepts are
 * accepted here. Returns 0.0f when no token could be read. */
static float ReadFloatToken(void) {
  char token[128] = {0};
  float parsed = 0.0f;
  if (scanf("%127s", token) == 1) {
    parsed = strtof(token, NULL);
  }
  return parsed;
}
/* Reads one decimal integer from stdin.
 * Returns 0 when no integer could be read (end of input or a non-numeric
 * token). */
int getint(void) {
  int parsed = 0;
  if (scanf("%d", &parsed) != 1) {
    return 0;
  }
  return parsed;
}
/* Reads a single character from stdin and returns it as an int; returns EOF
 * when no character is available. */
int getch(void) {
  const int next = fgetc(stdin);
  return next;
}
/* Reads one floating-point value from stdin by delegating to
 * ReadFloatToken(). */
float getfloat(void) { return ReadFloatToken(); }
/* Reads an element count followed by that many integers from stdin, storing
 * them into a[0..count-1]. Returns the count as read.
 * No bound is checked here: the caller must supply an array large enough for
 * the count found in the input. */
int getarray(int a[]) {
  const int count = getint();
  int idx = 0;
  while (idx < count) {
    a[idx] = getint();
    ++idx;
  }
  return count;
}
/* Reads an element count followed by that many floats from stdin, storing
 * them into a[0..count-1]. Returns the count as read.
 * No bound is checked here: the caller must supply an array large enough for
 * the count found in the input. */
int getfarray(float a[]) {
  const int count = getint();
  int idx = 0;
  while (idx < count) {
    a[idx] = getfloat();
    ++idx;
  }
  return count;
}
/* Writes x to stdout in decimal, with no separator or newline. */
void putint(int x) {
  fprintf(stdout, "%d", x);
}
/* Writes the character with code x to stdout. */
void putch(int x) {
  fputc(x, stdout);
}
/* Writes x to stdout using the "%a" (hexadecimal floating-point) format,
 * with no separator or newline. */
void putfloat(float x) {
  fprintf(stdout, "%a", x);
}
/* Writes "n:" followed by the first n integers of a, each preceded by a
 * single space, and terminates the line with '\n'. */
void putarray(int n, int a[]) {
  fprintf(stdout, "%d:", n);
  int idx = 0;
  while (idx < n) {
    fprintf(stdout, " %d", a[idx]);
    ++idx;
  }
  fputc('\n', stdout);
}
/* Writes "n:" followed by the first n floats of a in "%a" format, each
 * preceded by a single space, and terminates the line with '\n'. */
void putfarray(int n, float a[]) {
  fprintf(stdout, "%d:", n);
  int idx = 0;
  while (idx < n) {
    fprintf(stdout, " %a", a[idx]);
    ++idx;
  }
  fputc('\n', stdout);
}
/* Currently a no-op: the body is empty, so no timing is recorded. */
void starttime(void) {}
/* Currently a no-op: the body is empty, so no timing is reported. */
void stoptime(void) {}

@ -1,4 +1,16 @@
// SysY 运行库头文件:
// - 声明运行库函数原型(供编译器生成 call 或链接阶段引用)
// - 与 sylib.c 配套,按规范逐步补齐声明
#pragma once
/* Input: each call consumes data from standard input. */
/* Reads one decimal integer. */
int getint(void);
/* Reads one character and returns its code. */
int getch(void);
/* Reads one floating-point value. */
float getfloat(void);
/* Reads a count, then that many integers into a[]; returns the count. */
int getarray(int a[]);
/* Reads a count, then that many floats into a[]; returns the count. */
int getfarray(float a[]);
/* Output: each call writes to standard output. */
/* Writes x in decimal. */
void putint(int x);
/* Writes the character with code x. */
void putch(int x);
/* Writes x as a floating-point value. */
void putfloat(float x);
/* Writes n, a colon, then the first n integers of a[] on one line. */
void putarray(int n, int a[]);
/* Writes n, a colon, then the first n floats of a[] on one line. */
void putfarray(int n, float a[]);
/* Timing hooks. */
void starttime(void);
void stoptime(void);

@ -0,0 +1,6 @@
int a[3] = 1;
float b[2] = 2.5;
int main() {
return a[0] + a[1] + a[2] + b[0] + b[1];
}

@ -0,0 +1,5 @@
const int a = 5 % 2.0;
int main() {
return a;
}

@ -0,0 +1,3 @@
int main( {
return 0;
}

@ -0,0 +1,4 @@
int main() {
int a = 1
return a;
}

@ -0,0 +1,3 @@
int main() {
else return 0;
}
Loading…
Cancel
Save