compiler
jakeallen 1 year ago
parent 80e0a0e081
commit ab4bd7a366

@ -0,0 +1,15 @@
# Top-level build script: builds each compiler stage as a library and links
# them into the driver executable.
cmake_minimum_required(VERSION 3.16)

# One subdirectory per stage.
add_subdirectory(parser)
add_subdirectory(ir)
add_subdirectory(opt)
add_subdirectory(riscv)

set(SOURCE_FILES main.cpp)
add_executable(carrotcompiler ${SOURCE_FILES})

# The CMake target is `carrotcompiler`; only the produced binary is called
# `compiler`.
set_target_properties(carrotcompiler PROPERTIES OUTPUT_NAME "compiler")

# target_* commands take the target name, not its OUTPUT_NAME. Referring to
# `compiler` here would name a target that does not exist.
target_include_directories(carrotcompiler PRIVATE parser ir opt riscv)
target_link_libraries(carrotcompiler parser ir opt riscv)

@ -0,0 +1,7 @@
# Builds the `ir` static library from ir.cpp and genIR.cpp.
# NOTE(review): this asks for CMake 3.21 while the top-level script only asks
# for 3.16 - confirm the stricter requirement is intentional.
cmake_minimum_required(VERSION 3.21)
set(SOURCE_FILES "ir.cpp" "genIR.cpp")
add_library(ir STATIC ${SOURCE_FILES})
# PARSER_INCLUDE is not defined in this file; presumably it is exported by the
# parser subdirectory - verify it is visible in this scope.
target_include_directories(ir PRIVATE "${PARSER_INCLUDE}")

File diff suppressed because it is too large Load Diff

@ -0,0 +1,210 @@
#pragma once
#include "ast.h"
#include "ir.h"
#include <map>
class Scope {
public:
// enter a new scope
void enter() { symbol.push_back({}); }
// exit a scope
void exit() { symbol.pop_back(); }
bool in_global() { return symbol.size() == 1; }
// push a name to scope
// return true if successful
// return false if this name already exits
// but func name could be same with variable name
bool push(std::string name, Value *val) {
bool result;
result = (symbol[symbol.size() - 1].insert({name, val})).second;
return result;
}
Value *find(std::string name) {
for (auto s = symbol.rbegin(); s != symbol.rend(); s++) {
auto iter = s->find(name);
if (iter != s->end()) {
return iter->second;
}
}
return nullptr;
}
private:
std::vector<std::map<std::string, Value *>> symbol;
};
class GenIR : public Visitor {
public:
void visit(CompUnitAST &ast) override;
void visit(DeclDefAST &ast) override;
void visit(DeclAST &ast) override;
void visit(DefAST &ast) override;
void visit(InitValAST &ast) override;
void visit(FuncDefAST &ast) override;
void visit(FuncFParamAST &ast) override;
void visit(BlockAST &ast) override;
void visit(BlockItemAST &ast) override;
void visit(StmtAST &ast) override;
void visit(ReturnStmtAST &ast) override;
void visit(SelectStmtAST &ast) override;
void visit(IterationStmtAST &ast) override;
void visit(AddExpAST &ast) override;
void visit(LValAST &ast) override;
void visit(MulExpAST &ast) override;
void visit(UnaryExpAST &ast) override;
void visit(PrimaryExpAST &ast) override;
void visit(CallAST &ast) override;
void visit(NumberAST &ast) override;
void visit(RelExpAST &ast) override;
void visit(EqExpAST &ast) override;
void visit(LAndExpAST &ast) override;
void visit(LOrExpAST &ast) override;
IRStmtBuilder *builder;
Scope scope;
std::unique_ptr<Module> module;
GenIR() {
module = std::unique_ptr<Module>(new Module());
builder = new IRStmtBuilder(nullptr, module.get());
auto TyVoid = module->void_ty_;
auto TyInt32 = module->int32_ty_;
auto TyInt32Ptr = module->get_pointer_type(module->int32_ty_);
auto TyFloat = module->float32_ty_;
auto TyFloatPtr = module->get_pointer_type(module->float32_ty_);
auto input_type = new FunctionType(TyInt32, {});
auto get_int = new Function(input_type, "getint", module.get());
input_type = new FunctionType(TyFloat, {});
auto get_float = new Function(input_type, "getfloat", module.get());
input_type = new FunctionType(TyInt32, {});
auto get_char = new Function(input_type, "getch", module.get());
std::vector<Type *> input_params;
std::vector<Type *>().swap(input_params);
input_params.push_back(TyInt32Ptr);
input_type = new FunctionType(TyInt32, input_params);
auto get_int_array = new Function(input_type, "getarray", module.get());
std::vector<Type *>().swap(input_params);
input_params.push_back(TyFloatPtr);
input_type = new FunctionType(TyInt32, input_params);
auto get_float_array = new Function(input_type, "getfarray", module.get());
std::vector<Type *> output_params;
std::vector<Type *>().swap(output_params);
output_params.push_back(TyInt32);
auto output_type = new FunctionType(TyVoid, output_params);
auto put_int = new Function(output_type, "putint", module.get());
std::vector<Type *>().swap(output_params);
output_params.push_back(TyFloat);
output_type = new FunctionType(TyVoid, output_params);
auto put_float = new Function(output_type, "putfloat", module.get());
std::vector<Type *>().swap(output_params);
output_params.push_back(TyInt32);
output_type = new FunctionType(TyVoid, output_params);
auto put_char = new Function(output_type, "putch", module.get());
std::vector<Type *>().swap(output_params);
output_params.push_back(TyInt32);
output_params.push_back(TyInt32Ptr);
output_type = new FunctionType(TyVoid, output_params);
auto put_int_array = new Function(output_type, "putarray", module.get());
std::vector<Type *>().swap(output_params);
output_params.push_back(TyInt32);
output_params.push_back(TyFloatPtr);
output_type = new FunctionType(TyVoid, output_params);
auto put_float_array = new Function(output_type, "putfarray", module.get());
output_params.clear();
// output_params.push_back(TyInt32);
auto time_type = new FunctionType(TyVoid, output_params);
auto sysy_start_time =
new Function(time_type, "_sysy_starttime", module.get());
auto sysy_stop_time =
new Function(time_type, "_sysy_stoptime", module.get());
output_params.clear();
output_params.push_back(TyInt32Ptr);
output_params.push_back(TyInt32Ptr);
output_params.push_back(TyInt32);
output_type = new FunctionType(TyVoid, output_params);
auto memcpy = new Function(output_type, "__aeabi_memcpy4", module.get());
output_params.clear();
output_params.push_back(TyInt32Ptr);
output_params.push_back(TyInt32);
output_type = new FunctionType(TyVoid, output_params);
auto memclr = new Function(output_type, "__aeabi_memclr4", module.get());
output_params.push_back(TyInt32);
output_type = new FunctionType(TyVoid, output_params);
auto memset = new Function(output_type, "__aeabi_memset4", module.get());
output_params.clear();
output_type = new FunctionType(TyVoid, output_params);
auto llvm_memset =
new Function(output_type, "llvm.memset.p0.i32", module.get());
// output_params.clear();
// output_params.push_back(TyInt32);
// output_type = new FunctionType(TyInt32, output_params);
// auto my_malloc = new Function(output_type, "malloc", module.get());
scope.enter();
scope.push("getint", get_int);
scope.push("getfloat", get_float);
scope.push("getch", get_char);
scope.push("getarray", get_int_array);
scope.push("getfarray", get_float_array);
scope.push("putint", put_int);
scope.push("putfloat", put_float);
scope.push("putch", put_char);
scope.push("putarray", put_int_array);
scope.push("putfarray", put_float_array);
scope.push("starttime", sysy_start_time);
scope.push("stoptime", sysy_stop_time);
scope.push("memcpy", memcpy);
scope.push("memclr", memclr);
scope.push("memset", memset);
scope.push("llvm.memset.p0.i32", llvm_memset);
// scope.push("malloc",my_malloc);
}
std::unique_ptr<Module> getModule() { return std::move(module); }
void checkInitType() const;
static int getNextDim(vector<int> &dimensionsCnt, int up, int cnt);
void localInit(Value *ptr, vector<unique_ptr<InitValAST>> &list,
vector<int> &dimensionsCnt, int up);
static int getNextDim(vector<int> &elementsCnts, int up);
ConstantArray *globalInit(vector<int> &dimensions,
vector<ArrayType *> &arrayTys, int up,
vector<unique_ptr<InitValAST>> &list);
static void mergeElements(vector<int> &dimensions,
vector<ArrayType *> &arrayTys, int up, int dimAdd,
vector<Constant *> &elements,
vector<int> &elementsCnts);
void finalMerge(vector<int> &dimensions, vector<ArrayType *> &arrayTys,
int up, vector<Constant *> &elements,
vector<int> &elementsCnts) const;
bool checkCalType(Value **val, int *intVal, float *floatVal);
void checkCalType(Value **val);
};

@ -0,0 +1,838 @@
#include "ir.h"

#include <cstring>
// Opcode -> textual mnemonic table used by the Instruction print() methods.
// It is a non-const map and is read with operator[], so looking up an opcode
// that is missing here inserts an empty string instead of failing.
std::map<Instruction::OpID, std::string> instr_id2string_ = {
{Instruction::Ret, "ret"},
{Instruction::Br, "br"},
{Instruction::FNeg, "fneg"},
{Instruction::Add, "add"},
{Instruction::Sub, "sub"},
{Instruction::Mul, "mul"},
{Instruction::SDiv, "sdiv"},
{Instruction::SRem, "srem"},
{Instruction::UDiv, "udiv"},
{Instruction::URem, "urem"},
{Instruction::FAdd, "fadd"},
{Instruction::FSub, "fsub"},
{Instruction::FMul, "fmul"},
{Instruction::FDiv, "fdiv"},
{Instruction::Shl, "shl"},
{Instruction::LShr, "lshr"},
{Instruction::AShr, "ashr"},
{Instruction::And, "and"},
{Instruction::Or, "or"},
{Instruction::Xor, "xor"},
{Instruction::Alloca, "alloca"},
{Instruction::Load, "load"},
{Instruction::Store, "store"},
{Instruction::GetElementPtr, "getelementptr"},
{Instruction::ZExt, "zext"},
{Instruction::FPtoSI, "fptosi"},
{Instruction::SItoFP, "sitofp"},
{Instruction::BitCast, "bitcast"},
{Instruction::ICmp, "icmp"},
{Instruction::FCmp, "fcmp"},
{Instruction::PHI, "phi"},
{Instruction::Call, "call"}}; // Instruction from opid to string
// Integer comparison predicate -> short suffix string.
// NOTE(review): the values ("hi", "cs", "cc", "ls", ...) look like ARM
// condition-code mnemonics rather than IR predicate names - presumably used
// by a backend; confirm. The IR printer in this file uses print_cmp_type()
// instead of this table.
const std::map<ICmpInst::ICmpOp, std::string> ICmpInst::ICmpOpName = {
{ICmpInst::ICmpOp::ICMP_EQ, "eq"}, {ICmpInst::ICmpOp::ICMP_NE, "ne"},
{ICmpInst::ICmpOp::ICMP_UGT, "hi"}, {ICmpInst::ICmpOp::ICMP_UGE, "cs"},
{ICmpInst::ICmpOp::ICMP_ULT, "cc"}, {ICmpInst::ICmpOp::ICMP_ULE, "ls"},
{ICmpInst::ICmpOp::ICMP_SGT, "gt"}, {ICmpInst::ICmpOp::ICMP_SGE, "ge"},
{ICmpInst::ICmpOp::ICMP_SLT, "lt"}, {ICmpInst::ICmpOp::ICMP_SLE, "le"}};
// Floating-point comparison predicate -> short suffix string.
// NOTE(review): as with the integer table, the values look like ARM
// condition-code mnemonics - confirm the intended consumer. Several distinct
// predicates share one suffix (e.g. OEQ and UEQ both map to "eq"). The IR
// printer in this file uses print_fcmp_type() instead of this table.
const std::map<FCmpInst::FCmpOp, std::string> FCmpInst::FCmpOpName = {
{FCmpInst::FCmpOp::FCMP_FALSE, "nv"}, {FCmpInst::FCmpOp::FCMP_OEQ, "eq"},
{FCmpInst::FCmpOp::FCMP_OGT, "gt"}, {FCmpInst::FCmpOp::FCMP_OGE, "ge"},
{FCmpInst::FCmpOp::FCMP_OLT, "cc"}, {FCmpInst::FCmpOp::FCMP_OLE, "ls"},
{FCmpInst::FCmpOp::FCMP_ONE, "ne"}, {FCmpInst::FCmpOp::FCMP_ORD, "vc"},
{FCmpInst::FCmpOp::FCMP_UNO, "vs"}, {FCmpInst::FCmpOp::FCMP_UEQ, "eq"},
{FCmpInst::FCmpOp::FCMP_UGT, "hi"}, {FCmpInst::FCmpOp::FCMP_UGE, "cs"},
{FCmpInst::FCmpOp::FCMP_ULT, "lt"}, {FCmpInst::FCmpOp::FCMP_ULE, "le"},
{FCmpInst::FCmpOp::FCMP_UNE, "ne"}, {FCmpInst::FCmpOp::FCMP_TRUE, "al"}};
// Forward declarations of the printing helpers defined near the end of this
// file, so the print() methods below can call them.
// Renders a value as an operand, optionally prefixed by its type.
std::string print_as_op(Value *v, bool print_ty);
// Returns the textual predicate for an integer comparison.
std::string print_cmp_type(ICmpInst::ICmpOp op);
// Returns the textual predicate for a floating-point comparison.
std::string print_fcmp_type(FCmpInst::FCmpOp op);
//-----------------------------------------------Type-----------------------------------------------
std::string Type::print() {
std::string type_ir;
switch (this->tid_) {
case VoidTyID:
type_ir += "void";
break;
case LabelTyID:
type_ir += "label";
break;
case IntegerTyID:
type_ir += "i";
type_ir += std::to_string(static_cast<IntegerType *>(this)->num_bits_);
break;
case FloatTyID:
type_ir += "float";
break;
case FunctionTyID:
type_ir += static_cast<FunctionType *>(this)->result_->print();
type_ir += " (";
for (size_t i = 0; i < static_cast<FunctionType *>(this)->args_.size();
i++) {
if (i)
type_ir += ", ";
type_ir += static_cast<FunctionType *>(this)->args_[i]->print();
}
type_ir += ")";
break;
case PointerTyID:
type_ir += static_cast<PointerType *>(this)->contained_->print();
type_ir += "*";
break;
case ArrayTyID:
type_ir += "[";
type_ir += std::to_string(static_cast<ArrayType *>(this)->num_elements_);
type_ir += " x ";
type_ir += static_cast<ArrayType *>(this)->contained_->print();
type_ir += "]";
break;
default:
break;
}
return type_ir;
}
//-----------------------------------------------Value-----------------------------------------------
// Rewrites every recorded use of this value so that the using instruction
// refers to `new_val` instead.
//
// The loop walks a snapshot of use_list_. set_operand() is not visible here,
// but the note in remove_used() indicates it unlinks the old operand's use
// entry; doing that while iterating the live list would invalidate the
// iterator the loop is standing on.
void Value::replace_all_use_with(Value *new_val) {
  const std::list<Use> uses_snapshot = use_list_;
  for (const Use &use : uses_snapshot) {
    auto *user = dynamic_cast<Instruction *>(use.val_);
#ifdef DEBUG
    assert(user && "new_val is not a user");
#endif
    // A use whose owner is not an Instruction cannot be rewritten; skip it
    // instead of dereferencing a null pointer in non-DEBUG builds.
    if (!user)
      continue;
    user->set_operand(use.arg_no_, new_val);
  }
}
// Drops the use link between this value and operand slot `i` of `user`.
// Returns false (and changes nothing) when that slot does not hold this
// value; otherwise erases the recorded use and returns true.
bool Value::remove_used(Instruction *user, unsigned int i) {
  const bool holds_this = (user->operands_[i] == this);
  if (holds_this) {
    use_list_.erase(user->use_pos_[i]);
    // Null marks user->use_pos_[i] as stale so that set_operand does not try
    // to erase it a second time.
    user->operands_[i] = nullptr;
  }
  return holds_this;
}
// Treats a value as constant when its name has no first character, i.e. the
// name is empty (or begins with a NUL byte).
bool Value::is_constant() {
  const char first_char = name_[0];
  return first_char == '\0';
}
//-----------------------------------------------Constant-----------------------------------------------
std::string ConstantInt::print() {
std::string const_ir;
if (this->type_->tid_ == Type::IntegerTyID &&
static_cast<IntegerType *>(this->type_)->num_bits_ == 1) {
// int1
const_ir += (this->value_ == 0) ? "0" : "1";
} else // int32
const_ir += std::to_string(this->value_);
return const_ir;
}
// Prints the constant as "0x" followed by the hexadecimal bit pattern of the
// value widened to double.
//
// The bits are copied out with memcpy; reading a double through a uint64_t
// pointer violates the strict-aliasing rule.
std::string ConstantFloat::print() {
  const double widened = this->value_;
  uint64_t bits = 0;
  static_assert(sizeof(bits) == sizeof(widened),
                "double is expected to be 64 bits wide");
  std::memcpy(&bits, &widened, sizeof(bits));
  std::stringstream out;
  out << "0x" << std::hex << bits;
  return out.str();
}
// Prints the constant as "0x" followed by the hexadecimal bit pattern of the
// 32-bit float value.
//
// The bits are copied out with memcpy; reading a float through a uint32_t
// pointer violates the strict-aliasing rule.
std::string ConstantFloat::print32() {
  const float single = this->value_;
  uint32_t bits = 0;
  static_assert(sizeof(bits) == sizeof(single),
                "float is expected to be 32 bits wide");
  std::memcpy(&bits, &single, sizeof(bits));
  std::stringstream out;
  out << "0x" << std::hex << bits;
  return out.str();
}
// Prints the array constant as "[<elem-ty> <v0>, <elem-ty> <v1>, ...]".
// An array with no elements prints as "[]"; the element list is never
// indexed out of range.
std::string ConstantArray::print() {
  const std::string elem_ty =
      static_cast<ArrayType *>(this->type_)->contained_->print();
  std::string const_ir = "[";
  for (size_t i = 0; i < this->const_array.size(); i++) {
    if (i != 0)
      const_ir += ", ";
    const_ir += elem_ty;
    const_ir += " ";
    const_ir += const_array[i]->print();
  }
  const_ir += "]";
  return const_ir;
}
// A zero constant always prints as the keyword below, whatever its type.
std::string ConstantZero::print() {
  return std::string("zeroinitializer");
}
//-----------------------------------------------Module-----------------------------------------------
std::string Module::print() {
std::string module_ir;
for (auto global_val : this->global_list_) {
module_ir += global_val->print();
module_ir += "\n";
}
for (auto func : this->function_list_) {
module_ir += func->print();
module_ir += "\n";
}
return module_ir;
}
// Returns the first function named "main", or nullptr when there is none.
Function *Module::getMainFunc() {
  auto hit = std::find_if(function_list_.begin(), function_list_.end(),
                          [](Function *fn) { return fn->name_ == "main"; });
  if (hit == function_list_.end()) {
    return nullptr;
  }
  return *hit;
}
//-----------------------------------------------GlobalVariable-----------------------------------------------
std::string GlobalVariable::print() {
std::string global_val_ir;
global_val_ir += print_as_op(this, false);
global_val_ir += " = ";
global_val_ir += (this->is_const_ ? "constant " : "global ");
global_val_ir += static_cast<PointerType *>(this->type_)->contained_->print();
global_val_ir += " ";
global_val_ir += this->init_val_->print();
return global_val_ir;
}
//-----------------------------------------------Function-----------------------------------------------
std::string Function::print() {
if (this->name_ == "llvm.memset.p0.i32") {
std::string func_ir = "declare void @llvm.memset.p0.i32(i32*, i8, i32, i1)";
return func_ir;
}
set_instr_name();
std::string func_ir;
if (this->is_declaration())
func_ir += "declare ";
else
func_ir += "define ";
func_ir += this->get_return_type()->print();
func_ir += " ";
func_ir += print_as_op(this, false);
func_ir += "(";
// print arg
if (this->is_declaration()) {
for (size_t i = 0; i < this->arguments_.size(); i++) {
if (i)
func_ir += ", ";
func_ir += static_cast<FunctionType *>(this->type_)->args_[i]->print();
}
} else {
for (auto arg = this->arguments_.begin(); arg != arguments_.end(); arg++) {
if (arg != this->arguments_.begin()) {
func_ir += ", ";
}
func_ir += static_cast<Argument *>(*arg)->print();
}
}
func_ir += ")";
// print bb
if (!this->is_declaration()) {
func_ir += " {";
func_ir += "\n";
for (auto bb : this->basic_blocks_) {
func_ir += bb->print();
}
func_ir += "}";
}
return func_ir;
}
std::string Argument::print() {
std::string arg_ir;
arg_ir += this->type_->print();
arg_ir += " %";
arg_ir += this->name_;
return arg_ir;
}
// Removes `bb` from this function's block list and unlinks it from the
// successor list of each predecessor and the predecessor list of each
// successor. `bb`'s own pre/succ lists are left as they were.
void Function::remove_bb(BasicBlock *bb) {
  auto kept_end = std::remove(basic_blocks_.begin(), basic_blocks_.end(), bb);
  basic_blocks_.erase(kept_end, basic_blocks_.end());
  for (auto pred : bb->pre_bbs_) {
    pred->remove_succ_basic_block(bb);
  }
  for (auto next : bb->succ_bbs_) {
    next->remove_pre_basic_block(bb);
  }
}
// Returns the first basic block whose terminator is a return instruction, or
// nullptr when no block ends in a return.
//
// get_terminator() returns nullptr for a block that is empty or does not end
// in ret/br, so the result is checked before it is dereferenced.
BasicBlock *Function::getRetBB() {
  for (auto bb : basic_blocks_) {
    Instruction *term = bb->get_terminator();
    if (term && term->is_ret()) {
      return bb;
    }
  }
  return nullptr;
}
//-----------------------------------------------BasicBlock-----------------------------------------------
std::string BasicBlock::print() {
std::string bb_ir;
bb_ir += this->name_;
bb_ir += ":";
// print prebb
if (!this->pre_bbs_.empty()) {
bb_ir += " ; preds = ";
}
for (auto bb : this->pre_bbs_) {
if (bb != *this->pre_bbs_.begin())
bb_ir += ", ";
bb_ir += print_as_op(bb, false);
}
// print prebb
if (!this->parent_) {
bb_ir += "\n";
bb_ir += "; Error: Block without parent!";
}
bb_ir += "\n";
for (auto instr : this->instr_list_) {
bb_ir += " ";
bb_ir += instr->print();
bb_ir += "\n";
}
return bb_ir;
}
// Returns the last instruction when it is a `ret` or `br`; returns nullptr
// for an empty block or a block ending in any other instruction.
Instruction *BasicBlock::get_terminator() {
  if (instr_list_.empty()) {
    return nullptr;
  }
  Instruction *last = instr_list_.back();
  const bool ends_block =
      last->op_id_ == Instruction::Ret || last->op_id_ == Instruction::Br;
  return ends_block ? last : nullptr;
}
// Removes `instr` from this block and drops its uses of its operands.
// Returns false when `instr` is null, is not recorded at exactly one list
// position, or belongs to a different block.
bool BasicBlock::delete_instr(Instruction *instr) {
  const bool owned_here =
      instr && instr->pos_in_bb.size() == 1 && instr->parent_ == this;
  if (!owned_here) {
    return false;
  }
  // Erase through the stored list position instead of searching the list.
  instr_list_.erase(instr->pos_in_bb.back());
  instr->remove_use_of_ops();
  // Leave the instruction fully detached.
  instr->pos_in_bb.clear();
  instr->parent_ = nullptr;
  return true;
}
// Appends `instr` to the end of this block and records its list position.
// Returns false when the instruction is already inserted somewhere.
bool BasicBlock::add_instruction(Instruction *instr) {
  if (!instr->pos_in_bb.empty()) {
    return false;
  }
  auto slot = instr_list_.insert(instr_list_.end(), instr);
  instr->pos_in_bb.emplace_back(slot);
  instr->parent_ = this;
  return true;
}
// Prepends `instr` to this block and records its list position.
// Returns false when the instruction is already inserted somewhere.
bool BasicBlock::add_instruction_front(Instruction *instr) {
  if (!instr->pos_in_bb.empty()) {
    return false;
  }
  auto slot = instr_list_.insert(instr_list_.begin(), instr);
  instr->pos_in_bb.emplace_back(slot);
  instr->parent_ = this;
  return true;
}
// Inserts `instr` as the second-to-last instruction, i.e. directly before the
// current last one. Returns false when `instr` is already inserted somewhere
// or when the block is empty (there is no last instruction to insert before).
bool BasicBlock::add_instruction_before_terminator(Instruction *instr) {
  if (!instr->pos_in_bb.empty()) {
    return false;
  }
  if (instr_list_.empty()) {
    return false;
  }
  auto last_pos = instr_list_.end();
  --last_pos; // position of the current last instruction
  auto slot = instr_list_.insert(last_pos, instr);
  instr->pos_in_bb.emplace_back(slot); // remember where it landed
  instr->parent_ = this;
  return true;
}
// Inserts `new_instr` directly before `instr`, which must already live in
// this block. Returns false when `instr` is null, is not recorded at exactly
// one position, or belongs to another block; when `new_instr` is already
// inserted somewhere; or when this block has no instructions.
bool BasicBlock::add_instruction_before_inst(Instruction *new_instr,
                                             Instruction *instr) {
  const bool anchor_ok =
      instr && instr->pos_in_bb.size() == 1 && instr->parent_ == this;
  if (!anchor_ok) {
    return false;
  }
  if (!new_instr->pos_in_bb.empty()) {
    return false;
  }
  // Cannot happen while the anchor claims this block as parent; kept as a
  // defensive check.
  if (instr_list_.empty()) {
    return false;
  }
  auto slot = instr_list_.insert(instr->pos_in_bb.back(), new_instr);
  new_instr->pos_in_bb.emplace_back(slot); // remember where it landed
  new_instr->parent_ = this;
  return true;
}
// Detaches `instr` from this block but keeps its operand uses, because the
// instruction is going to be inserted into another block.
// Returns false when `instr` is null, is not recorded at exactly one list
// position, or belongs to a different block.
bool BasicBlock::remove_instr(Instruction *instr) {
  const bool owned_here =
      instr && instr->pos_in_bb.size() == 1 && instr->parent_ == this;
  if (!owned_here) {
    return false;
  }
  instr_list_.erase(instr->pos_in_bb.back());
  // Leave the instruction fully detached.
  instr->pos_in_bb.clear();
  instr->parent_ = nullptr;
  return true;
}
//-----------------------------------------------Instruction-----------------------------------------------
std::string BinaryInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->operands_[0]->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += ", ";
assert(this->get_operand(0)->type_->tid_ ==
this->get_operand(1)->type_->tid_);
instr_ir += print_as_op(this->get_operand(1), false);
// instr_ir += print_as_op(this->get_operand(1), true);
return instr_ir;
}
std::string UnaryInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->operands_[0]->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
switch (this->op_id_) {
case Instruction::ZExt:
assert(this->type_->tid_ == Type::IntegerTyID);
instr_ir += " to i32";
break;
case Instruction::FPtoSI:
assert(this->type_->tid_ == Type::IntegerTyID);
instr_ir += " to i32";
break;
case Instruction::SItoFP:
assert(this->type_->tid_ == Type::FloatTyID);
instr_ir += " to float";
break;
default:
assert(0 && "UnaryInst opID invalid!");
break;
}
return instr_ir;
}
std::string ICmpInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += print_cmp_type(this->icmp_op_);
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += ", ";
if (this->get_operand(0)->type_->tid_ == this->get_operand(1)->type_->tid_) {
instr_ir += print_as_op(this->get_operand(1), false);
} else {
instr_ir += print_as_op(this->get_operand(1), true);
}
return instr_ir;
}
std::string FCmpInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += print_fcmp_type(this->fcmp_op_);
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += ", ";
if (this->get_operand(0)->type_->tid_ == this->get_operand(1)->type_->tid_) {
instr_ir += print_as_op(this->get_operand(1), false);
} else {
instr_ir += print_as_op(this->get_operand(1), true);
}
return instr_ir;
}
std::string CallInst::print() {
std::string instr_ir;
if (!(this->type_->tid_ == Type::VoidTyID)) {
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
}
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
unsigned int numops = this->num_ops_;
instr_ir += static_cast<FunctionType *>(this->get_operand(numops - 1)->type_)
->result_->print();
instr_ir += " ";
assert(dynamic_cast<Function *>(this->get_operand(numops - 1)) &&
"Wrong call operand function");
//__aeabi_memclr4 -> llvm_memset
if (dynamic_cast<Function *>(this->get_operand(numops - 1))->name_ ==
"__aeabi_memclr4") {
instr_ir += "@llvm.memset.p0.i32(";
// i32* 目的内存地址
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
// i8 0
instr_ir += ", i8 0, ";
// i32 修改总字节数
instr_ir += this->get_operand(1)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(1), false);
// i1 false
instr_ir += ", i1 false)";
return instr_ir;
}
instr_ir += print_as_op(this->get_operand(numops - 1), false);
instr_ir += "(";
for (unsigned int i = 0; i < numops - 1; i++) {
if (i > 0)
instr_ir += ", ";
instr_ir += this->get_operand(i)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(i), false);
}
instr_ir += ")";
return instr_ir;
}
std::string BranchInst::print() {
std::string instr_ir;
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), true);
if (this->num_ops_ == 3) {
instr_ir += ", ";
instr_ir += print_as_op(this->get_operand(1), true);
instr_ir += ", ";
instr_ir += print_as_op(this->get_operand(2), true);
}
return instr_ir;
}
std::string ReturnInst::print() {
std::string instr_ir;
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
if (this->num_ops_ != 0) {
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
} else {
instr_ir += "void";
}
return instr_ir;
}
std::string GetElementPtrInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
assert(this->get_operand(0)->type_->tid_ == Type::PointerTyID);
instr_ir += static_cast<PointerType *>(this->get_operand(0)->type_)
->contained_->print();
instr_ir += ", ";
for (unsigned int i = 0; i < this->num_ops_; i++) {
if (i > 0)
instr_ir += ", ";
instr_ir += this->get_operand(i)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(i), false);
}
return instr_ir;
}
std::string StoreInst::print() {
std::string instr_ir;
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += ", ";
instr_ir += print_as_op(this->get_operand(1), true);
return instr_ir;
}
std::string LoadInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
assert(this->get_operand(0)->type_->tid_ == Type::PointerTyID);
instr_ir += static_cast<PointerType *>(this->get_operand(0)->type_)
->contained_->print();
instr_ir += ",";
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), true);
return instr_ir;
}
std::string AllocaInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += alloca_ty_->print();
return instr_ir;
}
std::string ZextInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += " to ";
instr_ir += this->dest_ty_->print();
return instr_ir;
}
std::string FpToSiInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += " to ";
instr_ir += this->dest_ty_->print();
return instr_ir;
}
std::string SiToFpInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += " to ";
instr_ir += this->dest_ty_->print();
return instr_ir;
}
std::string Bitcast::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
instr_ir += print_as_op(this->get_operand(0), false);
instr_ir += " to ";
instr_ir += this->dest_ty_->print();
return instr_ir;
}
std::string PhiInst::print() {
std::string instr_ir;
instr_ir += "%";
instr_ir += this->name_;
instr_ir += " = ";
instr_ir += instr_id2string_[this->op_id_];
instr_ir += " ";
instr_ir += this->get_operand(0)->type_->print();
instr_ir += " ";
for (int i = 0; i < this->num_ops_ / 2; i++) {
if (i > 0)
instr_ir += ", ";
instr_ir += "[ ";
instr_ir += print_as_op(this->get_operand(2 * i), false);
instr_ir += ", ";
instr_ir += print_as_op(this->get_operand(2 * i + 1), false);
instr_ir += " ]";
}
if (this->num_ops_ / 2 < this->parent_->pre_bbs_.size()) {
for (auto pre_bb : this->parent_->pre_bbs_) {
if (std::find(this->operands_.begin(), this->operands_.end(),
static_cast<Value *>(pre_bb)) == this->operands_.end()) {
// find a pre_bb is not in phi
instr_ir += ", [ undef, " + print_as_op(pre_bb, false) + " ]";
}
}
}
return instr_ir;
}
// Renders `v` as an operand. Globals and functions print as "@name",
// constants print through their own print(), everything else prints as
// "%name". With `print_ty` set, the value's type and a space come first.
std::string print_as_op(Value *v, bool print_ty) {
  std::string text;
  if (print_ty) {
    text = v->type_->print();
    text += " ";
  }
  const bool is_global_symbol =
      dynamic_cast<GlobalVariable *>(v) || dynamic_cast<Function *>(v);
  if (is_global_symbol) {
    return text + "@" + v->name_;
  }
  if (dynamic_cast<Constant *>(v)) {
    return text + v->print();
  }
  return text + "%" + v->name_;
}
// Returns the IR predicate keyword for an integer comparison.
// Covers the equality, signed and unsigned predicates; the unsigned ones
// (ugt/uge/ult/ule) exist in the enum and previously fell through to the
// error string. Any other value yields "wrong cmpop".
std::string print_cmp_type(ICmpInst::ICmpOp op) {
  switch (op) {
  case ICmpInst::ICMP_EQ:
    return "eq";
  case ICmpInst::ICMP_NE:
    return "ne";
  case ICmpInst::ICMP_SGT:
    return "sgt";
  case ICmpInst::ICMP_SGE:
    return "sge";
  case ICmpInst::ICMP_SLT:
    return "slt";
  case ICmpInst::ICMP_SLE:
    return "sle";
  case ICmpInst::ICMP_UGT:
    return "ugt";
  case ICmpInst::ICMP_UGE:
    return "uge";
  case ICmpInst::ICMP_ULT:
    return "ult";
  case ICmpInst::ICMP_ULE:
    return "ule";
  default:
    break;
  }
  return "wrong cmpop";
}
// Returns the IR predicate keyword for a floating-point comparison.
// Covers every enumerator of FCmpOp; previously only the six unordered
// relational predicates were handled and the rest fell through to the error
// string. Any other value yields "wrong fcmpop".
std::string print_fcmp_type(FCmpInst::FCmpOp op) {
  switch (op) {
  case FCmpInst::FCMP_FALSE:
    return "false";
  case FCmpInst::FCMP_OEQ:
    return "oeq";
  case FCmpInst::FCMP_OGT:
    return "ogt";
  case FCmpInst::FCMP_OGE:
    return "oge";
  case FCmpInst::FCMP_OLT:
    return "olt";
  case FCmpInst::FCMP_OLE:
    return "ole";
  case FCmpInst::FCMP_ONE:
    return "one";
  case FCmpInst::FCMP_ORD:
    return "ord";
  case FCmpInst::FCMP_UNO:
    return "uno";
  case FCmpInst::FCMP_UEQ:
    return "ueq";
  case FCmpInst::FCMP_UGT:
    return "ugt";
  case FCmpInst::FCMP_UGE:
    return "uge";
  case FCmpInst::FCMP_ULT:
    return "ult";
  case FCmpInst::FCMP_ULE:
    return "ule";
  case FCmpInst::FCMP_UNE:
    return "une";
  case FCmpInst::FCMP_TRUE:
    return "true";
  default:
    break;
  }
  return "wrong fcmpop";
}
void Function::set_instr_name() {
std::map<Value *, int> seq;
for (auto arg : this->arguments_) {
if (!seq.count(arg)) {
auto seq_num = seq.size() + seq_cnt_;
if (arg->name_ == "") {
arg->name_ = "arg_" + std::to_string(seq_num);
seq.insert({arg, seq_num});
}
}
}
for (auto bb : basic_blocks_) {
if (!seq.count(bb)) {
auto seq_num = seq.size() + seq_cnt_;
if (bb->name_.length() <= 6 || bb->name_.substr(0, 6) != "label_") {
bb->name_ = "label_" + std::to_string(seq_num);
seq.insert({bb, seq_num});
}
}
for (auto instr : bb->instr_list_) {
if (instr->type_->tid_ != Type::VoidTyID && !seq.count(instr)) {
auto seq_num = seq.size() + seq_cnt_;
if (instr->name_ == "") {
instr->name_ = "v" + std::to_string(seq_num);
seq.insert({instr, seq_num});
}
}
}
}
seq_cnt_ += seq.size();
}

@ -0,0 +1,961 @@
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
class Type;
class IntegerType;
class ArrayType;
class PointerType;
class FunctionType;
class Value;
class Constant;
class ConstantInt;
class ConstantFloat;
class ConstantArray;
class ConstantZero;
class Module;
class GlobalVariable;
class Function;
class BasicBlock;
class Argument;
class Instruction;
class BinaryInst;
class UnaryInst;
class ICmpInst;
class FCmpInst;
class CallInst;
class BranchInst;
class ReturnInst;
class GetElementPtrInst;
class StoreInst;
class LoadInst;
class AllocaInst;
struct Use {
Value *val_;
unsigned int arg_no_; // 操作数的序号如func(a,b)中a的序号为0b的序号为1
Use(Value *val, unsigned int no) : val_(val), arg_no_(no) {}
};
// Base class of all IR types. Derived classes are told apart by tid_.
class Type {
public:
  enum TypeID {
    VoidTyID,     // Void
    LabelTyID,    // Labels, e.g., BasicBlock
    IntegerTyID,  // Integers, include 32 bits and 1 bit
    FloatTyID,    // Floats, only 32 bits
    FunctionTyID, // Functions
    ArrayTyID,    // Arrays
    PointerTyID,  // Pointer
  };
  explicit Type(TypeID tid) : tid_(tid) {}
  // Virtual because the class is polymorphic: derived type objects may be
  // destroyed through a Type pointer.
  virtual ~Type() = default;
  // Returns the textual form of the type.
  virtual std::string print();
  TypeID tid_;
};
// Integer type with a given bit width.
class IntegerType : public Type {
public:
  explicit IntegerType(unsigned num_bits)
      : Type{Type::IntegerTyID}, num_bits_{num_bits} {}
  unsigned num_bits_; // width in bits
};
// Array type. Example: for [2 x [3 x i32]], num_elements_ = 2 and
// contained_ = [3 x i32].
class ArrayType : public Type {
public:
  // Initializers are listed in member declaration order, which is the order
  // in which they actually run.
  ArrayType(Type *contained, unsigned num_elements)
      : Type(Type::ArrayTyID), contained_(contained),
        num_elements_(num_elements) {}
  Type *contained_;       // The element type of the array.
  unsigned num_elements_; // Number of elements in the array.
};
//[2 x [3 x i32]]*
class PointerType : public Type {
public:
PointerType(Type *contained)
: Type(Type::PointerTyID), contained_(contained) {}
Type *contained_; // The element type of the ptr.
};
// Function type: a result type plus an ordered list of parameter types.
// Example: `declare i32 @putarray(i32, i32*)` has result_ = i32 and
// args_ = {i32, i32*}.
class FunctionType : public Type {
public:
  // `params` is taken by value and moved into args_, so the list is not
  // copied a second time element by element.
  FunctionType(Type *result, std::vector<Type *> params)
      : Type(Type::FunctionTyID), result_(result), args_(std::move(params)) {}
  Type *result_;             // return type
  std::vector<Type *> args_; // parameter types, in order
};
// Base class of everything that can appear as an operand. Holds the value's
// type, its name, and the list of uses that refer to it.
class Value {
public:
  explicit Value(Type *ty, const std::string &name = "")
      : type_(ty), name_(name) {}
  // Virtual because the class is an abstract polymorphic base: derived
  // objects may be destroyed through a Value pointer.
  virtual ~Value() = default;
  // Returns the textual form of the value.
  virtual std::string print() = 0;
  // Removes every use entry whose user is `val`.
  void remove_use(Value *val) {
    auto is_val = [val](const Use &use) { return use.val_ == val; };
    use_list_.remove_if(is_val);
  }
  // Records that `val` uses this value as operand number `arg_no` and returns
  // the position of the new entry so it can be erased later.
  std::list<Use>::iterator add_use(Value *val, unsigned arg_no) {
    use_list_.emplace_back(val, arg_no);
    std::list<Use>::iterator re = use_list_.end();
    return --re;
  }
  // Removes the use entry at the given position.
  void remove_use(std::list<Use>::iterator it) { use_list_.erase(it); }
  // Operand `i` of `user` is about to stop using this value: drop the
  // matching use entry. Returns false when that operand is not this value.
  bool remove_used(Instruction *user, unsigned int i);
  // Return if the value is a constant.
  bool is_constant();
  // Redirects every user of this value to `new_val`.
  void replace_all_use_with(Value *new_val);
  Type *type_;
  std::string name_;
  // Every use of this value: the using value and the operand slot in which
  // this value appears.
  std::list<Use> use_list_;
};
// Constants are always unnamed (name == "").
/// Common base of all constant values.
class Constant : public Value {
public:
  Constant(Type *ty, const std::string &name = "")
      : Value(ty, name) {}

  ~Constant() = default;
};
// Example: i32 -23
/// Integer constant; value_ holds the literal.
class ConstantInt : public Constant {
public:
  ConstantInt(Type *ty, int val)
      : Constant(ty, ""), value_(val) {}

  virtual std::string print() override;

  int value_; // the constant's numeric value
};
// Examples:
//   float 0x4057C21FC0000000
//   float -3.300000e+04
/// Single-precision floating point constant.
class ConstantFloat : public Constant {
public:
  ConstantFloat(Type *ty, float val)
      : Constant(ty, ""), value_(val) {}

  virtual std::string print() override;

  float value_; // the constant's numeric value

  /// Alternative textual form (defined out of line).
  std::string print32();
};
// Example: [3 x i32] [i32 42, i32 11, i32 74]
/// Array constant; const_array holds a copy of the element constants.
class ConstantArray : public Constant {
public:
  ConstantArray(ArrayType *ty, const std::vector<Constant *> &val)
      : Constant(ty, ""), const_array(val.begin(), val.end()) {}

  ~ConstantArray() = default;

  virtual std::string print() override;

  std::vector<Constant *> const_array; // elements, in index order
};
// Examples:
//   i32 zeroinitializer
//   [2 x [100 x float]] zeroinitializer
// Note that a zeroinitializer still carries a type.
/// All-zero constant of the given type.
class ConstantZero : public Constant {
public:
  ConstantZero(Type *ty)
      : Constant(ty, "") {}

  virtual std::string print() override;
};
class Module {
public:
explicit Module() {
void_ty_ = new Type(Type::VoidTyID);
label_ty_ = new Type(Type::LabelTyID);
int1_ty_ = new IntegerType(1);
int32_ty_ = new IntegerType(32);
float32_ty_ = new Type(Type::FloatTyID);
}
~Module() {
delete void_ty_;
delete label_ty_;
delete int1_ty_;
delete int32_ty_;
delete float32_ty_;
}
virtual std::string print();
void add_global_variable(GlobalVariable *g) { global_list_.push_back(g); }
void add_function(Function *f) { function_list_.push_back(f); }
PointerType *get_pointer_type(Type *contained) {
if (!pointer_map_.count(contained)) {
pointer_map_[contained] = new PointerType(contained);
}
return pointer_map_[contained];
}
ArrayType *get_array_type(Type *contained, unsigned num_elements) {
if (!array_map_.count({contained, num_elements})) {
array_map_[{contained, num_elements}] =
new ArrayType(contained, num_elements);
}
return array_map_[{contained, num_elements}];
}
Function *getMainFunc();
std::vector<GlobalVariable *> global_list_;
std::vector<Function *> function_list_;
IntegerType *int1_ty_;
IntegerType *int32_ty_;
Type *float32_ty_;
Type *label_ty_;
Type *void_ty_;
std::map<Type *, PointerType *> pointer_map_;
std::map<std::pair<Type *, int>, ArrayType *> array_map_;
};
//-----------------------------------------------GlobalVariable-----------------------------------------------
// Examples:
//   @c = global [4 x i32] [i32 6, i32 7, i32 8, i32 9]
//   @a = constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4]
/// Module-level variable. Its Value type is a pointer to `ty`, and the
/// constructor registers the new object with the module.
class GlobalVariable : public Value {
public:
  GlobalVariable(std::string name, Module *m, Type *ty, bool is_const,
                 Constant *init = nullptr)
      : Value(m->get_pointer_type(ty), name),
        is_const_(is_const),
        init_val_(init) {
    m->add_global_variable(this);
  }

  virtual std::string print() override;

  bool is_const_;      // flag passed at construction
  Constant *init_val_; // initializer, nullptr when none was supplied
};
-
// Argument's constructor is only meant to be called from Function's
// constructor, not on its own.
/// Formal parameter of a Function.
class Argument : public Value {
public:
  explicit Argument(Type *ty, const std::string &name = "",
                    Function *f = nullptr, unsigned arg_no = 0)
      : Value(ty, name), parent_(f), arg_no_(arg_no) {}

  ~Argument() {}

  virtual std::string print() override;

  Function *parent_; // owning function
  unsigned arg_no_;  // argument No.
};
/// IR function. The constructor registers the function with its module and
/// creates one unnamed Argument per parameter type of `ty`.
class Function : public Value {
public:
  Function(FunctionType *ty, const std::string &name, Module *parent)
      : Value(ty, name), parent_(parent), seq_cnt_(0), use_ret_cnt(0) {
    parent->add_function(this);
    unsigned position = 0;
    for (Type *param_ty : ty->args_) {
      arguments_.push_back(new Argument(param_ty, "", this, position));
      ++position;
    }
  }

  ~Function();

  virtual std::string print() override;

  void add_basic_block(BasicBlock *bb) { basic_blocks_.push_back(bb); }

  /// Result type taken from this function's FunctionType.
  Type *get_return_type() const {
    FunctionType *signature = static_cast<FunctionType *>(type_);
    return signature->result_;
  }

  /// True when the function has no basic blocks.
  bool is_declaration() { return basic_blocks_.empty(); }

  void set_instr_name();
  void remove_bb(BasicBlock *bb);
  BasicBlock *getRetBB();

  std::vector<BasicBlock *> basic_blocks_; // basic blocks
  std::vector<Argument *> arguments_;      // argument
  Module *parent_;
  unsigned seq_cnt_;
  std::vector<std::set<Value *>> vreg_set_;
  int use_ret_cnt; // how many times the return value is actually used
};
// A BasicBlock always has the label type (LabelTyID).
/// Straight-line sequence of instructions plus CFG, dominator and liveness
/// bookkeeping. The constructor appends the block to its parent function.
class BasicBlock : public Value {
public:
  explicit BasicBlock(Module *m, const std::string &name, Function *parent)
      : Value(m->label_ty_, name), parent_(parent) {
    parent_->add_basic_block(this);
  }
  // Append an instruction at the tail; returns whether it succeeded.
  bool add_instruction(Instruction *instr);
  // Insert an instruction at the head; returns whether it succeeded.
  bool add_instruction_front(Instruction *instr);
  // Insert just before the block's last instruction (i.e. before the br).
  bool add_instruction_before_terminator(Instruction *instr);
  // Insert new_inst in front of inst; returns whether it succeeded.
  // inst must already belong to this block.
  bool add_instruction_before_inst(Instruction *new_inst, Instruction *inst);
  void add_pre_basic_block(BasicBlock *bb) { pre_bbs_.push_back(bb); }
  void add_succ_basic_block(BasicBlock *bb) { succ_bbs_.push_back(bb); }
  /// Removes every occurrence of bb from the predecessor list.
  void remove_pre_basic_block(BasicBlock *bb) {
    pre_bbs_.erase(std::remove(pre_bbs_.begin(), pre_bbs_.end(), bb),
                   pre_bbs_.end());
  }
  /// Removes every occurrence of bb from the successor list.
  void remove_succ_basic_block(BasicBlock *bb) {
    succ_bbs_.erase(std::remove(succ_bbs_.begin(), succ_bbs_.end(), bb),
                    succ_bbs_.end());
  }
  /// Walks bb2's immediate-dominator chain looking for this block.
  /// @return 1 if this block is found on the chain, 0 if it is not,
  ///         -1 if bb2 is null or belongs to another function.
  int isDominate(BasicBlock *bb2) {
    if (!bb2 || this->parent_ != bb2->parent_)
      return -1;
    // Stop at the entry block, and also when the chain ends early (idom_ not
    // computed yet) instead of dereferencing a null pointer.
    while (bb2 && bb2->name_ != "label_entry") {
      if (bb2->idom_ == this)
        return 1;
      bb2 = bb2->idom_;
    }
    return 0;
  }
  // Returns the terminator instruction if the block is well formed or null
  // if the block is not well formed.
  Instruction *get_terminator();
  // Returns false when the instruction cannot be deleted again or does not
  // belong to this block.
  bool delete_instr(Instruction *instr);
  // Detach an instruction from this block without touching its use
  // relations, because it is going to be inserted into another block.
  bool remove_instr(Instruction *instr);
  virtual std::string print() override;
  // Instructions are kept in a list (rather than a vector).
  std::list<Instruction *> instr_list_;
  Function *parent_;
  /****************api about cfg****************/
  std::vector<BasicBlock *> pre_bbs_;
  std::vector<BasicBlock *> succ_bbs_;
  /****************api about dominate tree****************/
  std::set<BasicBlock *> dom_frontier_;
  std::set<BasicBlock *> rdom_frontier_;
  std::set<BasicBlock *> rdoms_;
  BasicBlock *idom_ = nullptr; // immediate dominator; null until computed
  std::set<Value *> live_in;
  std::set<Value *> live_out;
};
class Instruction : public Value {
public:
enum OpID {
// Terminator Instructions
Ret = 11,
Br,
// Standard unary operators
FNeg,
// Standard binary operators
Add,
Sub,
Mul,
SDiv,
SRem,
UDiv,
URem,
// Float binary opeartors
FAdd,
FSub,
FMul,
FDiv,
// Logical operators
Shl,
LShr,
AShr,
And,
Or,
Xor,
// Memory operators
Alloca,
Load,
Store,
GetElementPtr,
// Cast operators
ZExt,
FPtoSI,
SItoFP,
BitCast,
// Other operators
ICmp,
FCmp,
PHI,
Call,
};
// 创建指令并插入基本块ty是指令返回值类型
// If before set to true, then use add_instruction_front() instead of
// add_instruction()
Instruction(Type *ty, OpID id, unsigned num_ops, BasicBlock *parent,
bool before = false)
: Value(ty, ""), op_id_(id), num_ops_(num_ops), parent_(parent) {
operands_.resize(
num_ops_,
nullptr); // 此句不能删去否则operands_为空时无法用set_operand设置操作数而只能用push_back设置操作数
use_pos_.resize(num_ops_);
if (!before)
parent_->add_instruction(this);
else
parent_->add_instruction_front(this);
}
// 仅创建指令不插入基本块ty是指令返回值类型
Instruction(Type *ty, OpID id, unsigned num_ops)
: Value(ty, ""), op_id_(id), num_ops_(num_ops), parent_(nullptr) {
operands_.resize(num_ops_, nullptr);
use_pos_.resize(num_ops_);
}
Value *get_operand(unsigned i) const { return operands_[i]; }
//***************************
void set_operand(unsigned i, Value *v) {
operands_[i] = v;
use_pos_[i] = v->add_use(this, i);
}
void add_operand(Value *v) { // 添加指令操作数用于phi指令
operands_.push_back(v);
use_pos_.emplace_back(v->add_use(this, num_ops_));
num_ops_++;
}
void
remove_use_of_ops() { // 删除此指令所有操作数的uselist中与此指令相关的use
for (int i = 0; i < operands_.size(); i++) {
operands_[i]->remove_use(use_pos_[i]);
}
}
// 删除phi指令中的一对操作数
void remove_operands(int index1, int index2) {
for (int i = index1; i <= index2; i++) {
operands_[i]->remove_use(use_pos_[i]);
}
// 后面操作数的位置要做相应修改
for (int i = index2 + 1; i < operands_.size(); i++) {
for (auto &use : operands_[i]->use_list_) {
if (use.val_ == this) {
use.arg_no_ -= index2 - index1 + 1;
break;
}
}
}
operands_.erase(operands_.begin() + index1, operands_.begin() + index2 + 1);
use_pos_.erase(use_pos_.begin() + index1, use_pos_.begin() + index2 + 1);
// std::cout<<operands_.size()<<std::endl;
num_ops_ = operands_.size();
}
// --------增加快速类型判断------------
bool is_void() {
return ((op_id_ == Ret) || (op_id_ == Br) || (op_id_ == Store) ||
(op_id_ == Call && this->type_->tid_ == Type::VoidTyID));
}
bool is_phi() { return op_id_ == PHI; }
bool is_store() { return op_id_ == Store; }
bool is_alloca() { return op_id_ == Alloca; }
bool is_ret() { return op_id_ == Ret; }
bool is_load() { return op_id_ == Load; }
bool is_br() { return op_id_ == Br; }
bool is_add() { return op_id_ == Add; }
bool is_sub() { return op_id_ == Sub; }
bool is_mul() { return op_id_ == Mul; }
bool is_div() { return op_id_ == SDiv; }
bool is_rem() { return op_id_ == SRem; }
bool is_fadd() { return op_id_ == FAdd; }
bool is_fsub() { return op_id_ == FSub; }
bool is_fmul() { return op_id_ == FMul; }
bool is_fdiv() { return op_id_ == FDiv; }
bool is_cmp() { return op_id_ == ICmp; }
bool is_fcmp() { return op_id_ == FCmp; }
bool is_call() { return op_id_ == Call; }
bool is_gep() { return op_id_ == GetElementPtr; }
bool is_zext() { return op_id_ == ZExt; }
bool is_fptosi() { return op_id_ == FPtoSI; }
bool is_sitofp() { return op_id_ == SItoFP; }
bool is_int_binary() {
return (is_add() || is_sub() || is_mul() || is_div() || is_rem()) &&
(num_ops_ == 2);
}
bool is_float_binary() {
return (is_fadd() || is_fsub() || is_fmul() || is_fdiv()) &&
(num_ops_ == 2);
}
bool is_binary() { return is_int_binary() || is_float_binary(); }
bool isTerminator() { return is_br() || is_ret(); }
virtual std::string print() = 0;
BasicBlock *parent_;
OpID op_id_;
unsigned num_ops_;
std::vector<Value *> operands_; // operands of this value
std::vector<std::list<Use>::iterator>
use_pos_; // 与操作数数组一一对应是对应的操作数的uselist里面与当前指令相关的use的迭代器
std::vector<std::list<Instruction *>::iterator>
pos_in_bb; // 在bb的指令list的位置迭代器,最多只能有一个
};
/// Two-operand arithmetic instruction (integer or float, chosen by `op`).
class BinaryInst : public Instruction {
public:
  /// Builds the instruction and appends it to `bb`.
  BinaryInst(Type *ty, OpID op, Value *v1, Value *v2, BasicBlock *bb)
      : Instruction(ty, op, 2, bb) {
    set_operand(0, v1);
    set_operand(1, v2);
  }

  /// Builds the instruction without inserting it into `bb`; only the parent
  /// pointer is recorded. The value of `flag` is ignored: the extra
  /// parameter merely selects this overload.
  BinaryInst(Type *ty, OpID op, Value *v1, Value *v2, BasicBlock *bb,
             bool flag)
      : Instruction(ty, op, 2) {
    set_operand(0, v1);
    set_operand(1, v2);
    parent_ = bb;
  }

  virtual std::string print() override;
};
/// Single-operand instruction; created and appended to `bb`.
class UnaryInst : public Instruction {
public:
  UnaryInst(Type *ty, OpID op, Value *val, BasicBlock *bb)
      : Instruction(ty, op, 1, bb) {
    set_operand(0, val);
  }

  virtual std::string print() override;
};
// Example: %18 = icmp ne i32 %12, %17
/// Integer comparison; the result type is the module's i1 type.
class ICmpInst : public Instruction {
public:
  enum ICmpOp {
    ICMP_EQ = 32,  ///< equal
    ICMP_NE = 33,  ///< not equal
    ICMP_UGT = 34, ///< unsigned greater than
    ICMP_UGE = 35, ///< unsigned greater or equal
    ICMP_ULT = 36, ///< unsigned less than
    ICMP_ULE = 37, ///< unsigned less or equal
    ICMP_SGT = 38, ///< signed greater than
    ICMP_SGE = 39, ///< signed greater or equal
    ICMP_SLT = 40, ///< signed less than
    ICMP_SLE = 41  ///< signed less or equal
  };

  /// Predicate -> name table (defined out of line).
  static const std::map<ICmpInst::ICmpOp, std::string> ICmpOpName;

  /// Builds the comparison and appends it to `bb`.
  ICmpInst(ICmpOp op, Value *v1, Value *v2, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->int1_ty_, Instruction::ICmp, 2, bb),
        icmp_op_(op) {
    set_operand(0, v1);
    set_operand(1, v2);
  }

  virtual std::string print() override;

  ICmpOp icmp_op_; // comparison predicate
};
// Example: %5 = fcmp olt float %4, 0.000000e+00
/// Floating point comparison; the result type is the module's i1 type.
class FCmpInst : public Instruction {
public:
  enum FCmpOp {
    FCMP_FALSE = 10, // Always false (always folded)
    FCMP_OEQ = 11,   // True if ordered and equal
    FCMP_OGT = 12,   // True if ordered and greater than
    FCMP_OGE = 13,   // True if ordered and greater than or equal
    FCMP_OLT = 14,   // True if ordered and less than
    FCMP_OLE = 15,   // True if ordered and less than or equal
    FCMP_ONE = 16,   // True if ordered and operands are unequal
    FCMP_ORD = 17,   // True if ordered (no nans)
    FCMP_UNO = 18,   // True if unordered: isnan(X) | isnan(Y)
    FCMP_UEQ = 19,   // True if unordered or equal
    FCMP_UGT = 20,   // True if unordered or greater than
    FCMP_UGE = 21,   // True if unordered, greater than, or equal
    FCMP_ULT = 22,   // True if unordered or less than
    FCMP_ULE = 23,   // True if unordered, less than, or equal
    FCMP_UNE = 24,   // True if unordered or not equal
    FCMP_TRUE = 25   // Always true (always folded)
  };

  /// Predicate -> name table (defined out of line).
  static const std::map<FCmpInst::FCmpOp, std::string> FCmpOpName;

  /// Builds the comparison and appends it to `bb`.
  FCmpInst(FCmpOp op, Value *v1, Value *v2, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->int1_ty_, Instruction::FCmp, 2, bb),
        fcmp_op_(op) {
    set_operand(0, v1);
    set_operand(1, v2);
  }

  virtual std::string print() override;

  FCmpOp fcmp_op_; // comparison predicate
};
// Example: %111 = call i32 @QuickSort(i32* %108, i32 %109, i32 %110)
/// Function call. Operands 0..n-1 are the arguments and the last operand is
/// the callee; the result type is the callee's return type.
class CallInst : public Instruction {
public:
  CallInst(Function *func, std::vector<Value *> args, BasicBlock *bb)
      : Instruction(static_cast<FunctionType *>(func->type_)->result_,
                    Instruction::Call, args.size() + 1, bb) {
    int slot = 0;
    for (Value *arg : args) {
      set_operand(slot, arg);
      ++slot;
    }
    // The callee occupies the slot right after the arguments.
    set_operand(slot, func);
  }

  virtual std::string print() override;
};
// The result type of br is always VoidTyID.
/// Branch instruction. Besides storing its operands, construction also wires
/// the CFG: `bb` becomes a predecessor of each target and each target
/// becomes a successor of `bb`.
class BranchInst : public Instruction {
public:
  /// Conditional branch: operands are (cond, if_true, if_false).
  BranchInst(Value *cond, BasicBlock *if_true, BasicBlock *if_false,
             BasicBlock *bb)
      : Instruction(if_true->parent_->parent_->void_ty_, Instruction::Br, 3,
                    bb) {
    // Predecessor edges on the targets.
    if_true->add_pre_basic_block(bb);
    if_false->add_pre_basic_block(bb);

    // Successor edges on the source; the false target is recorded first.
    bb->add_succ_basic_block(if_false);
    bb->add_succ_basic_block(if_true);

    set_operand(0, cond);
    set_operand(1, if_true);
    set_operand(2, if_false);
  }

  // Example: br label %31
  /// Unconditional branch: the single operand is the target block.
  BranchInst(BasicBlock *if_true, BasicBlock *bb)
      : Instruction(if_true->parent_->parent_->void_ty_, Instruction::Br, 1,
                    bb) {
    if_true->add_pre_basic_block(bb);
    bb->add_succ_basic_block(if_true);
    set_operand(0, if_true);
  }

  virtual std::string print() override;
};
// The result type of ret is always VoidTyID.
/// Return instruction, with or without a returned value.
class ReturnInst : public Instruction {
public:
  /// `ret val`, appended to `bb`.
  ReturnInst(Value *val, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->void_ty_, Instruction::Ret, 1, bb) {
    set_operand(0, val);
  }

  /// `ret val`, created without inserting it into `bb`; only the parent
  /// pointer is set. The value of `flag` is ignored.
  ReturnInst(Value *val, BasicBlock *bb, bool flag)
      : Instruction(bb->parent_->parent_->void_ty_, Instruction::Ret, 1) {
    set_operand(0, val);
    parent_ = bb;
  }

  /// `ret void`, appended to `bb`.
  ReturnInst(BasicBlock *bb)
      : Instruction(bb->parent_->parent_->void_ty_, Instruction::Ret, 0, bb) {}

  virtual std::string print() override;
};
// Example:
// %1 = getelementptr [5 x [4 x i32]], [5 x [4 x i32]]* @a, i32 0, i32 2, i32 3
/// Address computation. Operand 0 is the base pointer, operands 1..n are the
/// indices; the result is a pointer to the element type reached by them.
class GetElementPtrInst : public Instruction {
public:
  GetElementPtrInst(Value *ptr, std::vector<Value *> idxs, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->get_pointer_type(
                        get_GEP_return_type(ptr, idxs.size())),
                    Instruction::GetElementPtr, idxs.size() + 1, bb) {
    set_operand(0, ptr);
    for (size_t i = 0; i < idxs.size(); i++) {
      set_operand(i + 1, idxs[i]);
    }
  }
  /// Computes the type the GEP result points to: starting from the pointee of
  /// `ptr`, every index after the first peels one array level.
  ///
  /// Static because the constructor calls it while initializing the
  /// Instruction base class, i.e. before any part of *this exists; it only
  /// needs its arguments.
  static Type *get_GEP_return_type(Value *ptr, size_t idxs_size) {
    Type *ty =
        static_cast<PointerType *>(ptr->type_)->contained_; //[5 x [4 x i32]]
    if (ty->tid_ == Type::ArrayTyID) {
      ArrayType *arr_ty = static_cast<ArrayType *>(ty);
      for (size_t i = 1; i < idxs_size; i++) {
        ty = arr_ty->contained_; //[4 x i32], i32
        if (ty->tid_ == Type::ArrayTyID) {
          arr_ty = static_cast<ArrayType *>(ty);
        }
      }
    }
    return ty;
  }
  virtual std::string print() override;
};
// The result type of store is always VoidTyID.
/// Store instruction: operand 0 is the stored value, operand 1 the address.
/// Both constructors assert that the value's type equals the pointee type of
/// the address.
class StoreInst : public Instruction {
public:
  /// Builds the store and appends it to `bb`.
  StoreInst(Value *val, Value *ptr, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->void_ty_, Instruction::Store, 2, bb) {
    assert(val->type_ == static_cast<PointerType *>(ptr->type_)->contained_);
    set_operand(0, val);
    set_operand(1, ptr);
  }

  /// Builds the store without inserting it into `bb`, but still records `bb`
  /// as the parent. The unnamed bool only selects this overload.
  StoreInst(Value *val, Value *ptr, BasicBlock *bb, bool)
      : Instruction(bb->parent_->parent_->void_ty_, Instruction::Store, 2) {
    assert(val->type_ == static_cast<PointerType *>(ptr->type_)->contained_);
    set_operand(0, val);
    set_operand(1, ptr);
    parent_ = bb;
  }

  virtual std::string print() override;
};
// Form: <result> = load <ty>, <ty>* <pointer>
/// Load instruction; its result type is the pointee type of `ptr`.
class LoadInst : public Instruction {
public:
  LoadInst(Value *ptr, BasicBlock *bb)
      : Instruction(static_cast<PointerType *>(ptr->type_)->contained_,
                    Instruction::Load, 1, bb) {
    set_operand(0, ptr);
  }

  virtual std::string print() override;
};
// Example: %8 = alloca i32
/// Stack allocation of one object of type alloca_ty_; the instruction's own
/// type is a pointer to that type.
class AllocaInst : public Instruction {
public:
  /// Builds the alloca and inserts it at the FRONT of `bb`
  /// (the base constructor is called with before == true).
  AllocaInst(Type *ty, BasicBlock *bb)
      : Instruction(bb->parent_->parent_->get_pointer_type(ty),
                    Instruction::Alloca, 0, bb, true),
        alloca_ty_(ty) {}

  /// Builds the alloca without inserting it anywhere; only the parent
  /// pointer is set. The unnamed bool only selects this overload.
  AllocaInst(Type *ty, BasicBlock *bb, bool)
      : Instruction(bb->parent_->parent_->get_pointer_type(ty),
                    Instruction::Alloca, 0),
        alloca_ty_(ty) {
    parent_ = bb;
  }

  virtual std::string print() override;

  Type *alloca_ty_; // type of the allocated object
};
/// Cast instruction created by IRStmtBuilder::create_zext; converts `val`
/// to `ty` and is appended to `bb`.
class ZextInst : public Instruction {
public:
  ZextInst(OpID op, Value *val, Type *ty, BasicBlock *bb)
      : Instruction(ty, op, 1, bb), dest_ty_(ty) {
    set_operand(0, val);
  }

  virtual std::string print() override;

  Type *dest_ty_; // destination type (same as the instruction's type)
};
/// Cast instruction created by IRStmtBuilder::create_fptosi; converts `val`
/// to `ty` and is appended to `bb`.
class FpToSiInst : public Instruction {
public:
  FpToSiInst(OpID op, Value *val, Type *ty, BasicBlock *bb)
      : Instruction(ty, op, 1, bb), dest_ty_(ty) {
    set_operand(0, val);
  }

  virtual std::string print() override;

  Type *dest_ty_; // destination type (same as the instruction's type)
};
/// Cast instruction created by IRStmtBuilder::create_sitofp; converts `val`
/// to `ty` and is appended to `bb`.
class SiToFpInst : public Instruction {
public:
  SiToFpInst(OpID op, Value *val, Type *ty, BasicBlock *bb)
      : Instruction(ty, op, 1, bb), dest_ty_(ty) {
    set_operand(0, val);
  }

  virtual std::string print() override;

  Type *dest_ty_; // destination type (same as the instruction's type)
};
// Example: %3 = bitcast [4 x [2 x i32]]* %2 to i32*
/// Cast instruction created by IRStmtBuilder::create_bitcast; reinterprets
/// `val` as `ty` and is appended to `bb`.
class Bitcast : public Instruction {
public:
  Bitcast(OpID op, Value *val, Type *ty, BasicBlock *bb)
      : Instruction(ty, op, 1, bb), dest_ty_(ty) {
    set_operand(0, val);
  }

  virtual std::string print() override;

  Type *dest_ty_; // destination type (same as the instruction's type)
};
// Example: %4 = phi i32 [ 1, %2 ], [ %6, %5 ]
/// Phi node. Operands come in (value, predecessor block) pairs: operand 2*i
/// is the value and operand 2*i+1 the block it flows in from.
/// The constructor does NOT insert the phi into `bb`; it only sets parent_.
class PhiInst : public Instruction {
public:
  /// @param vals     incoming values
  /// @param val_bbs  predecessor block of each incoming value; must have the
  ///                 same length as vals
  PhiInst(OpID op, std::vector<Value *> vals, std::vector<BasicBlock *> val_bbs,
          Type *ty, BasicBlock *bb)
      : Instruction(ty, op, 2 * vals.size()) {
    // A shorter val_bbs would be indexed out of bounds below.
    assert(vals.size() == val_bbs.size());
    for (size_t i = 0; i < vals.size(); i++) {
      set_operand(2 * i, vals[i]);
      set_operand(2 * i + 1, val_bbs[i]);
    }
    this->parent_ = bb;
  }
  /// Creates an empty phi of type `ty` whose parent is `bb`.
  static PhiInst *create_phi(Type *ty, BasicBlock *bb) {
    std::vector<Value *> vals;
    std::vector<BasicBlock *> val_bbs;
    return new PhiInst(Instruction::PHI, vals, val_bbs, ty, bb);
  }
  /// Appends one (value, predecessor block) pair.
  void add_phi_pair_operand(Value *val, Value *pre_bb) {
    this->add_operand(val);
    this->add_operand(pre_bb);
  }
  virtual std::string print() override;
  // Not set by any constructor here; initialized to null so that reading it
  // before assignment is well defined.
  Value *l_val_ = nullptr;
};
/// Convenience factory for instructions. Every create_* method allocates the
/// instruction with `new` and passes the current insertion block BB_ to its
/// constructor (which, for the constructors used here, also inserts it).
class IRStmtBuilder {
public:
  BasicBlock *BB_; // current insertion block
  Module *m_;      // module providing the primitive types
  IRStmtBuilder(BasicBlock *bb, Module *m) : BB_(bb), m_(m) {}
  ~IRStmtBuilder() = default;
  Module *get_module() { return m_; }
  BasicBlock *get_insert_block() { return this->BB_; }
  /// Selects the basic block that subsequent instructions are created in.
  void set_insert_point(BasicBlock *bb) { this->BB_ = bb; }

  // ---- integer arithmetic (result type i32) ----
  BinaryInst *create_iadd(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->int32_ty_, Instruction::Add, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_isub(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->int32_ty_, Instruction::Sub, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_imul(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->int32_ty_, Instruction::Mul, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_isdiv(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->int32_ty_, Instruction::SDiv, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_isrem(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->int32_ty_, Instruction::SRem, v1, v2,
                          this->BB_);
  }

  // ---- integer comparisons (signed predicates for the orderings) ----
  ICmpInst *create_icmp_eq(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_EQ, v1, v2, this->BB_);
  }
  ICmpInst *create_icmp_ne(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_NE, v1, v2, this->BB_);
  }
  ICmpInst *create_icmp_gt(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_SGT, v1, v2, this->BB_);
  }
  ICmpInst *create_icmp_ge(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_SGE, v1, v2, this->BB_);
  }
  ICmpInst *create_icmp_lt(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_SLT, v1, v2, this->BB_);
  }
  ICmpInst *create_icmp_le(Value *v1, Value *v2) {
    return new ICmpInst(ICmpInst::ICMP_SLE, v1, v2, this->BB_);
  }

  // ---- float arithmetic (result type float) ----
  BinaryInst *create_fadd(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->float32_ty_, Instruction::FAdd, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_fsub(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->float32_ty_, Instruction::FSub, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_fmul(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->float32_ty_, Instruction::FMul, v1, v2,
                          this->BB_);
  }
  BinaryInst *create_fdiv(Value *v1, Value *v2) {
    return new BinaryInst(this->m_->float32_ty_, Instruction::FDiv, v1, v2,
                          this->BB_);
  }

  // ---- float comparisons (the FCMP_U* predicates are used) ----
  FCmpInst *create_fcmp_eq(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_UEQ, v1, v2, this->BB_);
  }
  FCmpInst *create_fcmp_ne(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_UNE, v1, v2, this->BB_);
  }
  FCmpInst *create_fcmp_gt(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_UGT, v1, v2, this->BB_);
  }
  FCmpInst *create_fcmp_ge(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_UGE, v1, v2, this->BB_);
  }
  FCmpInst *create_fcmp_lt(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_ULT, v1, v2, this->BB_);
  }
  FCmpInst *create_fcmp_le(Value *v1, Value *v2) {
    return new FCmpInst(FCmpInst::FCMP_ULE, v1, v2, this->BB_);
  }

  // ---- calls and control flow ----
  /// @param func must actually be a Function (checked only when DEBUG is
  ///             defined).
  CallInst *create_call(Value *func, std::vector<Value *> args) {
#ifdef DEBUG
    assert(dynamic_cast<Function *>(func) && "func must be Function * type");
#endif
    return new CallInst(static_cast<Function *>(func), args, this->BB_);
  }
  BranchInst *create_br(BasicBlock *if_true) {
    return new BranchInst(if_true, this->BB_);
  }
  BranchInst *create_cond_br(Value *cond, BasicBlock *if_true,
                             BasicBlock *if_false) {
    return new BranchInst(cond, if_true, if_false, this->BB_);
  }
  ReturnInst *create_ret(Value *val) { return new ReturnInst(val, this->BB_); }
  ReturnInst *create_void_ret() { return new ReturnInst(this->BB_); }

  // ---- memory ----
  GetElementPtrInst *create_gep(Value *ptr, std::vector<Value *> idxs) {
    return new GetElementPtrInst(ptr, idxs, this->BB_);
  }
  StoreInst *create_store(Value *val, Value *ptr) {
    return new StoreInst(val, ptr, this->BB_);
  }
  /// `ty` is ignored: LoadInst derives the result type from `ptr`.
  LoadInst *create_load(Type *ty, Value *ptr) {
    return new LoadInst(ptr, this->BB_);
  }
  LoadInst *create_load(Value *ptr) {
#ifdef DEBUG
    // Value/Type expose their data as public members (type_, tid_), so the
    // check is written against those.
    assert(ptr->type_->tid_ == Type::PointerTyID &&
           "ptr must be pointer type");
#endif
    return new LoadInst(ptr, this->BB_);
  }
  AllocaInst *create_alloca(Type *ty) { return new AllocaInst(ty, this->BB_); }

  // ---- casts ----
  ZextInst *create_zext(Value *val, Type *ty) {
    return new ZextInst(Instruction::ZExt, val, ty, this->BB_);
  }
  FpToSiInst *create_fptosi(Value *val, Type *ty) {
    return new FpToSiInst(Instruction::FPtoSI, val, ty, this->BB_);
  }
  SiToFpInst *create_sitofp(Value *val, Type *ty) {
    return new SiToFpInst(Instruction::SItoFP, val, ty, this->BB_);
  }
  Bitcast *create_bitcast(Value *val, Type *ty) {
    return new Bitcast(Instruction::BitCast, val, ty, this->BB_);
  }
};

@ -0,0 +1,105 @@
#include "CombineInstr.h"
#include "ConstSpread.h"
#include "LoopInvariant.h"
#include "SimplifyJump.h"
#include "ast.h"
#include "backend.h"
#include "define.h"
#include "genIR.h"
#include "DeleteDeadCode.h"
#include "opt.h"
#include <fstream>
#include <iostream>
#include <ostream>
#include <unistd.h>
extern unique_ptr<CompUnitAST> root;
extern int yyparse();
extern FILE *yyin;
/// Compiler driver.
///
/// Options (parsed with getopt):
///   -S          print RISC-V assembly
///   -c          print the IR
///   -o <file>   write to <file> instead of standard output
///   -O[level]   enable the IR optimization passes (the level is ignored)
/// The first non-option argument is the source file.
///
/// @return 0 on success, -1 when the arguments are unusable, a file cannot
///         be opened, or parsing produced no AST.
int main(int argc, char **argv) {
  bool print_ir = false;
  bool print_asm = false;
  bool isO2 = false;
  std::string output = "-";
  int opt;
  while ((opt = getopt(argc, argv, "Sco:O::")) != -1) {
    switch (opt) {
    case 'S':
      print_asm = true;
      print_ir = false;
      break;
    case 'c':
      print_ir = true;
      print_asm = false;
      break;
    case 'o':
      output = optarg;
      break;
    case 'O':
      isO2 = true;
      break;
    default:
      break;
    }
  }
  // Explicit check (not an assert, which vanishes under NDEBUG): without a
  // remaining argument argv[optind] is a null pointer.
  if (optind >= argc) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "compiler")
              << " [-S|-c] [-O[level]] [-o output] <source file>" << std::endl;
    return -1;
  }
  const char *filename = argv[optind];
  yyin = fopen(filename, "r");
  if (yyin == nullptr) {
    std::cerr << "yyin open " << filename << " failed" << std::endl;
    return -1;
  }
  // Frontend parser; a failed parse leaves no usable AST to walk.
  if (yyparse() != 0 || !root) {
    std::cerr << "parse " << filename << " failed" << std::endl;
    return -1;
  }
  // Generate IR from AST
  GenIR genIR;
  root->accept(genIR);
  std::unique_ptr<Module> m = genIR.getModule();
  // Run IR optimization
  if (isO2) {
    // The pass objects are deliberately not deleted: whether Optimization
    // has a virtual destructor cannot be seen from here, and the process
    // exits shortly afterwards.
    std::vector<Optimization *> Opt;
    Opt.push_back(new DeadCodeDeletion(m.get()));
    Opt.push_back(new ConstSpread(m.get()));
    Opt.push_back(new CombineInstr(m.get()));
    Opt.push_back(new DomainTree(m.get()));
    Opt.push_back(new SimplifyJump(m.get()));
    Opt.push_back(new LoopInvariant(m.get()));
    Opt.push_back(new SimplifyJump(m.get()));
    for (auto x : Opt)
      x->execute();
  }
  // Open output file
  std::ofstream fout;
  std::ostream *out;
  if (output == "-") {
    out = &std::cout;
  } else {
    fout.open(output);
    if (!fout.is_open()) {
      std::cerr << "open output file " << output << " failed" << std::endl;
      return -1;
    }
    out = &fout;
  }
  // Print IR result. print() is called even when the IR is not printed, as
  // in the original flow; NOTE(review): presumably it has side effects the
  // backend relies on (e.g. naming instructions) - confirm before removing.
  const std::string IR = m->print();
  if (print_ir) {
    *out << IR << std::endl;
  }
  // Generate assembly file
  if (print_asm) {
    auto builder = new RiscvBuilder();
    const std::string RiscvCode = builder->buildRISCV(m.get());
    *out << RiscvCode << std::endl;
  }
  return 0;
}

@ -0,0 +1,54 @@
#include "BasicOperation.h"
/// Erases the first entry of opnd's use list whose user is `inst`.
/// Does nothing when no such entry exists; later entries for the same user
/// are left in place.
void deleteUse(Value *opnd, Instruction *inst) {
  auto &uses = opnd->use_list_;
  auto cursor = uses.begin();
  // Advance to the first use that belongs to inst (or to the end).
  while (cursor != uses.end() && !(cursor->val_ == inst))
    ++cursor;
  if (cursor != uses.end())
    uses.erase(cursor);
}
/// Updates the phi instructions of `suc` after the edge bb -> suc goes away.
///
/// For every phi in `suc`, the first (value, block) pair whose block is `bb`
/// is removed. If the phi's block then has exactly one predecessor, the phi
/// is replaced everywhere by its operand 0 and deleted once the scan is done.
void SolvePhi(BasicBlock *bb, BasicBlock *suc) {
  std::vector<Instruction *> redundantPhis;
  for (Instruction *phi : suc->instr_list_) {
    if (phi->op_id_ != Instruction::PHI)
      continue;

    // Block operands live at the odd indices.
    for (int blockIdx = 1; blockIdx < phi->num_ops_; blockIdx += 2) {
      if (phi->get_operand(blockIdx) != bb)
        continue;
      phi->remove_operands(blockIdx - 1, blockIdx);
      break;
    }

    if (phi->parent_->pre_bbs_.size() != 1)
      continue;
    phi->replace_all_use_with(phi->get_operand(0));
    redundantPhis.push_back(phi);
  }

  // Deletion is deferred so the instruction list is not modified while it
  // is being iterated.
  for (Instruction *phi : redundantPhis)
    suc->delete_instr(phi);
}
/// Depth-first walk over successor edges starting at `bb`; every block
/// reached (including `bb` itself) is added to `vis`. A null `bb` is ignored.
void dfsGraph(BasicBlock *bb, std::set<BasicBlock *> &vis) {
  if (bb == nullptr)
    return;
  vis.insert(bb);
  for (BasicBlock *next : bb->succ_bbs_) {
    if (vis.count(next) != 0)
      continue; // already visited
    dfsGraph(next, vis);
  }
}
/// Removes every basic block of `func` that cannot be reached from the block
/// named "label_entry", and fixes up the phi instructions of the removed
/// blocks' successors.
void DeleteUnusedBB(Function *func) {
  // 1. Mark everything reachable from the entry block.
  std::set<BasicBlock *> vis;
  for (auto bb : func->basic_blocks_)
    if (bb->name_ == "label_entry") {
      dfsGraph(bb, vis);
      break;
    }
  // 2. Collect the unreachable blocks first. remove_bb() is called on the
  //    same function below; it presumably edits basic_blocks_ (its body is
  //    not visible here), which must not happen while that vector is being
  //    range-iterated.
  std::vector<BasicBlock *> unreachable;
  for (auto bb : func->basic_blocks_)
    if (vis.find(bb) == vis.end())
      unreachable.push_back(bb);
  // 3. Remove them, then repair the phis of their successors.
  for (auto bb : unreachable) {
    bb->parent_->remove_bb(bb);
    for (auto suc : bb->succ_bbs_)
      SolvePhi(bb, suc);
  }
}

@ -0,0 +1,15 @@
#ifndef BASICOPERATION
#define BASICOPERATION
#include "../ir/ir.h"
#include <vector>
#include <map>
#include <set>
#include <stack>
#include "opt.h"
// Erases the first use of `opnd` whose user is `inst` (no-op if none).
void deleteUse(Value* opnd,Instruction *inst);
// Depth-first walk over successor edges from `bb`; visited blocks are added
// to `vis`.
void dfsGraph(BasicBlock *bb, std::set<BasicBlock *> &vis);
// Removes, from the phi instructions of `succ_bb`, the incoming pair that
// comes from `bb`; phis left in a block with a single predecessor are
// replaced by their first operand and deleted.
void SolvePhi(BasicBlock *bb, BasicBlock *succ_bb);
// Removes the basic blocks of `func` that are unreachable from the block
// named "label_entry".
void DeleteUnusedBB(Function *func);
#endif // !BASICOPERATION

@ -0,0 +1,6 @@
# Sources of the IR optimization passes and their shared helpers.
set(SOURCE_FILES ConstSpread.cpp BasicOperation.cpp LoopInvariant.cpp CombineInstr.cpp SimplifyJump.cpp opt.cpp DeleteDeadCode.cpp)
# No STATIC/SHARED keyword is given, so the library type follows
# BUILD_SHARED_LIBS.
add_library(opt ${SOURCE_FILES})
# The passes operate on the IR, so link against the ir library.
target_link_libraries(opt PRIVATE ir)
# Adds the IR directory to this target's include path.
# NOTE(review): assumes the IR sources live under <source root>/src/ir - confirm.
target_include_directories(opt PRIVATE ${CMAKE_SOURCE_DIR}/src/ir)

@ -0,0 +1,95 @@
#include "CombineInstr.h"
#include <unordered_map>
void CombineInstr::execute() {
for (auto foo : m->function_list_)
if (!foo->basic_blocks_.empty())
for (BasicBlock *bb : foo->basic_blocks_)
checkBlock(bb);
}
// Scans `bb` for a chain of Add (or Sub) instructions in which each result is
// used exactly once, as operand 0 of the next instruction of the same opcode
// in the same block, and tries to replace the repeated tail of the chain by
// one Mul with a constant repeat count. The scan restarts from the beginning
// of the block after every successful rewrite, until nothing changes.
void CombineInstr::checkBlock(BasicBlock *bb) {
bool change = true;
while (change) {
change = false;
for (auto instr : bb->instr_list_) {
// Chain heads must be Add/Sub ...
if (instr->op_id_ != Instruction::Add &&
instr->op_id_ != Instruction::Sub)
continue;
// ... with exactly one use, and that use must be as operand 0.
if (instr->use_list_.size() != 1 || instr->use_list_.back().arg_no_ != 0)
continue;
Instruction *nextInstr =
dynamic_cast<Instruction *>(instr->use_list_.back().val_);
// The single user has to be the same opcode in the same basic block.
if (nextInstr == nullptr || instr->op_id_ != nextInstr->op_id_ ||
instr->parent_ != nextInstr->parent_)
continue;
// Optime: how many times each value has occurred as an operand of the chain.
std::unordered_map<Value *, unsigned int> Optime;
Instruction *StartInstr = instr, *EndInstr = nullptr;
Instruction *invalidStart = nullptr, *invalidEnd = nullptr; // instructions that become redundant
bool isRepeat = false;
Value *dupOp = nullptr, *Candi0 = instr->get_operand(0),
*Candi1 = instr->get_operand(1);
Optime[Candi0]++;
Optime[Candi1]++;
Optime[nextInstr->get_operand(1)]++;
// Walk along the chain
instr = nextInstr;
while (instr->use_list_.size() == 1 &&
instr->use_list_.back().arg_no_ == 0) {
nextInstr = dynamic_cast<Instruction *>(instr->use_list_.back().val_);
if (nextInstr == nullptr || instr->op_id_ != nextInstr->op_id_ ||
instr->parent_ != nextInstr->parent_)
break;
if (!isRepeat) {
// Only `isRepeat = true;` is guarded by the inner if; EndInstr and
// invalidStart are updated on every iteration taken while !isRepeat.
if (Optime.find(nextInstr->get_operand(1)) != Optime.end())
isRepeat = true;
EndInstr = instr;
invalidStart = nextInstr;
} else if (Optime.find(nextInstr->get_operand(1)) == Optime.end())
break;
Optime[nextInstr->get_operand(1)]++;
instr = nextInstr;
}
invalidEnd = instr;
// Try to merge: either operand 1 of the head occurs once and operand 0 many
// times, or operand 0 occurs once and operand 1 many times.
unsigned int dupTime = 0;
if (Optime[Candi1] == 1 && Optime[Candi0] > 1) {
dupOp = Candi0;
dupTime = Optime[Candi0];
} else if (Optime[Candi0] == 1 && Optime[Candi1] > 1) {
dupOp = Candi1;
dupTime = Optime[Candi1];
} else
continue;
// NOTE(review): in both branches above the *other* candidate has count 1
// and differs from dupOp, so the `p.second == 1` arm below looks like it
// always resets dupTime to 0, i.e. the rewrite after this loop appears to
// be unreachable as written - confirm the intended condition.
for (auto p : Optime) {
if (p.second == 1) {
if (p.first != dupOp) {
dupTime = 0;
break;
}
} else {
if (dupTime != p.second) {
dupTime = 0;
break;
}
}
}
if (!dupTime)
continue;
// Build `EndInstr * dupTime` (created detached, then inserted in front of
// invalidStart) and let it take over invalidStart's name and every use of
// invalidEnd.
ConstantInt *dupTimeConst =
new ConstantInt(instr->parent_->parent_->parent_->int32_ty_, dupTime);
Instruction *toMulInst = new BinaryInst(
bb->parent_->parent_->int32_ty_, Instruction::Mul,
static_cast<Value *>(EndInstr), dupTimeConst, bb, true);
toMulInst->name_ = invalidStart->name_;
bb->add_instruction_before_inst(toMulInst, invalidStart);
invalidEnd->replace_all_use_with(toMulInst);
// Delete the now-redundant part of the chain.
// NOTE(review): `ins` is read again after delete_instr(ins); this relies on
// delete_instr not freeing the instruction or clearing its use list - confirm.
for (Instruction *ins = invalidStart; ins != nextInstr;) {
bb->delete_instr(ins);
ins = dynamic_cast<Instruction *>(ins->use_list_.back().val_);
}
// The instruction list was modified: leave the range-for and rescan.
change = true;
break;
}
}
}

@ -0,0 +1,13 @@
#ifndef COMBINEINSTRH
#define COMBINEINSTRH
#include "opt.h"
/// Optimization pass that looks for chains of repeated Add/Sub instructions
/// inside a basic block and tries to combine them.
class CombineInstr : public Optimization {
public:
  CombineInstr(Module *m) : Optimization(m) {}

  /// Runs the pass over every function body in the module.
  void execute();

  /// Processes a single basic block.
  void checkBlock(BasicBlock *bb);
};
#endif // !COMBINEINSTRH

@ -0,0 +1,354 @@
#include "ConstSpread.h"
/// Folds an integer binary operation on two constants.
///
/// The arithmetic is carried out so that the compiler itself never executes
/// undefined behaviour while folding: add/sub/mul/shl are computed on
/// unsigned values (wrapping modulo 2^N), division is done in a wider type,
/// and operations without a meaningful result are not folded.
///
/// @return a new i32 ConstantInt holding the result, or nullptr when `op` is
///         not a supported integer operation, when the divisor of SDiv/SRem
///         is zero, or when a shift amount is outside [0, 31]. Callers must
///         leave the instruction untouched in that case.
ConstantInt *ConstSpread::CalcInt(Instruction::OpID op, ConstantInt *v1,
                                  ConstantInt *v2) {
  const int a = v1->value_, b = v2->value_;
  const unsigned ua = static_cast<unsigned>(a);
  const unsigned ub = static_cast<unsigned>(b);
  const bool shift_in_range = b >= 0 && b < 32;
  int folded = 0;
  switch (op) {
  case Instruction::Add:
    folded = static_cast<int>(ua + ub);
    break;
  case Instruction::Sub:
    folded = static_cast<int>(ua - ub);
    break;
  case Instruction::Mul:
    folded = static_cast<int>(ua * ub);
    break;
  case Instruction::SDiv:
    if (b == 0)
      return nullptr; // division by zero cannot be folded
    // Widened so that INT_MIN / -1 does not overflow during folding.
    folded = static_cast<int>(static_cast<long long>(a) / b);
    break;
  case Instruction::SRem:
    if (b == 0)
      return nullptr; // remainder by zero cannot be folded
    folded = static_cast<int>(static_cast<long long>(a) % b);
    break;
  case Instruction::Shl:
    if (!shift_in_range)
      return nullptr;
    folded = static_cast<int>(ua << b);
    break;
  case Instruction::LShr:
    if (!shift_in_range)
      return nullptr;
    folded = static_cast<int>(ua >> b);
    break;
  case Instruction::AShr:
    if (!shift_in_range)
      return nullptr;
    folded = a >> b;
    break;
  case Instruction::And:
    folded = a & b;
    break;
  case Instruction::Or:
    folded = a | b;
    break;
  case Instruction::Xor:
    folded = a ^ b;
    break;
  default:
    return nullptr;
  }
  return new ConstantInt(m->int32_ty_, folded);
}
/// Folds a float binary operation (FAdd/FSub/FMul/FDiv) on two constants.
/// @return a new float ConstantFloat with the result, or nullptr for any
///         other opcode.
ConstantFloat *ConstSpread::CalcFloat(Instruction::OpID op, ConstantFloat *v1,
                                      ConstantFloat *v2) {
  const float lhs = v1->value_;
  const float rhs = v2->value_;
  if (op == Instruction::FAdd)
    return new ConstantFloat(m->float32_ty_, lhs + rhs);
  if (op == Instruction::FSub)
    return new ConstantFloat(m->float32_ty_, lhs - rhs);
  if (op == Instruction::FMul)
    return new ConstantFloat(m->float32_ty_, lhs * rhs);
  if (op == Instruction::FDiv)
    return new ConstantFloat(m->float32_ty_, lhs / rhs);
  return nullptr; // not a float binary operation
}
/// Folds an integer comparison of two constants.
/// Signed predicates compare the values as int; unsigned predicates compare
/// them after conversion to unsigned.
/// @return a new i1 ConstantInt (0 or 1), or nullptr for an unknown predicate.
ConstantInt *ConstSpread::CalcICMP(ICmpInst::ICmpOp op, ConstantInt *v1,
                                   ConstantInt *v2) {
  const int sl = v1->value_;
  const int sr = v2->value_;
  const unsigned ul = static_cast<unsigned>(sl);
  const unsigned ur = static_cast<unsigned>(sr);

  bool outcome;
  switch (op) {
  case ICmpInst::ICMP_EQ:
    outcome = (sl == sr);
    break;
  case ICmpInst::ICMP_NE:
    outcome = (sl != sr);
    break;
  case ICmpInst::ICMP_SGT:
    outcome = (sl > sr);
    break;
  case ICmpInst::ICMP_SGE:
    outcome = (sl >= sr);
    break;
  case ICmpInst::ICMP_SLT:
    outcome = (sl < sr);
    break;
  case ICmpInst::ICMP_SLE:
    outcome = (sl <= sr);
    break;
  case ICmpInst::ICMP_UGT:
    outcome = (ul > ur);
    break;
  case ICmpInst::ICMP_UGE:
    outcome = (ul >= ur);
    break;
  case ICmpInst::ICMP_ULT:
    outcome = (ul < ur);
    break;
  case ICmpInst::ICMP_ULE:
    outcome = (ul <= ur);
    break;
  default:
    return nullptr;
  }
  return new ConstantInt(m->int1_ty_, outcome);
}
/// Folds a floating-point comparison between two constants.
///
/// Ordered (O*) and unordered (U*) predicates are folded with the same plain
/// C++ comparison.
/// NOTE(review): if these predicates are meant to follow LLVM's fcmp
/// semantics, U* should yield true and O* false when an operand is NaN;
/// confirm against what the front end emits before changing this.
///
/// @return a newly allocated i1 constant, or nullptr for an unknown predicate.
ConstantInt *ConstSpread::CalcFCMP(FCmpInst::FCmpOp op, ConstantFloat *v1,
                                   ConstantFloat *v2) {
  const float a = v1->value_;
  const float b = v2->value_;
  bool outcome;
  switch (op) {
  case FCmpInst::FCMP_FALSE:
    outcome = false;
    break;
  case FCmpInst::FCMP_TRUE:
    outcome = true;
    break;
  case FCmpInst::FCMP_UEQ:
  case FCmpInst::FCMP_OEQ:
    outcome = a == b;
    break;
  case FCmpInst::FCMP_UNE:
  case FCmpInst::FCMP_ONE:
    outcome = a != b;
    break;
  case FCmpInst::FCMP_UGT:
  case FCmpInst::FCMP_OGT:
    outcome = a > b;
    break;
  case FCmpInst::FCMP_UGE:
  case FCmpInst::FCMP_OGE:
    outcome = a >= b;
    break;
  case FCmpInst::FCMP_ULE:
  case FCmpInst::FCMP_OLE:
    outcome = a <= b;
    break;
  case FCmpInst::FCMP_ULT:
  case FCmpInst::FCMP_OLT:
    outcome = a < b;
    break;
  default:
    return nullptr;
  }
  return new ConstantInt(m->int1_ty_, outcome);
}
/// Runs constant folding and constant-branch elimination on every function
/// that has a body, repeating until neither step reports a change.
void ConstSpread::execute() {
  assert(m != nullptr);
  for (Function *foo : m->function_list_) {
    if (foo->basic_blocks_.empty())
      continue; // declaration only
    bool progressed;
    do {
      // Both steps run on every round; unused blocks are cleaned up after.
      progressed = SpreadingConst(foo);
      progressed |= BranchProcess(foo);
      DeleteUnusedBB(foo);
    } while (progressed);
  }
}
/// Folds instructions whose operands are constants and forwards constants
/// through stores and loads inside each basic block.
///
/// For every block the maps of "address -> last constant stored" start empty,
/// so no information crosses block boundaries. Folded instructions have their
/// uses replaced immediately and are deleted once the whole function has been
/// scanned.
///
/// @param func function to process
/// @return true if at least one instruction was removed
bool ConstSpread::SpreadingConst(Function *func) {
  uselessInstr.clear();
  for (auto bb : func->basic_blocks_) {
    ConstIntMap.clear();
    ConstFloatMap.clear();
    for (auto instr : bb->instr_list_) {
      // Redirect every use of `instr` to `replacement` and queue `instr` for
      // deletion after the scan.
      auto foldTo = [&](auto *replacement) {
        instr->replace_all_use_with(replacement);
        uselessInstr[instr] = bb;
      };
      switch (instr->op_id_) {
      case Instruction::Add:
      case Instruction::Sub:
      case Instruction::Mul:
      case Instruction::SDiv:
      case Instruction::UDiv:
      case Instruction::SRem:
      case Instruction::URem:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::Shl:
      case Instruction::AShr:
      case Instruction::LShr: {
        auto lhs = dynamic_cast<ConstantInt *>(instr->get_operand(0));
        auto rhs = dynamic_cast<ConstantInt *>(instr->get_operand(1));
        if (lhs && rhs) {
          // CalcInt returns nullptr when it declines to fold.
          if (auto folded = this->CalcInt(instr->op_id_, lhs, rhs))
            foldTo(folded);
        }
      } break;
      case Instruction::ICmp: {
        auto lhs = dynamic_cast<ConstantInt *>(instr->get_operand(0));
        auto rhs = dynamic_cast<ConstantInt *>(instr->get_operand(1));
        if (lhs && rhs) {
          if (auto folded = this->CalcICMP(
                  dynamic_cast<ICmpInst *>(instr)->icmp_op_, lhs, rhs))
            foldTo(folded);
        }
      } break;
      case Instruction::FCmp: {
        auto lhs = dynamic_cast<ConstantFloat *>(instr->get_operand(0));
        auto rhs = dynamic_cast<ConstantFloat *>(instr->get_operand(1));
        if (lhs && rhs) {
          if (auto folded = this->CalcFCMP(
                  dynamic_cast<FCmpInst *>(instr)->fcmp_op_, lhs, rhs))
            foldTo(folded);
        }
      } break;
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv: {
        auto lhs = dynamic_cast<ConstantFloat *>(instr->get_operand(0));
        auto rhs = dynamic_cast<ConstantFloat *>(instr->get_operand(1));
        if (lhs && rhs) {
          if (auto folded = this->CalcFloat(instr->op_id_, lhs, rhs))
            foldTo(folded);
        }
      } break;
      case Instruction::FNeg: {
        auto operand = dynamic_cast<ConstantFloat *>(instr->get_operand(0));
        if (operand)
          foldTo(new ConstantFloat(m->float32_ty_, -operand->value_));
      } break;
      case Instruction::FPtoSI: {
        auto operand = dynamic_cast<ConstantFloat *>(instr->get_operand(0));
        if (operand)
          foldTo(new ConstantInt(m->int32_ty_, operand->value_));
      } break;
      case Instruction::SItoFP: {
        auto operand = dynamic_cast<ConstantInt *>(instr->get_operand(0));
        if (operand)
          foldTo(new ConstantFloat(m->float32_ty_, operand->value_));
      } break;
      case Instruction::ZExt: {
        auto operand = dynamic_cast<ConstantInt *>(instr->get_operand(0));
        if (operand)
          foldTo(new ConstantInt(m->int32_ty_, operand->value_));
      } break;
      case Instruction::Call:
        // A call may write any tracked address, so forget everything.
        // (Previously this fell through into the Load case, which could not
        // match anything once both maps were empty.)
        ConstIntMap.clear();
        ConstFloatMap.clear();
        break;
      case Instruction::Load: {
        auto globalVar = dynamic_cast<GlobalVariable *>(instr->get_operand(0));
        if (globalVar) {
          auto iterInt = ConstIntMap.find(globalVar);
          auto iterFloat = ConstFloatMap.find(globalVar);
          if (iterInt != ConstIntMap.end()) {
            foldTo(iterInt->second);
          } else if (iterFloat != ConstFloatMap.end()) {
            foldTo(iterFloat->second);
          }
        } else if (auto pos =
                       dynamic_cast<AllocaInst *>(instr->get_operand(0))) {
          if (pos->alloca_ty_->tid_ == Type::IntegerTyID) {
            auto iterInt = ConstIntMap.find(pos);
            if (iterInt != ConstIntMap.end())
              foldTo(iterInt->second);
          } else if (pos->alloca_ty_->tid_ == Type::FloatTyID) {
            auto iterFloat = ConstFloatMap.find(pos);
            if (iterFloat != ConstFloatMap.end())
              foldTo(iterFloat->second);
          }
        }
      } break;
      case Instruction::Store: {
        // NOTE(review): addresses are tracked by Value identity only; two
        // different pointer values that alias the same memory are treated as
        // unrelated. Confirm this is acceptable for stores through GEPs.
        auto storePos = instr->get_operand(1);
        auto storeValInt = dynamic_cast<ConstantInt *>(instr->get_operand(0));
        auto storeValFloat =
            dynamic_cast<ConstantFloat *>(instr->get_operand(0));
        if (storeValInt) {
          auto known = ConstIntMap.find(storePos);
          if (known == ConstIntMap.end())
            ConstIntMap[storePos] = storeValInt;
          else if (known->second->value_ == storeValInt->value_)
            uselessInstr[instr] = bb; // same value already stored here
          else
            known->second = storeValInt;
        } else if (storeValFloat) {
          auto known = ConstFloatMap.find(storePos);
          if (known == ConstFloatMap.end())
            ConstFloatMap[storePos] = storeValFloat;
          // Compare against the float being stored; storeValInt is null in
          // this branch and must not be dereferenced.
          else if (known->second->value_ == storeValFloat->value_)
            uselessInstr[instr] = bb;
          else
            known->second = storeValFloat;
        } else {
          // Non-constant store: the address no longer holds a known constant.
          ConstIntMap.erase(storePos);
          ConstFloatMap.erase(storePos);
        }
      } break;
      default:
        break;
      }
    }
  }
  if (!uselessInstr.empty()) {
    for (auto [instr, bb] : uselessInstr)
      bb->delete_instr(instr);
    return true;
  }
  return false;
}
/// Rewrites conditional branches whose condition is a constant into
/// unconditional branches to the taken target.
///
/// For each such branch the old terminator is deleted, the block is removed
/// from the predecessor set of every former successor, SolvePhi is invoked for
/// each successor that is not the taken target, and a new single-target branch
/// is created at the end of the block.
///
/// @return true if at least one branch was rewritten
bool ConstSpread::BranchProcess(Function *func) {
  bool rewritten = false;
  for (auto bb : func->basic_blocks_) {
    auto term = bb->get_terminator();
    if (!term)
      continue;
    // Only three-operand (conditional) branches are of interest.
    if (term->op_id_ != Instruction::Br ||
        dynamic_cast<BranchInst *>(term)->num_ops_ != 3)
      continue;
    auto cond = dynamic_cast<ConstantInt *>(term->get_operand(0));
    auto truebb = term->get_operand(1);
    auto falsebb = term->get_operand(2);
    if (!cond)
      continue;
    rewritten = true;
    // Zero selects the false target, anything else the true target.
    auto taken = (cond->value_ == 0) ? falsebb : truebb;
    bb->delete_instr(term);
    for (auto succ_bb : bb->succ_bbs_) {
      succ_bb->remove_pre_basic_block(bb);
      if (succ_bb != taken) {
        SolvePhi(bb, succ_bb);
      }
    }
    bb->succ_bbs_.clear();
    new BranchInst(dynamic_cast<BasicBlock *>(taken), bb);
  }
  return rewritten;
}

@ -0,0 +1,23 @@
#ifndef CONSTSPREAD
#define CONSTSPREAD
#include "../ir/ir.h"
#include "BasicOperation.h"
#include "opt.h"
class ConstSpread : public Optimization {
public:
ConstSpread(Module *m_) : Optimization(m_) {}
void execute();
ConstantInt *CalcInt(Instruction::OpID op, ConstantInt *v1, ConstantInt *v2);
ConstantFloat *CalcFloat(Instruction::OpID op, ConstantFloat *v1,
ConstantFloat *v2);
ConstantInt *CalcICMP(ICmpInst::ICmpOp op, ConstantInt *v1, ConstantInt *v2);
ConstantInt *CalcFCMP(FCmpInst::FCmpOp op, ConstantFloat *v1, ConstantFloat *v2);
bool SpreadingConst(Function *func);
bool BranchProcess(Function *func);
std::map<Value *, ConstantInt *> ConstIntMap;
std::map<Value *, ConstantFloat *> ConstFloatMap;
std::map<Instruction *, BasicBlock *> uselessInstr;
};
#endif // !CONSTSPREAD

@ -0,0 +1,180 @@
#include "DeleteDeadCode.h"
#include "ConstSpread.h"
// Names of functions whose calls are always kept by dead-code deletion.
// Starts with runtime/library routines; DeadCodeDeletion::Init adds user
// functions that store to non-local memory or call a function already listed.
std::set<std::string> OptFunc = {
    // input routines
    "getint",
    "getfloat",
    "getch",
    "getarray",
    "getfarray",
    // output routines
    "putint",
    "putfloat",
    "putch",
    "putarray",
    "putfarray",
    // timing
    "_sysy_starttime",
    "_sysy_stoptime",
    // memory helpers
    "memcpy",
    "memclr",
    "memset",
    "llvm.memset.p0.i32",
    "__aeabi_memcpy4",
    "__aeabi_memclr4",
    "__aeabi_memset4",
};
/// Records, for every function that has a body, which of its arguments are of
/// pointer type. Functions without pointer arguments get no entry.
void DeadCodeDeletion::initFuncPtrArg() {
  for (auto foo : m->function_list_) {
    if (foo->basic_blocks_.empty())
      continue;
    for (auto arg : foo->arguments_) {
      if (arg->type_->tid_ != Type::PointerTyID)
        continue;
      // operator[] creates the (empty) set on first use.
      funcPtrArgs[foo].insert(arg);
    }
  }
}
/// Prepares per-function state for dead-code deletion.
///
/// Rebuilds `storePos` (address -> stores writing to it) and adds `foo` to
/// OptFunc when it stores to a global, to a GEP result or through one of its
/// pointer arguments, or when it calls a function already in OptFunc.
void DeadCodeDeletion::Init(Function *foo) {
  storePos.clear();
  for (auto bb : foo->basic_blocks_) {
    for (auto ins : bb->instr_list_) {
      switch (ins->op_id_) {
      case Instruction::Store: {
        auto dest = ins->get_operand(1);
        storePos[dest].push_back(ins);
        const bool toGlobal = dynamic_cast<GlobalVariable *>(dest) != nullptr;
        const bool toElement =
            dynamic_cast<GetElementPtrInst *>(dest) != nullptr;
        const bool viaPtrArg = funcPtrArgs[foo].count(dest) != 0;
        if (toGlobal || toElement || viaPtrArg)
          OptFunc.insert(foo->name_);
        break;
      }
      case Instruction::Call: {
        // The callee is the last operand of a call.
        auto callee = ins->get_operand(ins->operands_.size() - 1);
        if (OptFunc.count(callee->name_))
          OptFunc.insert(foo->name_);
        break;
      }
      default:
        break;
      }
    }
  }
}
/// Tells whether `ins` must be kept regardless of whether its result is used.
///
/// True for returns (which also records the containing block as `exitBlock`),
/// for calls whose callee name is in OptFunc, and for stores to a global, a
/// GEP result or a pointer argument of `foo`.
bool DeadCodeDeletion::checkOpt(Function *foo, Instruction *ins) {
  switch (ins->op_id_) {
  case Instruction::Ret:
    exitBlock = ins->parent_;
    return true;
  case Instruction::Call: {
    auto callee = ins->get_operand(ins->operands_.size() - 1);
    return OptFunc.count(callee->name_);
  }
  case Instruction::Store:
    return dynamic_cast<GlobalVariable *>(ins->get_operand(1)) ||
           dynamic_cast<GetElementPtrInst *>(ins->get_operand(1)) ||
           funcPtrArgs[foo].count(ins->get_operand(1));
  default:
    return false;
  }
}
/// Computes the set of instructions that deleteInstr() will keep.
///
/// Despite its name, `uselessInstr` collects the instructions to keep: it is
/// seeded with everything checkOpt() accepts and then grown with a worklist
/// over operands, terminators of phi incoming blocks, stores to a kept
/// address, and terminators of the reverse dominance frontier of every block
/// that contains a kept instruction (those blocks go into `uselessBlock`).
void DeadCodeDeletion::findInstr(Function *foo) {
  std::list<Value *> pending;
  // Adds `candidate` to the kept set and queues it if it was not there yet.
  auto keep = [&](Instruction *candidate) {
    if (uselessInstr.insert(candidate).second)
      pending.push_back(candidate);
  };
  for (auto bb : foo->basic_blocks_) {
    for (auto ins : bb->instr_list_) {
      if (!checkOpt(foo, ins))
        continue;
      uselessInstr.insert(ins);
      pending.push_back(ins);
    }
  }
  while (!pending.empty()) {
    auto ins = dynamic_cast<Instruction *>(pending.back());
    pending.pop_back();
    if (ins == nullptr)
      continue;
    // Everything this instruction reads is needed too.
    for (auto operand : ins->operands_) {
      if (auto def = dynamic_cast<Instruction *>(operand))
        keep(def);
    }
    // A phi depends on the branches that decide which input arrives.
    if (ins->op_id_ == Instruction::PHI) {
      for (int i = 1; i < ins->operands_.size(); i += 2) {
        auto incoming = dynamic_cast<BasicBlock *>(ins->get_operand(i));
        keep(incoming->get_terminator());
      }
    }
    // Stores that write to a kept address are kept as well.
    if (storePos.count(ins)) {
      for (auto writer : storePos[ins]) {
        if (uselessInstr.insert(dynamic_cast<Instruction *>(writer)).second)
          pending.push_back(writer);
      }
      storePos.erase(ins);
    }
    // First kept instruction in a block: keep the controlling terminators.
    if (uselessBlock.insert(ins->parent_).second) {
      for (auto frontier : ins->parent_->rdom_frontier_)
        keep(frontier->get_terminator());
    }
  }
}
/// Deletes every instruction of `foo` that findInstr() did not mark as kept.
///
/// Non-branch instructions are simply removed. A conditional branch that was
/// not kept is turned into an unconditional branch to the nearest block in
/// `bb->rdoms_` (other than `bb`) that contains a kept instruction, falling
/// back to `exitBlock`; CFG edges are updated accordingly. Unconditional
/// branches are left untouched.
void DeadCodeDeletion::deleteInstr(Function *foo) {
  for (auto bb : foo->basic_blocks_) {
    std::vector<Instruction *> ins2Del;
    for (auto ins : bb->instr_list_) {
      if (uselessInstr.count(ins))
        continue; // marked as kept
      if (ins->op_id_ != Instruction::Br) {
        ins2Del.push_back(ins);
        continue;
      }
      if (ins->operands_.size() != 3)
        continue;
      auto trueBB = dynamic_cast<BasicBlock *>(ins->get_operand(1));
      auto falseBB = dynamic_cast<BasicBlock *>(ins->get_operand(2));
      trueBB->remove_pre_basic_block(bb);
      falseBB->remove_pre_basic_block(bb);
      bb->remove_succ_basic_block(trueBB);
      bb->remove_succ_basic_block(falseBB);
      BasicBlock *temp = exitBlock;
      std::vector<BasicBlock *> rdoms(bb->rdoms_.begin(), bb->rdoms_.end());
      // Order so that x comes before y when y is in x's rdoms_ set. Every
      // block is a member of its own rdoms_, so the x != y guard is required
      // to make the comparator irreflexive (a std::sort precondition).
      std::sort(rdoms.begin(), rdoms.end(),
                [](BasicBlock *x, BasicBlock *y) -> bool {
                  return x != y && x->rdoms_.count(y) != 0;
                });
      for (auto rdbb : rdoms) {
        if (rdbb != bb && uselessBlock.count(rdbb)) {
          temp = rdbb;
          break;
        }
      }
      // Shrink the branch to a single operand: the new target.
      ins->remove_operands(0, 2);
      ins->num_ops_ = 1;
      ins->operands_.resize(1);
      ins->use_pos_.resize(1);
      ins->set_operand(0, temp);
      bb->add_succ_basic_block(temp);
      temp->add_pre_basic_block(bb);
    }
    // Deletion is deferred so instr_list_ is not modified while iterating.
    for (auto ins : ins2Del) {
      bb->delete_instr(ins);
    }
  }
}
void DeadCodeDeletion::execute() {
ReverseDomainTree reverseDomainTree(m);
reverseDomainTree.execute();
initFuncPtrArg();
for (auto foo : m->function_list_)
if (!foo->basic_blocks_.empty()) {
Init(foo);
findInstr(foo);
deleteInstr(foo);
DeleteUnusedBB(foo);
}
}

@ -0,0 +1,25 @@
#ifndef DELETEDEADCODEH
#define DELETEDEADCODEH
#include "opt.h"
// NOTE(review): the accompanying source file defines and uses a set named
// `OptFunc`; no definition of `sysLibFunc` is visible there. Confirm whether
// this declaration is still needed or should name `OptFunc` instead.
extern std::set<std::string> sysLibFunc;
class DeadCodeDeletion : public Optimization {
std::map<Function *, std::set<Value *>> funcPtrArgs;
std::map<Value *, std::vector<Value *>> storePos;
BasicBlock *exitBlock;
std::set<Instruction *> uselessInstr;
std::set<BasicBlock *> uselessBlock;
public:
DeadCodeDeletion(Module *m) : Optimization(m), exitBlock(nullptr) {}
void execute();
void initFuncPtrArg();
void Init(Function *foo);
bool checkOpt(Function *foo, Instruction *instr);
void findInstr(Function *foo);
void deleteInstr(Function *foo);
};
#endif // !DELETEDEADCODEH

@ -0,0 +1,149 @@
#include "LoopInvariant.h"
void LoopInvariant::execute() {
searchLoop();
while (!loopStack.empty()) {
auto loop = loopStack.top();
loopStack.pop();
std::set<Value *> assignVals;
std::set<Instruction *> visInstr; // 标记下这条语句是不是被操作过
std::vector<Instruction *> invarInstrs; // 存在不变量的语句集合
std::map<Instruction *, BasicBlock *> instrPos;
for (auto bb : *loop)
for (Instruction *inst : bb->instr_list_)
// 赋值语句做操作
if (inst->is_binary() || inst->is_cmp() || inst->is_fcmp() ||
inst->is_call() || inst->is_phi() || inst->is_zext() ||
inst->is_fptosi() || inst->is_sitofp() || inst->is_gep() ||
inst->is_load())
assignVals.insert(inst);
bool changed = true;
while (changed) {
changed = false;
for (auto bb : *loop) {
for (auto instr : bb->instr_list_) {
if (visInstr.find(instr) != visInstr.end())
continue;
if (!instr->is_gep() && !instr->is_alloca() && !instr->is_br() &&
!instr->is_ret() && !instr->is_phi() && !instr->is_store() &&
!instr->is_load() &&
!(instr->is_call() &&
instr->get_operand(instr->num_ops_ - 1)->print() != "rand")) {
bool move = true;
// 一个操作数不是不变量就不能动
for (unsigned int i = 0; i < instr->num_ops_; i++)
if (assignVals.find(instr->get_operand(i)) != assignVals.end())
move = false;
if (move) {
instrPos[instr] = bb;
invarInstrs.push_back(instr);
assignVals.erase(instr);
visInstr.insert(instr);
changed = true;
}
}
}
}
}
auto enter = entryPos[loop];
for (auto prev : enter->pre_bbs_)
if (loop->find(prev) == loop->end())
for (auto inst : invarInstrs)
prev->add_instruction_before_terminator(inst);
}
}
void LoopInvariant::searchLoop() {
for (auto foo : m->function_list_) {
if (foo->basic_blocks_.empty())
continue;
std::set<node *> nodes;
std::set<node *> entry;
std::set<std::set<node *> *> SCCs;
std::map<BasicBlock *, node *> nodeMap;
for (auto bb : foo->basic_blocks_) {
auto cur = new node(bb, -1, -1, 0);
nodeMap[bb] = cur;
nodes.insert(cur);
}
for (auto bb : foo->basic_blocks_) {
auto BlockNode = nodeMap[bb];
for (auto suc : bb->succ_bbs_)
BlockNode->suc.insert(nodeMap[suc]);
for (auto pre : bb->succ_bbs_)
BlockNode->pre.insert(nodeMap[pre]);
}
while (LoopInvariant::searchSCC(nodes, SCCs)) {
for (auto SCC : SCCs) {
node *enter = nullptr;
for (auto curBlock : *SCC)
for (auto pre : curBlock->pre)
if (SCC->find(pre) == SCC->end())
enter = curBlock;
else if (entry.find(pre) != entry.end())
enter = pre;
auto curLoop = new std::set<BasicBlock *>;
for (auto curBlock : *SCC)
curLoop->insert(curBlock->bb);
entryPos[curLoop] = enter->bb;
loopStack.push(curLoop);
entry.insert(enter);
nodes.erase(enter);
for (auto pre : enter->pre)
pre->suc.erase(enter);
for (auto suc : enter->suc)
suc->pre.erase(enter);
}
for (auto SCC : SCCs)
SCC->clear();
SCCs.clear();
for (auto NodeBlock : nodes)
NodeBlock = new node(nullptr, -1, -1, false);
}
for (auto node : nodes)
delete node;
}
}
/// Runs Tarjan's algorithm from every node that has not been visited yet
/// (dfn == -1) and adds the components found to `SCCs`.
///
/// @return true if `SCCs` is non-empty afterwards
bool LoopInvariant::searchSCC(std::set<node *> &basicBlocks,
                              std::set<std::set<node *> *> &SCCs) {
  ind = 0;
  tarjanStack = std::stack<node *>(); // start from an empty stack
  for (node *candidate : basicBlocks) {
    if (candidate->dfn != -1)
      continue;
    tarjan(candidate, SCCs);
  }
  return !SCCs.empty();
}
/// Recursive step of Tarjan's strongly-connected-components search.
///
/// Components consisting of a single node are discarded; every larger
/// component is allocated as a new set and inserted into `SCCs`.
void LoopInvariant::tarjan(node *cur, std::set<std::set<node *> *> &SCCs) {
  ind += 1;
  cur->dfn = ind;
  cur->low = ind;
  cur->inStack = true;
  tarjanStack.push(cur);
  for (node *next : cur->suc) {
    if (next->dfn == -1) {
      tarjan(next, SCCs);
    } else if (!next->inStack) {
      continue; // belongs to a component that is already closed
    }
    if (next->low < cur->low)
      cur->low = next->low;
  }
  // Only the root of a component (low == dfn) pops it off the stack.
  if (cur->dfn != cur->low)
    return;
  if (tarjanStack.top() == cur) {
    // Single-node component: drop it.
    tarjanStack.pop();
    cur->inStack = false;
    return;
  }
  auto component = new std::set<node *>;
  for (;;) {
    node *member = tarjanStack.top();
    component->insert(member);
    tarjanStack.pop();
    member->inStack = false;
    if (member == cur)
      break;
  }
  SCCs.insert(component);
}

@ -0,0 +1,31 @@
#ifndef LOOPH
#define LOOPH
#include "BasicOperation.h"
/// Graph node used by the loop search: one per basic block.
struct node {
  BasicBlock *bb = nullptr; // block this node stands for
  std::set<node *> pre;     // predecessor nodes
  std::set<node *> suc;     // successor nodes
  // Tarjan bookkeeping; -1 means "not visited yet".
  int dfn = -1, low = -1;
  bool inStack = false;
  // Members carry default initializers so a default-constructed node is in the
  // same "unvisited" state the search expects.
  node() = default;
  node(BasicBlock *bb_, int dfn_, int low_, bool inStack_)
      : bb(bb_), dfn(dfn_), low(low_), inStack(inStack_) {}
};
class LoopInvariant : public Optimization {
int ind;
std::stack<node *> tarjanStack;
std::stack<std::set<BasicBlock *> *> loopStack;
std::map<std::set<BasicBlock *> *, BasicBlock *> entryPos;
public:
LoopInvariant(Module *m) : Optimization(m) {}
void execute();
void searchLoop();
bool searchSCC(std::set<node *> &basicBlock, std::set<std::set<node *> *> &SCCs);
void tarjan(node *pos, std::set<std::set<node *> *> &SCCs);
};
#endif // !LOOPH

@ -0,0 +1,120 @@
#include "SimplifyJump.h"
/// Runs the four jump-simplification steps on each selected function.
///
/// NOTE(review): functions are selected when basic_blocks_ is EMPTY, the
/// opposite of every other pass in this directory, so the steps only ever see
/// functions without a body. This looks inverted; confirm before flipping it,
/// because the steps have effectively never been exercised.
void SimplifyJump::execute() {
  for (auto foo : m->function_list_) {
    if (!foo->basic_blocks_.empty())
      continue;
    deleteUnReachableBlock(foo);
    mergePreBlock(foo);
    deleteUselessPhi(foo);
    deleteUselessJump(foo);
  }
}
/// Removes every block listed in `uselessBlock` from `foo`.
void SimplifyJump::deleteUselessBlock(Function *foo,
                                      std::vector<BasicBlock *> &uselessBlock) {
  for (BasicBlock *doomed : uselessBlock) {
    foo->remove_bb(doomed);
  }
}
/// Checks whether the single jump in `bb` can be bypassed.
///
/// @return false if some predecessor ends in a conditional branch that already
///         targets the block `bb` jumps to; true otherwise.
bool SimplifyJump::checkUselessJump(BasicBlock *bb) {
  auto destination = bb->get_terminator()->get_operand(0);
  for (auto pred : bb->pre_bbs_) {
    auto predBr = pred->get_terminator();
    if (predBr->operands_.size() == 1)
      continue; // unconditional branch: nothing to clash with
    auto onTrue = predBr->get_operand(1);
    auto onFalse = predBr->get_operand(2);
    const bool clashes = (onTrue == destination) || (onFalse == destination);
    if (clashes)
      return false;
  }
  return true;
}
/// Removes blocks (from index 2 onwards) that have no predecessors.
///
/// Before a block is removed, the operand pair that refers to it is dropped
/// from every phi instruction that uses it.
void SimplifyJump::deleteUnReachableBlock(Function *foo) {
  std::vector<BasicBlock *> uselessBlock;
  for (int i = 2; i < foo->basic_blocks_.size(); i++) {
    auto curbb = foo->basic_blocks_[i];
    if (!curbb->pre_bbs_.empty())
      continue;
    uselessBlock.push_back(curbb);
    // Work on a snapshot: removing phi operands may edit curbb's use list
    // while it is being walked.
    auto uses = curbb->use_list_;
    for (auto use : uses) {
      auto instr = dynamic_cast<PhiInst *>(use.val_);
      if (instr != nullptr)
        instr->remove_operands(use.arg_no_ - 1, use.arg_no_);
    }
  }
  // The collected blocks were previously never removed.
  deleteUselessBlock(foo, uselessBlock);
}
/// Merges a block into its only predecessor when that predecessor has no
/// other successor.
///
/// The predecessor's terminator is deleted, the block's instructions are moved
/// over, CFG edges are rewired to the predecessor, uses of the block are
/// redirected to the predecessor, and the emptied block is removed. Blocks at
/// index 0 and 1 are never merged.
void SimplifyJump::mergePreBlock(Function *foo) {
  std::vector<BasicBlock *> uselessBlock;
  for (int i = 2; i < foo->basic_blocks_.size(); i++) {
    auto bb = foo->basic_blocks_[i];
    if (bb->pre_bbs_.size() != 1)
      continue;
    auto preBlock = *bb->pre_bbs_.begin();
    auto preBr = preBlock->get_terminator();
    if (preBlock->succ_bbs_.size() != 1)
      continue;
    preBlock->delete_instr(preBr);
    // Move from a snapshot: remove_instr edits bb->instr_list_, which must not
    // happen while that same list is being iterated.
    auto moved = bb->instr_list_;
    for (auto instr : moved) {
      preBlock->add_instruction(instr);
      bb->remove_instr(instr);
    }
    preBlock->remove_succ_basic_block(bb);
    for (auto suc : bb->succ_bbs_) {
      preBlock->add_succ_basic_block(suc);
      suc->remove_pre_basic_block(bb);
      suc->add_pre_basic_block(preBlock);
    }
    bb->replace_all_use_with(preBlock);
    uselessBlock.push_back(bb);
  }
  deleteUselessBlock(foo, uselessBlock);
}
/// In blocks with exactly one predecessor, replaces each phi by its first
/// operand and deletes the phi.
void SimplifyJump::deleteUselessPhi(Function *foo) {
  for (auto bb : foo->basic_blocks_) {
    if (bb->pre_bbs_.size() != 1)
      continue;
    // Collect first: deleting while iterating instr_list_ would invalidate
    // the iteration.
    std::vector<Instruction *> phis;
    for (auto instr : bb->instr_list_)
      if (instr->is_phi())
        phis.push_back(instr);
    for (auto phi : phis) {
      phi->replace_all_use_with(phi->get_operand(0));
      bb->delete_instr(phi);
    }
  }
}
/// Removes blocks (from index 2 onwards) that contain nothing but a single
/// unconditional jump, provided checkUselessJump() allows it.
///
/// Phi instructions in the jump target get the bypassed block's entry replaced
/// by one entry per predecessor of that block; uses of the block are then
/// redirected to the target and the predecessors are linked to it.
void SimplifyJump::deleteUselessJump(Function *foo) {
  std::vector<BasicBlock *> bypassed;
  for (int idx = 2; idx < foo->basic_blocks_.size(); idx++) {
    BasicBlock *hop = foo->basic_blocks_[idx];
    if (hop->instr_list_.size() != 1)
      continue;
    auto jump = hop->get_terminator();
    if (jump->operands_.size() != 1 || jump->is_ret())
      continue;
    if (!checkUselessJump(hop))
      continue;
    bypassed.push_back(hop);
    auto dest = dynamic_cast<BasicBlock *>(jump->get_operand(0));
    for (auto instr : dest->instr_list_) {
      if (!instr->is_phi())
        continue;
      // Phi operands come in (value, block) pairs; blocks sit at odd indices.
      for (int slot = 1; slot < instr->operands_.size(); slot += 2) {
        if (instr->get_operand(slot) != hop)
          continue;
        auto incoming = instr->get_operand(slot - 1);
        instr->remove_operands(slot - 1, slot);
        for (auto pred : hop->pre_bbs_) {
          instr->add_operand(incoming);
          instr->add_operand(pred);
        }
        break;
      }
    }
    hop->replace_all_use_with(dest);
    for (auto pred : hop->pre_bbs_) {
      pred->add_succ_basic_block(dest);
      dest->add_pre_basic_block(pred);
    }
  }
  deleteUselessBlock(foo, bypassed);
}

@ -0,0 +1,19 @@
#ifndef SIMPLIFYJUMPH
#define SIMPLIFYJUMPH
#include "opt.h"
class SimplifyJump : public Optimization {
public:
SimplifyJump(Module *m) : Optimization(m) {}
void execute();
void deleteUselessBlock(Function *foo,
std::vector<BasicBlock *> &uselessBlock);
bool checkUselessJump(BasicBlock *bb);
void deleteUselessPhi(Function *foo);
void deleteUselessJump(Function *foo);
void mergePreBlock(Function *foo);
void deleteUnReachableBlock(Function *foo);
};
#endif // !SIMPLIFYJUMPH

@ -0,0 +1,206 @@
#include "opt.h"
#include <functional>
#include <vector>
/// Computes immediate dominators and dominance frontiers for every function
/// that has a body.
void DomainTree::execute() {
  for (auto foo : m->function_list_) {
    if (foo->basic_blocks_.empty())
      continue;
    getBlockDom(foo);
    getBlockDomFront(foo);
  }
}
/// Compares the traversal indices of two blocks.
/// @return true when `a` has a larger index than `b`.
bool DomainTree::isLoopEdge(BasicBlock *a, BasicBlock *b) {
  const int fromIdx = TraverseInd[a];
  const int toIdx = TraverseInd[b];
  return fromIdx > toIdx;
}
/// Depth-first walk over successor edges starting at `bb`.
/// @return the reachable blocks in post-order (a block follows its successors).
std::vector<BasicBlock *> DomainTree::postTraverse(BasicBlock *bb) {
  std::vector<BasicBlock *> order;
  std::set<BasicBlock *> seen;
  std::function<void(BasicBlock *)> walk = [&](BasicBlock *cur) {
    seen.insert(cur);
    for (auto next : cur->succ_bbs_) {
      if (seen.count(next) != 0)
        continue;
      walk(next);
    }
    order.push_back(cur);
  };
  walk(bb);
  return order;
}
/// Resets the analysis state for `f` and records the traversal order.
///
/// `TraverseInd` receives each reachable block's post-order number (the entry
/// block gets the largest one, which getBlockDom and intersect rely on), and
/// `reversePostTraverse` receives the blocks in reverse post-order.
void DomainTree::getReversePostTraverse(Function *f) {
  doms.clear();
  reversePostTraverse.clear();
  TraverseInd.clear();
  auto entryBlock = *f->basic_blocks_.begin();
  auto seq = postTraverse(entryBlock);
  for (int i = 0; i < seq.size(); i++)
    TraverseInd[seq[i]] = i;
  // Store the sequence reversed. The previous code reversed the just-cleared
  // member instead of `seq`, leaving the list in post-order.
  reversePostTraverse.assign(seq.rbegin(), seq.rend());
}
/// Computes the immediate dominator of every reachable block of `f` by
/// iterating to a fixed point, then stores it in each block's `idom_`.
///
/// `doms` is indexed by traversal index; the entry block is its own dominator
/// and a null slot means "not processed yet".
void DomainTree::getBlockDom(Function *f) {
  getReversePostTraverse(f);
  auto entry = *f->basic_blocks_.begin();
  auto entryIdx = TraverseInd[entry];
  doms.resize(entryIdx + 1, nullptr);
  doms.back() = entry;
  for (bool updated = true; updated;) {
    updated = false;
    for (auto bb : reversePostTraverse) {
      if (bb == entry)
        continue;
      auto preds = bb->pre_bbs_;
      // Fold all already-processed predecessors together with intersect().
      BasicBlock *candidate = nullptr;
      bool seeded = false;
      for (auto pred : preds) {
        if (doms[TraverseInd[pred]] == nullptr)
          continue;
        if (!seeded) {
          candidate = pred;
          seeded = true;
        } else {
          candidate = intersect(pred, candidate);
        }
      }
      if (doms[TraverseInd[bb]] != candidate) {
        doms[TraverseInd[bb]] = candidate;
        updated = true;
      }
    }
  }
  for (auto bb : reversePostTraverse)
    bb->idom_ = doms[TraverseInd[bb]];
}
/// Fills `dom_frontier_` for the blocks of `foo`.
///
/// For every block with two or more predecessors, each predecessor and its
/// chain of dominators, up to but excluding the block's own immediate
/// dominator, gets the block added to its frontier.
void DomainTree::getBlockDomFront(Function *foo) {
  for (auto join : foo->basic_blocks_) {
    auto preds = join->pre_bbs_;
    if (preds.size() < 2)
      continue;
    for (auto pred : preds) {
      for (auto runner = pred; runner != doms[TraverseInd[join]];
           runner = doms[TraverseInd[runner]]) {
        runner->dom_frontier_.insert(join);
      }
    }
  }
}
/// Walks two blocks up the current dominator chains until they meet.
/// @return the block where both walks coincide.
BasicBlock *DomainTree::intersect(BasicBlock *b1, BasicBlock *b2) {
  BasicBlock *fingerA = b1;
  BasicBlock *fingerB = b2;
  while (fingerA != fingerB) {
    // Whichever finger has the smaller index climbs towards the root.
    while (TraverseInd[fingerA] < TraverseInd[fingerB]) {
      fingerA = doms[TraverseInd[fingerA]];
    }
    while (TraverseInd[fingerB] < TraverseInd[fingerA]) {
      fingerB = doms[TraverseInd[fingerB]];
    }
  }
  return fingerA;
}
/// Recomputes reverse-dominance data for every function that has a body:
/// clears `rdoms_` / `rdom_frontier_` on each block, then rebuilds them.
void ReverseDomainTree::execute() {
  for (auto f : m->function_list_) {
    if (f->basic_blocks_.empty())
      continue;
    for (auto bb : f->basic_blocks_) {
      bb->rdoms_.clear();
      bb->rdom_frontier_.clear();
    }
    getBlockDomR(f);
    getBlockDomFrontR(f);
    getBlockRdoms(f);
  }
}
/// Depth-first walk over predecessor edges starting at `bb`.
///
/// Each block is appended to `reverseTraverse` after all blocks reached from
/// it, and its position in that list is stored in `reverseTraverseInd`.
void ReverseDomainTree::getPostTraverse(BasicBlock *bb,
                                        std::set<BasicBlock *> &visited) {
  visited.insert(bb);
  for (auto pred : bb->pre_bbs_) {
    if (visited.count(pred) != 0)
      continue;
    getPostTraverse(pred, visited);
  }
  reverseTraverseInd[bb] = static_cast<int>(reverseTraverse.size());
  reverseTraverse.push_back(bb);
}
/// Resets the analysis state for `f`, locates its exit block (the first block
/// whose terminator is a return) and records the backwards traversal order.
///
/// `reverseTraverseInd` holds post-order numbers of the walk from the exit
/// block over predecessor edges; `reverseTraverse` holds the same blocks in
/// reverse post-order.
void ReverseDomainTree::getReversePostTraverse(Function *f) {
  reverseDomainBlock.clear();
  reverseTraverse.clear();
  reverseTraverseInd.clear();
  // Forget the previous function's exit so that a function without a return
  // trips the assertion below instead of silently reusing a foreign block.
  exitBlock = nullptr;
  for (auto bb : f->basic_blocks_) {
    auto terminate_instr = bb->get_terminator();
    // A block without a terminator cannot be the exit block.
    if (terminate_instr != nullptr &&
        terminate_instr->op_id_ == Instruction::Ret) {
      exitBlock = bb;
      break;
    }
  }
  assert(exitBlock != nullptr);
  std::set<BasicBlock *> visited = {};
  getPostTraverse(exitBlock, visited);
  std::reverse(reverseTraverse.begin(), reverseTraverse.end());
}
/// Computes, by fixed-point iteration, the immediate reverse dominator of
/// every block that reaches the exit block.
///
/// `reverseDomainBlock` is indexed by `reverseTraverseInd`; the exit block is
/// its own entry and a null slot means "not processed yet".
void ReverseDomainTree::getBlockDomR(Function *f) {
  getReversePostTraverse(f);
  auto exitRoot = exitBlock;
  auto exitIdx = reverseTraverseInd[exitRoot];
  // One null slot per block numbered below the exit, then the exit itself.
  for (int slot = 0; slot < exitIdx; slot++)
    reverseDomainBlock.push_back(nullptr);
  reverseDomainBlock.push_back(exitRoot);
  for (bool updated = true; updated;) {
    updated = false;
    for (auto bb : reverseTraverse) {
      if (bb == exitRoot)
        continue;
      // Successors play the role of predecessors in the reversed graph.
      BasicBlock *candidate = nullptr;
      bool seeded = false;
      for (auto succ : bb->succ_bbs_) {
        if (reverseDomainBlock[reverseTraverseInd[succ]] == nullptr)
          continue;
        if (!seeded) {
          candidate = succ;
          seeded = true;
        } else {
          candidate = intersect(succ, candidate);
        }
      }
      if (reverseDomainBlock[reverseTraverseInd[bb]] != candidate) {
        reverseDomainBlock[reverseTraverseInd[bb]] = candidate;
        updated = true;
      }
    }
  }
}
/// Fills each block's `rdoms_` with the block itself followed by its chain of
/// immediate reverse dominators, stopping before the exit block. The exit
/// block's own set stays empty.
void ReverseDomainTree::getBlockRdoms(Function *f) {
  for (auto bb : f->basic_blocks_) {
    for (auto walker = bb; walker != exitBlock;
         walker = reverseDomainBlock[reverseTraverseInd[walker]]) {
      bb->rdoms_.insert(walker);
    }
  }
}
/// Fills `rdom_frontier_`, visiting the blocks of `f` in reverse list order.
///
/// For every block with two or more successors, each successor and its chain
/// of reverse dominators, up to but excluding the block's own immediate
/// reverse dominator, gets the block added to its frontier.
void ReverseDomainTree::getBlockDomFrontR(Function *f) {
  for (auto it = f->basic_blocks_.rbegin(); it != f->basic_blocks_.rend();
       it++) {
    auto split = *it;
    if (split->succ_bbs_.size() < 2)
      continue;
    for (auto succ : split->succ_bbs_) {
      for (auto runner = succ;
           runner != reverseDomainBlock[reverseTraverseInd[split]];
           runner = reverseDomainBlock[reverseTraverseInd[runner]]) {
        runner->rdom_frontier_.insert(split);
      }
    }
  }
}
/// Walks two blocks up the current reverse-dominator chains until they meet.
/// @return the block where both walks coincide.
BasicBlock *ReverseDomainTree::intersect(BasicBlock *b1, BasicBlock *b2) {
  BasicBlock *fingerA = b1;
  BasicBlock *fingerB = b2;
  while (fingerA != fingerB) {
    // Whichever finger has the smaller index climbs towards the exit block.
    while (reverseTraverseInd[fingerA] < reverseTraverseInd[fingerB]) {
      fingerA = reverseDomainBlock[reverseTraverseInd[fingerA]];
    }
    while (reverseTraverseInd[fingerB] < reverseTraverseInd[fingerA]) {
      fingerB = reverseDomainBlock[reverseTraverseInd[fingerB]];
    }
  }
  return fingerA;
}

@ -0,0 +1,46 @@
// Base class for optimization passes, plus the dominator and reverse-dominator analyses.
#ifndef OPTH
#define OPTH
#include "ir.h"
/// Common base of all optimization and analysis passes.
class Optimization {
public:
  Module *m; // module the pass works on
  explicit Optimization(Module *m_) : m(m_) {}
  // Virtual so that a pass can be destroyed through an Optimization pointer.
  virtual ~Optimization() = default;
  /// Runs the pass on the module.
  virtual void execute() = 0;
};
class DomainTree : public Optimization {
std::vector<BasicBlock *> reversePostTraverse;
std::map<BasicBlock *, int> TraverseInd;
std::vector<BasicBlock *> doms;
public:
DomainTree(Module *m) : Optimization(m) {}
void execute();
void getReversePostTraverse(Function *foo);
std::vector<BasicBlock *> postTraverse(BasicBlock *bb);
void getBlockDom(Function *foo);
void getBlockDomFront(Function *foo);
BasicBlock *intersect(BasicBlock *b1, BasicBlock *b2);
bool isLoopEdge(BasicBlock *a, BasicBlock *b);
};
class ReverseDomainTree : public Optimization {
std::map<BasicBlock *, int> reverseTraverseInd;
std::vector<BasicBlock *> reverseDomainBlock;
std::vector<BasicBlock *> reverseTraverse;
BasicBlock *exitBlock;
public:
ReverseDomainTree(Module *m) : Optimization(m), exitBlock(nullptr) {}
void execute();
BasicBlock *intersect(BasicBlock *b1, BasicBlock *b2);
void getReversePostTraverse(Function *foo);
void getBlockDomR(Function *foo);
void getBlockRdoms(Function *foo);
void getBlockDomFrontR(Function *foo);
void getPostTraverse(BasicBlock *bb, std::set<BasicBlock *> &visited);
};
#endif // !OPTH

@ -0,0 +1,22 @@
# Builds the `parser` static library from the pre-generated lexer/parser
# sources that are checked in next to this file.

# Flex & Bison are only needed when the sources are regenerated.
# find_package(FLEX 2.5.4 REQUIRED)
# find_package(BISON 2.4.1 REQUIRED)
set(PARSER_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
# set(PARSER_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(LEXER_CPP "${PARSER_DIR}/lexer.cpp")
set(LEXER_DEF "${PARSER_DIR}/lexer.hpp")
set(PARSER_CPP "${PARSER_DIR}/parser.cpp")
set(PARSER_DEF "${PARSER_DIR}/parser.hpp")
# Generate tokenizer & parser via Flex & Bison
# flex_target(LEXER "lexer.l" "${LEXER_CPP}" DEFINES_FILE "${LEXER_DEF}")
# bison_target(PARSER "parser.y" "${PARSER_CPP}" DEFINES_FILE "${PARSER_DEF}")
# add_flex_bison_dependency(LEXER PARSER)
set(SOURCE_FILES "${LEXER_CPP}" "${PARSER_CPP}" "ast.cpp")
# set(... PARENT_SCOPE) does not define the variable in the current scope, so
# set it locally first (used below) and then export it for sibling directories.
set(PARSER_INCLUDE ${PARSER_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
set(PARSER_INCLUDE "${PARSER_INCLUDE}" PARENT_SCOPE)
add_library(parser STATIC ${SOURCE_FILES})
target_include_directories(parser PRIVATE "${PARSER_INCLUDE}")

@ -0,0 +1,10 @@
# Builds the stand-alone `parser` executable from the C sources.
objs=parser.o lexer.o ast.o display.o
CC=gcc
parser:$(objs)
	$(CC) -o parser ${objs}
%.o:%.c
	$(CC) -c $<
.PHONY: clean
# Options go before the operands so that rm implementations which stop option
# parsing at the first operand still honour -f.
clean:
	rm -f parser $(objs) parser.c lexer.c parser.h

@ -0,0 +1,716 @@
#include "ast.h"
#include "parser.h" //由bison根据parser.y生成
#define DEBUG 1
#define INTEGER_MINVALUE -2147483647
#define ARRAYCALL 123456789

/* File-level state. Every variable below is zero-initialised unless an
 * explicit initialiser says otherwise. */
int LEV = 0;
int main_flag = 0;
int call_flag = 0;
int main_call = 0;
int current_offset = 0;

/* Scratch name/label buffers. */
char break_label[30];
char continue_label[30];
char case_temp[30];
char case_label[30];
char array_name[30];
char struct_name[33];

int struct_width = 0;
int struct_flag = 0;
int array_index = 0;
int struct_var_flag = 0;
int rtn;
int flag = 0;
int rtn2;
int op;
int return_flag = 0;

struct ASTNode* left;
struct ASTNode* right;

/* Storage for TAC statement information. */
char tokens[200][200];
const char spState[8][20] = {
    "FUNCTION",
    "GOTO",
    "CALL",
    "PARAM",
    "LABEL",
    "ARG",
    "RETURN",
    "BLOCK"
};
int varlen = 0; /* length of the current variable */
int lineNo = 0; /* current line number */
/*
 * Allocates an AST node and fills in every field from the arguments.
 *
 * The `symbol` pointer is stored as given, not copied. On allocation failure
 * an error is written to stderr and the process exits, because the node is
 * written to immediately and callers do not expect a null result.
 *
 * Returns the newly allocated node (never NULL).
 */
ASTNode *new_node(node_type type, ASTNode *left, ASTNode *mid, ASTNode *right, int int_val, float float_val, char *symbol, node_type d_type) {
    ASTNode *n = (ASTNode *)malloc(sizeof(ASTNode));
    if (n == NULL) {
        fprintf(stderr, "new_node: out of memory\n");
        exit(EXIT_FAILURE);
    }
    n->type = type;
    n->left = left;
    n->mid = mid;
    n->right = right;
    n->int_val = int_val;
    n->float_val = float_val;
    n->symbol = symbol;
    n->d_type = d_type;
    return n;
}
/* Tree-drawing state shared by the print helpers, indexed by depth. */
/* targetNum[d]: child count that gapManage/sameGapManage recorded for depth d. */
int targetNum[1024];
/* currentNum[d]: counter for depth d; set to 1 on entering a level, bumped per node. */
int currentNum[1024];
/* Depth of the node currently being printed. */
int currentLayer = 0;
/* Cleared by print_comp_unit; presumably read by display() -- TODO confirm. */
int handle_next_display = 1;
/* Returns how many of T's three child links (left, mid, right) are set. */
int getBranchNum(ASTNode* T)
{
    int branches = 0;
    branches += (T->left ? 1 : 0);
    branches += (T->right ? 1 : 0);
    branches += (T->mid ? 1 : 0);
    return branches;
}
// Print the source-level name of a data type tag; anything other than
// Int, Float or Void is printed as "unknown".
void printVarType(node_type type)
{
    const char* name = "unknown";
    if (type == Int)
    {
        name = "int";
    }
    else if (type == Float)
    {
        name = "float";
    }
    else if (type == Void)
    {
        name = "void";
    }
    printf("%s", name);
}
// Print the indentation prefix for the node at depth currentLayer:
// one filler per ancestor depth, then the branch glyph for this node.
void gapProcess()
{
    int depth;
    // Ancestor depths: a bar while that depth is not past its recorded count.
    for (depth = 1; depth < currentLayer; ++depth)
    {
        fputs(currentNum[depth] <= targetNum[depth] ? "| " : " ", stdout);
    }
    // Own depth: "|--" while below the recorded count, "`--" otherwise.
    if (currentLayer >= 1)
    {
        fputs(currentNum[currentLayer] < targetNum[currentLayer] ? "|--" : "`--", stdout);
    }
}
// Account for T at the current depth, then print its children one level
// deeper: right-to-left when `reverse` is non-zero, left-to-right otherwise.
void gapManage(ASTNode* T, int reverse)
{
    const int childDepth = currentLayer + 1;
    currentNum[currentLayer] += 1;
    targetNum[childDepth] = getBranchNum(T);
    currentNum[childDepth] = 1;
    currentLayer = childDepth;
    if(!reverse)
    {
        nextDisplay(T);
    }
    else
    {
        nextDisplayReverse(T);
    }
    currentLayer--;
}
// Variant of gapManage for list-shaped nodes: the right child is printed one
// level deeper, then the left child is printed at the current depth.
// `reverse` is accepted for signature symmetry but not used.
void sameGapManage(ASTNode* T, int reverse)
{
    const int childDepth = currentLayer + 1;
    (void)reverse;
    currentNum[currentLayer] += 1;
    // The left link is not counted as a child of this node.
    targetNum[childDepth] = getBranchNum(T) - 1;
    currentNum[childDepth] = 1;
    currentLayer = childDepth;
    if(T->right != NULL)
    {
        display(T->right);
    }
    currentLayer--;
    display(T->left);
}
// Display the existing children of T in the order left, mid, right.
void nextDisplay(ASTNode* T)
{
    ASTNode* kids[3];
    int k;
    kids[0] = T->left;
    kids[1] = T->mid;
    kids[2] = T->right;
    for(k = 0; k < 3; ++k)
    {
        if(kids[k] != NULL)
        {
            display(kids[k]);
        }
    }
}
// Display the existing children of T in the order right, mid, left.
void nextDisplayReverse(ASTNode* T)
{
    ASTNode* kids[3];
    int k;
    kids[0] = T->right;
    kids[1] = T->mid;
    kids[2] = T->left;
    for(k = 0; k < 3; ++k)
    {
        if(kids[k] != NULL)
        {
            display(kids[k]);
        }
    }
}
// Print the root line of the tree ("CompUnit") and then its children,
// right child first.
void print_root(ASTNode* T)
{
    gapProcess();
    puts("CompUnit");
    gapManage(T, 1);
}
// Print one element of the top-level list. The element itself lives in
// T->right (a ConstDecl, VarDecl or FuncDef); T->left links to the rest of
// the list, which is printed at the same depth.
void print_comp_unit(ASTNode* T)
{
    ASTNode* item;
    if(T->left != NULL)
    {
        // More elements follow: step back so the item printed below is not
        // drawn as the last branch.
        currentNum[currentLayer] -= 1;
    }
    handle_next_display = 0;
    item = T->right;
    if(item->type == ConstDecl)
    {
        print_const_decl(item);
    }
    else if(item->type == VarDecl)
    {
        print_var_decl(item);
    }
    else if(item->type == FuncDef)
    {
        print_func_def(item);
    }
    if(T->left != NULL)
    {
        // Reset the bookkeeping of the next depth, then continue with the
        // remainder of the list.
        targetNum[currentLayer + 1] = getBranchNum(T) - 1;
        currentNum[currentLayer + 1] = 1;
        display(T->left);
    }
}
// Print "ConstDecl <type>" followed by the declaration's children.
void print_const_decl(ASTNode* T)
{
    gapProcess();
    fputs("ConstDecl ", stdout);
    printVarType(T->d_type);
    putchar('\n');
    gapManage(T, 1);
}
// Print "ConstDef <name>". When T->left is set (a further definition is
// chained behind this one) the chain is continued at the same depth.
void print_const_def(ASTNode* T)
{
    const int chained = (T->left != NULL);
    if(chained)
    {
        currentNum[currentLayer] -= 1;
    }
    gapProcess();
    printf("ConstDef %s\n", T->symbol);
    if(!chained)
    {
        gapManage(T, 1);
        return;
    }
    sameGapManage(T, 1);
}
// Print the "ConstExpArray" line and then the node's children.
void print_const_exp_array(ASTNode* T)
{
    gapProcess();
    puts("ConstExpArray");
    gapManage(T, 1);
}
// A ConstInitVal wrapping a single ConstExp is transparent: only the
// expression is printed. Anything else is shown as a brace initialiser.
void print_const_init_val(ASTNode* T)
{
    ASTNode* inner = T->right;
    if(inner != NULL && inner->type == ConstExp)
    {
        handle_next_display = 0;
        print_const_exp(inner);
        return;
    }
    gapProcess();
    puts("ConstInitVal {}");
    gapManage(T, 1);
}
// Print a constant expression.
// int_val == 0 marks a plain wrapper around a MulExp, which is printed
// directly. Otherwise int_val carries the operator token (PLUS or MINUS) and
// the node is printed as "ConstExp +" / "ConstExp -" followed by its operands.
void print_const_exp(ASTNode* T)
{
    if(T->int_val == 0)
    {
        handle_next_display = 0;
        print_mul_exp(T->right);
        return;
    }
    gapProcess();
    printf("ConstExp ");
    // The operator is stored in int_val; d_type is NonType for these nodes,
    // so testing d_type made every "+" expression print as "-".
    if(T->int_val == PLUS)
    {
        printf("+\n");
    }
    else
    {
        printf("-\n");
    }
    gapManage(T, 1);
}
// Print "VarDecl <type>" followed by the declaration's children.
void print_var_decl(ASTNode* T)
{
    gapProcess();
    fputs("VarDecl ", stdout);
    printVarType(T->d_type);
    putchar('\n');
    gapManage(T, 1);
}
// Print "VarDef <name>". When T->left is set (a further definition is chained
// behind this one) the chain is continued at the same depth.
void print_var_def(ASTNode* T)
{
    const int chained = (T->left != NULL);
    if(chained)
    {
        currentNum[currentLayer] -= 1;
    }
    gapProcess();
    printf("VarDef %s\n", T->symbol);
    if(!chained)
    {
        gapManage(T, 1);
        return;
    }
    sameGapManage(T, 1);
}
// Print an initialiser: a single expression, a list of initialisers, or an
// explicit "Null Init Vals" line when the brace list is empty.
void print_init_val(ASTNode* T)
{
    if(T->int_val == Exp)
    {
        handle_next_display = 0;
        print_exp(T->right);
        return;
    }
    if(T->right != NULL)
    {
        handle_next_display = 0;
        print_init_vals(T->right);
        return;
    }
    gapProcess();
    puts("Null Init Vals");
    gapManage(T, 1);
}
// Print one "InitVals" list element; a non-NULL T->left continues the list
// at the same depth.
void print_init_vals(ASTNode* T)
{
    const int chained = (T->left != NULL);
    if(chained)
    {
        currentNum[currentLayer] -= 1;
    }
    gapProcess();
    puts("InitVals");
    if(!chained)
    {
        gapManage(T, 1);
        return;
    }
    sameGapManage(T, 1);
}
// Print "FuncDef <return type> <name>" and then the children in
// left-to-right order.
void print_func_def(ASTNode* T)
{
    gapProcess();
    fputs("FuncDef ", stdout);
    printVarType(T->d_type);
    printf(" %s\n", T->symbol);
    gapManage(T, 0);
}
// Print one formal parameter as "FuncFParams <type> <name>"; a non-NULL
// T->left continues the parameter list at the same depth.
void print_func_f_param(ASTNode* T)
{
    const int chained = (T->left != NULL);
    if(chained)
    {
        currentNum[currentLayer] -= 1;
    }
    gapProcess();
    fputs("FuncFParams ", stdout);
    printVarType(T->d_type);
    printf(" %s\n", T->symbol);
    if(!chained)
    {
        gapManage(T, 1);
        return;
    }
    sameGapManage(T, 1);
}
// Print the "Block" line and then the block's children.
void print_block(ASTNode* T)
{
    gapProcess();
    puts("Block");
    gapManage(T, 1);
}
// Print one element of a block's item list. The element lives in T->right
// (ConstDecl, VarDecl or Stmt); T->left links to the rest of the list, which
// is printed at the same depth.
void print_block_item(ASTNode* T)
{
    if(T->left)
    {
        // More items follow: step back so the item printed below is not
        // drawn as the last branch.
        currentNum[currentLayer]--;
    }
    handle_next_display = 0;
    if(T->right == NULL)
    {
        // The item carries no node (a statement production may yield none).
        // Nothing is printed, so undo the step-back that the skipped printer
        // would otherwise have compensated for.
        if(T->left)
        {
            currentNum[currentLayer]++;
        }
    }
    else
    {
        switch(T->right->type)
        {
            case ConstDecl:
                print_const_decl(T->right);
                break;
            case VarDecl:
                print_var_decl(T->right);
                break;
            case Stmt:
                print_stmt(T->right);
                break;
            default:
                break;
        }
    }
    if(T->left)
    {
        // Reset the bookkeeping of the next depth, then continue with the
        // remainder of the list.
        currentLayer++;
        targetNum[currentLayer] = getBranchNum(T) - 1;
        currentNum[currentLayer] = 1;
        currentLayer--;
        display(T->left);
    }
}
// Print a statement according to the kind stored in T->int_val.
// Expression statements and nested blocks are transparent (only the wrapped
// node is printed); every other kind prints a title line followed by the
// children in left-to-right order.
void print_stmt(ASTNode* T)
{
    const char* title;
    if(T->int_val == ExpStmt)
    {
        handle_next_display = 0;
        print_exp(T->right);
        return;
    }
    if(T->int_val == Block)
    {
        handle_next_display = 0;
        print_block(T->right);
        return;
    }
    switch (T->int_val)
    {
        case BlankStmt:       title = "Blank Statement\n";        break;
        case AssignStmt:      title = "Assign Statement\n";       break;
        case IfStmt:          title = "If Statement\n";           break;
        case IfElseStmt:      title = "If-Else Statement\n";      break;
        case WhileStmt:       title = "While Statement\n";        break;
        case BreakStmt:       title = "Break Statement\n";        break;
        case ContinueStmt:    title = "Continue Statement\n";     break;
        case BlankReturnStmt: title = "Blank Return Statement\n"; break;
        case ReturnStmt:      title = "Return Statement\n";       break;
        default:              title = "Unknown Statement\n";      break;
    }
    gapProcess();
    fputs(title, stdout);
    gapManage(T, 0);
}
// An Exp node is a transparent wrapper: print the additive expression held
// in its right child.
void print_exp(ASTNode* T)
{
    ASTNode* additive = T->right;
    handle_next_display = 0;
    print_add_exp(additive);
}
// Print an additive expression. int_val == MUL marks a plain wrapper around
// a MulExp; otherwise the node is printed as "AddExp +" or "AddExp -".
void print_add_exp(ASTNode* T)
{
    const int op = T->int_val;
    if(op == MUL)
    {
        handle_next_display = 0;
        print_mul_exp(T->right);
        return;
    }
    gapProcess();
    fputs("AddExp", stdout);
    fputs(op == PLUS ? " +\n" : " -\n", stdout);
    gapManage(T, 1);
}
// Print a multiplicative expression. int_val == UnaryExp marks a plain
// wrapper; otherwise "MulExp" is printed followed by the operator, if it is
// one of MUL, DIV or MOD.
void print_mul_exp(ASTNode* T)
{
    const int op = T->int_val;
    if(op == UnaryExp)
    {
        handle_next_display = 0;
        print_unary_exp(T->right);
        return;
    }
    gapProcess();
    fputs("MulExp", stdout);
    switch(op)
    {
        case MUL:
            puts(" *");
            break;
        case DIV:
            puts(" /");
            break;
        case MOD:
            puts(" %");
            break;
        default:
            // Unrecognised operator: no suffix and no newline are printed.
            break;
    }
    gapManage(T, 1);
}
// Print a unary expression. int_val == PrimaryExp marks a plain wrapper;
// otherwise int_val selects between a call ("name()") and the prefix
// operators +, - and NOT.
void print_unary_exp(ASTNode* T)
{
    const int kind = T->int_val;
    if(kind == PrimaryExp)
    {
        handle_next_display = 0;
        print_primary_exp(T->right);
        return;
    }
    gapProcess();
    fputs("UnaryExp ", stdout);
    if(kind == FuncRParams)
    {
        printf("%s()\n", T->symbol);
    }
    else if(kind == Plus)
    {
        puts("+");
    }
    else if(kind == Minus)
    {
        puts("-");
    }
    else if(kind == NOT)
    {
        puts("NOT");
    }
    gapManage(T, 1);
}
// Print one actual-argument list element ("FuncRParams"); a non-NULL T->left
// continues the argument list at the same depth.
void print_func_r_params(ASTNode* T)
{
    const int chained = (T->left != NULL);
    if(chained)
    {
        currentNum[currentLayer] -= 1;
    }
    gapProcess();
    puts("FuncRParams");
    if(!chained)
    {
        gapManage(T, 1);
        return;
    }
    sameGapManage(T, 1);
}
// Print a primary expression. With d_type == NonType the node wraps either a
// parenthesised expression (int_val == Exp) or an l-value; otherwise it is a
// literal, printed with its value.
void print_primary_exp(ASTNode* T)
{
    if(T->d_type != NonType)
    {
        gapProcess();
        fputs("PrimaryExp ", stdout);
        if(T->d_type == Int)
        {
            printf("IntLiteral %d\n", T->int_val);
        }
        else
        {
            printf("FloatLiteral %f\n", T->float_val);
        }
        gapManage(T, 1);
        return;
    }
    handle_next_display = 0;
    if(T->int_val == Exp)
    {
        print_exp(T->right);
    }
    else
    {
        print_lv_al(T->right);
    }
}
// Print "LVal <name>" followed by the node's children.
void print_lv_al(ASTNode* T)
{
    const char* name = T->symbol;
    gapProcess();
    printf("LVal %s\n", name);
    gapManage(T, 1);
}
// Print a condition node. An operator-less Cond that merely wraps another
// Cond is skipped; otherwise "Cond" is printed, with " OR" appended for a
// logical-or node.
void print_cond(ASTNode* T)
{
    const int op = T->int_val;
    if(op == 0 && T->right->type == Cond)
    {
        handle_next_display = 0;
        print_cond(T->right);
        return;
    }
    gapProcess();
    fputs("Cond", stdout);
    fputs(op == OR ? " OR\n" : "\n", stdout);
    gapManage(T, 1);
}
// Print a logical-and node. int_val == 0 marks a plain wrapper around an
// EqExp; otherwise the node is printed as "LAndExp AND".
void print_l_and_exp(ASTNode* T)
{
    if(T->int_val != 0)
    {
        gapProcess();
        puts("LAndExp AND");
        gapManage(T, 1);
        return;
    }
    handle_next_display = 0;
    print_eq_exp(T->right);
}
// Print an equality node. int_val == 0 marks a plain wrapper around a
// RelExp; otherwise "EqExp" is printed with " ==" or " !=" when the operator
// is EQ or NE.
void print_eq_exp(ASTNode* T)
{
    const int op = T->int_val;
    const char* suffix = "\n";
    if(op == 0)
    {
        handle_next_display = 0;
        print_rel_exp(T->right);
        return;
    }
    if(op == EQ)
    {
        suffix = " ==\n";
    }
    else if(op == NE)
    {
        suffix = " !=\n";
    }
    gapProcess();
    fputs("EqExp", stdout);
    fputs(suffix, stdout);
    gapManage(T, 1);
}
// Print a relational node. int_val == 0 marks a plain wrapper around an
// AddExp; otherwise "RelExp" is printed with its operator, where any
// operator other than LT, GT and LE is shown as ">=".
void print_rel_exp(ASTNode* T)
{
    const int op = T->int_val;
    const char* text = ">=\n";
    if(op == 0)
    {
        handle_next_display = 0;
        print_add_exp(T->right);
        return;
    }
    if(op == LT)
    {
        text = "<\n";
    }
    else if(op == GT)
    {
        text = ">\n";
    }
    else if(op == LE)
    {
        text = "<=\n";
    }
    gapProcess();
    fputs("RelExp ", stdout);
    fputs(text, stdout);
    gapManage(T, 1);
}
// Print the "ExpArray []" line and then the node's children.
void print_exp_array(ASTNode* T)
{
    gapProcess();
    puts("ExpArray []");
    gapManage(T, 1);
}
// Fallback for node kinds without a dedicated printer: count the node at the
// current depth, then print a single "Unknown" line. Children are not
// visited.
void print_unknown(ASTNode* T)
{
    currentNum[currentLayer] += 1;
    gapProcess();
    puts("Unknown");
}

@ -0,0 +1,116 @@
#ifndef DEF_H
#define DEF_H
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "math.h"
#include "stdarg.h"
#include "parser.h" //由bison根据parser.y生成
#define MAXLENGTH 200
/* Number of cells needed for the activation-record control information;
   adjust to the actual target system. Parenthesised so that the macro
   expands as a single operand inside larger expressions. */
#define DX (3*sizeof(int))
// The AST node types, three-address node types, etc. defined below are for
// reference only; define them according to your own understanding when doing
// the lab.
extern int LEV; // nesting level
#define BLOCK (-2147483647)
// Tag for every kind of AST node. The same enumeration is also used for
// statement kinds and for data types (Int, Float, Void, NonType).
typedef enum node_type {
CompUnit,
ConstDecl,
VarDecl,
FuncDef,
ConstDef,
ConstInitVal,
VarDef,
InitVal,
FuncFParam,
ExpArray,
Exp,
Block,
BlockItem,
Stmt,
LVal,
PrimaryExp,
UnaryExp,
LOrExp,
FuncRParams,
MulExp,
RelExp,
EqExp,
LAndExp,
LNotExp,
Cond,
ConstExp,
ConstExpArray,
BlankStmt, // empty statement
ExpStmt, // expression statement
AssignStmt, // assignment statement
IfStmt, // if statement
IfElseStmt, // if-else statement
WhileStmt, // while statement
BreakStmt, // break statement
ContinueStmt, // continue statement
BlankReturnStmt, // return statement without a value
AddExp,
ReturnStmt, // return statement with a value
NonType,
Float,
Int,
InitVals,
Void,
Plus,
Minus,
Root
} node_type;
// AST node (at most three children: left, mid, right; mid is NULL when a
// node has only two children).
typedef struct ASTNode {
node_type type; // kind of this node
struct ASTNode *left;
struct ASTNode *mid;
struct ASTNode *right;
int int_val; // integer payload: literal value, or an operator/kind tag
float float_val; // float literal value
char *symbol; // identifier text, or NULL
node_type d_type; // data type tag (Int, Float, Void or NonType)
}ASTNode;
// Node construction.
ASTNode *new_node(node_type type, ASTNode *left, ASTNode *mid, ASTNode *right, int int_val, float float_val, char *symbol, node_type d_type);
// Entry point of the tree printer: dispatches on T->type.
void display(ASTNode* T);
// Helpers shared by the printers.
int getBranchNum(ASTNode* T);
void printVarType(node_type type);
void gapProcess();
void nextDisplay(ASTNode* T);
void nextDisplayReverse(ASTNode* T);
// One printer per node kind.
void print_root(ASTNode* T);
void print_comp_unit(ASTNode* T);
void print_const_decl(ASTNode* T);
void print_const_def(ASTNode* T);
void print_const_exp_array(ASTNode* T);
void print_const_init_val(ASTNode* T);
void print_const_exp(ASTNode* T);
void print_var_decl(ASTNode* T);
void print_var_def(ASTNode* T);
void print_init_val(ASTNode* T);
void print_init_vals(ASTNode* T);
void print_func_def(ASTNode* T);
void print_func_f_param(ASTNode* T);
void print_block(ASTNode* T);
void print_block_item(ASTNode* T);
void print_stmt(ASTNode* T);
void print_exp(ASTNode* T);
void print_add_exp(ASTNode* T);
void print_mul_exp(ASTNode* T);
void print_unary_exp(ASTNode* T);
void print_func_r_params(ASTNode* T);
void print_primary_exp(ASTNode* T);
void print_lv_al(ASTNode* T);
void print_cond(ASTNode* T);
void print_l_and_exp(ASTNode* T);
void print_eq_exp(ASTNode* T);
void print_rel_exp(ASTNode* T);
void print_exp_array(ASTNode* T);
void print_unknown(ASTNode* T);
#endif

Binary file not shown.

@ -0,0 +1,2 @@
# Generate the parser source parser.c from parser.y; -d also writes the
# token/type definitions header.
bison -d parser.y -o parser.c
# Generate the scanner source lexer.c from lexer.l.
flex -o lexer.c lexer.l

@ -0,0 +1,15 @@
#pragma once
// NOTE(review): the meanings below are inferred from the enumerator names
// only - confirm against the code that uses them.
// Presumably statement kinds (semicolon, assignment, expression, continue,
// break, return, block, selection, iteration).
enum STYPE { SEMI, ASS, EXP, CONT, BRE, RET, BLK, SEL, ITER };
// Presumably unary operators.
enum UOP { UOP_ADD, UOP_MINUS, UOP_NOT };
// Presumably additive operators.
enum AOP { AOP_ADD, AOP_MINUS };
// Presumably multiplicative operators.
enum MOP { MOP_MUL, MOP_DIV, MOP_MOD };
// Presumably relational operators.
enum ROP { ROP_GTE, ROP_LTE, ROP_GT, ROP_LT };
// Presumably equality operators.
enum EOP { EOP_EQ, EOP_NEQ };
// Presumably value types.
enum TYPE { TYPE_VOID, TYPE_INT, TYPE_FLOAT };

@ -0,0 +1,101 @@
#include "ast.h"
extern int targetNum[1024];
extern int currentNum[1024];
extern int currentLayer;
extern int handle_next_display;
// Print the subtree rooted at T by dispatching on the node kind.
// handle_next_display is set to 1 on entry; for kinds without a dedicated
// printer it is cleared again and print_unknown is used.
void display(ASTNode* T)
{
    void (*printer)(ASTNode*) = NULL;
    handle_next_display = 1;
    switch (T->type)
    {
        case Root:          printer = print_root;            break;
        case CompUnit:      printer = print_comp_unit;       break;
        case ConstDecl:     printer = print_const_decl;      break;
        case ConstDef:      printer = print_const_def;       break;
        case ConstExpArray: printer = print_const_exp_array; break;
        case ConstInitVal:  printer = print_const_init_val;  break;
        case ConstExp:      printer = print_const_exp;       break;
        case VarDecl:       printer = print_var_decl;        break;
        case VarDef:        printer = print_var_def;         break;
        case InitVal:       printer = print_init_val;        break;
        case InitVals:      printer = print_init_vals;       break;
        case FuncDef:       printer = print_func_def;        break;
        case FuncFParam:    printer = print_func_f_param;    break;
        case Block:         printer = print_block;           break;
        case BlockItem:     printer = print_block_item;      break;
        case Stmt:          printer = print_stmt;            break;
        case Exp:           printer = print_exp;             break;
        case AddExp:        printer = print_add_exp;         break;
        case MulExp:        printer = print_mul_exp;         break;
        case UnaryExp:      printer = print_unary_exp;       break;
        case FuncRParams:   printer = print_func_r_params;   break;
        case PrimaryExp:    printer = print_primary_exp;     break;
        case LVal:          printer = print_lv_al;           break;
        case Cond:          printer = print_cond;            break;
        case LAndExp:       printer = print_l_and_exp;       break;
        case EqExp:         printer = print_eq_exp;          break;
        case RelExp:        printer = print_rel_exp;         break;
        case ExpArray:      printer = print_exp_array;       break;
        default:
            break;
    }
    if (printer == NULL)
    {
        handle_next_display = 0;
        printer = print_unknown;
    }
    printer(T);
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,102 @@
%option noyywrap
%option yylineno
%{
/* Scanner prologue: C headers plus the token definitions from parser.h. */
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "ast.h"
#include "parser.h"
/* Debug trace: prints a token name, the given text and the current yylineno. */
#define TESTINFO(type,value) printf("LexToken(%s,%s,%d))\n",type,value,yylineno)
/* Line counter maintained by the newline and block-comment rules below. */
int line_cnt = 1;
%}
MultilineComment "/*"([^\*]|(\*)*[^\*/])*(\*)*"*/"
SingleLineComment "//"[^\n]+
Lex_err [1-9][0-9]*[a-zA-Z]+[0-9]*|0[0-7]*[8-9a-zA-Z_]+[0-9a-zA-Z_]*
%%
"//".* { /* line comment: discard up to the end of the line */ }
"/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" {
/* Block comment: discard it, but count the newlines it contains. */
int len = strlen(yytext);
for (int i = 0; i < len; i++)
if(yytext[i] == '\n')
line_cnt++;
}
"\n" { /* newline: advance the line counter */ line_cnt++; }
[ \t] { /* blanks and tabs are skipped */ }
"int" { TESTINFO("INT","int"); return INT; }
"float" { TESTINFO("FLOAT","float"); return FLOAT; }
"void" { TESTINFO("VOID","void");return VOID; }
"const" { TESTINFO("CONST","const"); return CONST; }
"return" { TESTINFO("RETURN","return"); return RETURN; }
"if" { TESTINFO("IF","if"); return IF; }
"else" { TESTINFO("ELSE","else"); return ELSE; }
"while" { TESTINFO("WHILE","while");return WHILE; }
"break" { TESTINFO("BREAK","break");return BREAK; }
"continue" { TESTINFO("CONTINUE","continue"); return CONTINUE; }
"(" { /* trace text is the matched lexeme, like every other punctuator */ TESTINFO("LP","(");return LP; }
")" { TESTINFO("RP",")");return RP; }
"[" { TESTINFO("LB","[");return LB; }
"]" { TESTINFO("RB","]");return RB; }
"{" { TESTINFO("LC","{");return LC; }
"}" { TESTINFO("RC","}");return RC; }
"," { TESTINFO("COMMA",",");return COMMA; }
";" { TESTINFO("SEMI",";");return SEMICOLON; }
"+" { TESTINFO("PLUS","+");return PLUS; }
"-" { TESTINFO("MINUS","-");return MINUS; }
"!" { TESTINFO("NOT","!");return NOT; }
"=" { TESTINFO("ASSIGN","=");return ASSIGN; }
"*" { TESTINFO("MUL","*");return MUL; }
"/" { TESTINFO("DIV","/");return DIV; }
"%" { TESTINFO("MOD","%");return MOD; }
"&&" { TESTINFO("AND","&&");return AND; }
"||" { TESTINFO("OR","||");return OR; }
"==" { TESTINFO("EQ","==");return EQ; }
"!=" { TESTINFO("NE","!=");return NE; }
"<" { TESTINFO("LT","<");return LT; }
"<=" { TESTINFO("LE","<=");return LE; }
">" { TESTINFO("GT",">");return GT; }
">=" { TESTINFO("GE",">=");return GE; }
0[xX][0-9a-fA-F]+ {
    /* Hexadecimal integer literal. At least one digit is required after the
       "0x"/"0X" prefix; a bare prefix is left to the Lex_err rule. */
    unsigned int val = 0; /* unsigned so that shifting cannot overflow a signed int */
    int len = strlen(yytext);
    for (int i = 2; i < len; i++) {
        unsigned char c = (unsigned char)yytext[i];
        val <<= 4;
        if (isdigit(c))
            val += c - '0';
        else
            val += tolower(c) - 'a' + 10; /* accept 'a'-'f' as well as 'A'-'F' */
    }
    yylval.int_val = (int)val;
    TESTINFO("INT_LIT","int_lit");
    return INT_LIT;
}
[a-zA-Z_][a-zA-Z0-9_]* {
    /* Identifier: hand the parser its own heap copy of the lexeme, because
       yytext is overwritten by the next match. */
    yylval.str_val = (char *)malloc(yyleng + 1);
    if (yylval.str_val == NULL) {
        fprintf(stderr, "lexer: out of memory\n");
        exit(EXIT_FAILURE);
    }
    /* yytext is NUL-terminated, so copying yyleng + 1 bytes includes the terminator. */
    memcpy(yylval.str_val, yytext, yyleng + 1);
    TESTINFO("ID","id");
    return ID;
}
[0-9]*\.[0-9]+f?|[0-9]+e-?[0-9]+f? {
/* Float literal: either digits around a decimal point (".5", "1.5") or an
   integer with an exponent ("1e5", "1e-5"), each with an optional 'f' suffix. */
yylval.float_val = atof(yytext);
TESTINFO("FLOAT_LIT","float_lit");
return FLOAT_LIT; }
{Lex_err} { /* malformed number such as "12ab" or "09" */ return LEX_ERR; }
[1-9][0-9]*|0 { /* decimal integer literal */ yylval.int_val = atoi(yytext); return INT_LIT; }
0[0-7]+ {
/* Octal integer literal: fold the digits after the leading 0. */
int val = 0;
int len = strlen(yytext);
for (int i = 1; i < len; i++)
val = (val << 3) + yytext[i] - '0';
yylval.int_val = val;
TESTINFO("INT_LIT","int_lit");
return INT_LIT;
}
. { /* any other character is silently discarded */ }
%%

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,131 @@
/* A Bison parser, made by GNU Bison 3.5.1. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation,
Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
under terms of your choice, so long as that work isn't itself a
parser generator using the skeleton or a modified version thereof
as a parser skeleton. Alternatively, if you modify or redistribute
the parser skeleton itself, you may (at your option) remove this
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
/* Undocumented macros, especially those whose name start with YY_,
are private implementation details. Do not rely on them. */
#ifndef YY_YY_PARSER_H_INCLUDED
# define YY_YY_PARSER_H_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
ID = 258,
INT_LIT = 259,
FLOAT_LIT = 260,
INT = 261,
FLOAT = 262,
VOID = 263,
CONST = 264,
RETURN = 265,
IF = 266,
ELSE = 267,
WHILE = 268,
BREAK = 269,
CONTINUE = 270,
LP = 271,
RP = 272,
LB = 273,
RB = 274,
LC = 275,
RC = 276,
COMMA = 277,
SEMICOLON = 278,
MINUS = 279,
NOT = 280,
ASSIGN = 281,
PLUS = 282,
MUL = 283,
DIV = 284,
MOD = 285,
AND = 286,
OR = 287,
EQ = 288,
NE = 289,
LT = 290,
LE = 291,
GT = 292,
GE = 293,
LEX_ERR = 294,
THEN = 295
};
#endif
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 25 "parser.y"
int int_val;
float float_val;
char *str_val;
struct ASTNode *node_val;
#line 105 "parser.h"
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
extern YYSTYPE yylval;
extern YYLTYPE yylloc;
int yyparse (void);
#endif /* !YY_YY_PARSER_H_INCLUDED */

File diff suppressed because one or more lines are too long

Binary file not shown.

@ -0,0 +1,196 @@
%define parse.error verbose
%locations
%{
/* Parser prologue: C headers, the AST interface and scanner globals. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "ast.h"
/* Root of the AST; assigned by the Root rule. */
ASTNode *root;
extern FILE *yyin;
extern int line_cnt;
extern int yylineno;
extern char *yytext;
extern int yylex();
extern int yyparse();
//extern void yyerror(char *msg);
void yyerror(const char* fmt, ...);
/* Set to 1 by yyerror(); main() prints the tree only while it is still 0. */
int syntax_error = 0;
/* Base name of the input file, used as the prefix of error messages. */
char filename[100];
%}
/* Semantic value: literal payloads, identifier text, or an AST node. */
%union {
int int_val;
float float_val;
char *str_val;
struct ASTNode *node_val;
}
/* Every nonterminal carries an AST node. */
%type <node_val> CompUnit ConstDecl VarDecl FuncDef ConstDef ConstInitVal VarDef InitVal FuncFParam ConstExpArray Block
%type <node_val> Root BlockItem Stmt LVal PrimaryExp UnaryExp FuncRParams MulExp Exp RelExp EqExp LAndExp LNotExp Cond ConstExp
%type <node_val> ExpArray AddExp LOrExp InitVals
//ForList
%token <str_val> ID
%token <int_val> INT_LIT
%token <float_val> FLOAT_LIT
%token <int_val> INT FLOAT VOID CONST RETURN IF ELSE WHILE BREAK CONTINUE LP RP LB RB LC RC COMMA SEMICOLON
%token <int_val> MINUS NOT ASSIGN PLUS MUL DIV MOD AND OR EQ NE LT LE GT GE LEX_ERR
//FOR INC DEC THEN
/* THEN is declared before ELSE, so ELSE has the higher precedence. */
%nonassoc THEN
%nonassoc ELSE
%start Root
%%
/* new_node arguments: (type, left, mid, right, int_val, float_val, symbol, d_type).
   List-shaped nonterminals are right-recursive: the element goes into
   `right` and the rest of the list into `left`. */
Root: CompUnit { root = new_node(Root, NULL, NULL, $1, 0, 0, NULL, NonType); };
/* Top-level list of declarations and function definitions. */
CompUnit: ConstDecl { $$ = new_node(CompUnit, NULL, NULL, $1, 0, 0, NULL, NonType); }
| VarDecl { $$ = new_node(CompUnit, NULL, NULL, $1, 0, 0, NULL, NonType); }
| FuncDef { $$ = new_node(CompUnit, NULL, NULL, $1, 0, 0, NULL, NonType); }
| ConstDecl CompUnit { $$ = new_node(CompUnit, $2, NULL, $1, 0, 0, NULL, NonType); }
| VarDecl CompUnit { $$ = new_node(CompUnit, $2, NULL, $1, 0, 0, NULL, NonType); }
| FuncDef CompUnit { $$ = new_node(CompUnit, $2, NULL, $1, 0, 0, NULL, NonType); };
/* Constant declarations; d_type records the element type. */
ConstDecl: CONST INT ConstDef SEMICOLON { $$ = new_node(ConstDecl, NULL, NULL, $3, 0, 0, NULL, Int); }
| CONST FLOAT ConstDef SEMICOLON { $$ = new_node(ConstDecl, NULL, NULL, $3, 0, 0, NULL, Float); };
/* One constant definition: mid = array dimensions, right = initialiser,
   left = next definition of the same declaration. */
ConstDef: ID ConstExpArray ASSIGN ConstInitVal { $$ = new_node(ConstDef, NULL, $2, $4, 0, 0, $1, NonType); }
| ID ConstExpArray ASSIGN ConstInitVal COMMA ConstDef { $$ = new_node(ConstDef, $6, $2, $4, 0, 0, $1, NonType); };
/* Possibly empty list of constant array dimensions. */
ConstExpArray: { $$ = NULL; }
| LB ConstExp RB ConstExpArray { $$ = new_node(ConstExpArray, $4, NULL, $2, 0, 0, NULL, NonType); };
ConstInitVal: ConstExp { $$ = new_node(ConstInitVal, NULL, NULL, $1, 0, 0, NULL, NonType); }
| LC RC { $$ = new_node(ConstInitVal, NULL, NULL, NULL, 0, 0, NULL, NonType); }
| LC ConstInitVal RC { $$ = new_node(ConstInitVal, NULL, NULL, $2, 0, 0, NULL, NonType); }
| LC ConstInitVal COMMA ConstInitVal RC { $$ = new_node(ConstInitVal, $4, NULL, $2, 0, 0, NULL, NonType); };
/* Constant expression; int_val holds PLUS/MINUS, or 0 for a plain MulExp. */
ConstExp: MulExp { $$ = new_node(ConstExp, NULL, NULL, $1, 0, 0, NULL, NonType); }
| MulExp PLUS Exp { $$ = new_node(ConstExp, $3, NULL, $1, PLUS, 0, NULL, NonType); }
| MulExp MINUS Exp { $$ = new_node(ConstExp, $3, NULL, $1, MINUS, 0, NULL, NonType); };
/* Variable declarations; d_type records the element type. */
VarDecl: INT VarDef SEMICOLON { $$ = new_node(VarDecl, NULL, NULL, $2, 0, 0, NULL, Int); }
| FLOAT VarDef SEMICOLON { $$ = new_node(VarDecl, NULL, NULL, $2, 0, 0, NULL, Float); };
/* One variable definition: mid = array dimensions, right = optional
   initialiser, left = next definition of the same declaration. */
VarDef: ID ConstExpArray { $$ = new_node(VarDef, NULL, $2, NULL, 0, 0, $1, NonType); }
| ID ConstExpArray ASSIGN InitVal { $$ = new_node(VarDef, NULL, $2, $4, 0, 0, $1, NonType); }
| ID ConstExpArray COMMA VarDef { $$ = new_node(VarDef, $4, $2, NULL, 0, 0, $1, NonType); }
| ID ConstExpArray ASSIGN InitVal COMMA VarDef { $$ = new_node(VarDef, $6, $2, $4, 0, 0, $1, NonType); };
/* Initialiser; int_val is Exp for a single expression and InitVals for a
   brace list. */
InitVal: Exp { $$ = new_node(InitVal, NULL, NULL, $1, Exp, 0, NULL, NonType); }
| LC RC { $$ = new_node(InitVal, NULL, NULL, NULL, InitVals, 0, NULL, NonType); }
| LC InitVals RC { $$ = new_node(InitVal, NULL, NULL, $2, InitVals, 0, NULL, NonType); };
InitVals: InitVal { $$ = new_node(InitVals, NULL, NULL, $1, 0, 0, NULL, NonType); }
| InitVal COMMA InitVals { $$ = new_node(InitVals, $3, NULL, $1, 0, 0, NULL, NonType); };
/* Function definition: mid = formal parameters, right = body, d_type =
   return type. */
FuncDef: INT ID LP RP Block { $$ = new_node(FuncDef, NULL, NULL, $5, 0, 0, $2, Int); }
| FLOAT ID LP RP Block { $$ = new_node(FuncDef, NULL, NULL, $5, 0, 0, $2, Float); }
| VOID ID LP RP Block { $$ = new_node(FuncDef, NULL, NULL, $5, 0, 0, $2, Void); }
| INT ID LP FuncFParam RP Block { $$ = new_node(FuncDef, NULL, $4, $6, 0, 0, $2, Int); }
| FLOAT ID LP FuncFParam RP Block { $$ = new_node(FuncDef, NULL, $4, $6, 0, 0, $2, Float); }
| VOID ID LP FuncFParam RP Block { $$ = new_node(FuncDef, NULL, $4, $6, 0, 0, $2, Void); };;
/* Formal parameter list: right = further array dimensions of an array
   parameter, left = next parameter. */
FuncFParam: INT ID { $$ = new_node(FuncFParam, NULL, NULL, NULL, 0, 0, $2, Int); }
| FLOAT ID { $$ = new_node(FuncFParam, NULL, NULL, NULL, 0, 0, $2, Float); }
| INT ID LB RB ExpArray { $$ = new_node(FuncFParam, NULL, NULL, $5, 0, 0, $2, Int); }
| FLOAT ID LB RB ExpArray { $$ = new_node(FuncFParam, NULL, NULL, $5, 0, 0, $2, Float); }
| INT ID COMMA FuncFParam { $$ = new_node(FuncFParam, $4, NULL, NULL, 0, 0, $2, Int); }
| FLOAT ID COMMA FuncFParam { $$ = new_node(FuncFParam, $4, NULL, NULL, 0, 0, $2, Float); }
| INT ID LB RB ExpArray COMMA FuncFParam { $$ = new_node(FuncFParam, $7, NULL, $5, 0, 0, $2, Int); }
| FLOAT ID LB RB ExpArray COMMA FuncFParam { $$ = new_node(FuncFParam, $7, NULL, $5, 0, 0, $2, Float); };
Block: LC BlockItem RC { $$ = new_node(Block, NULL, NULL, $2, 0, 0, NULL, NonType); };
/* Possibly empty list of declarations and statements inside a block. */
BlockItem: { $$ = NULL; }
| ConstDecl BlockItem { $$ = new_node(BlockItem, $2, NULL, $1, 0, 0, NULL, NonType); }
| VarDecl BlockItem { $$ = new_node(BlockItem, $2, NULL, $1, 0, 0, NULL, NonType); }
| Stmt BlockItem { $$ = new_node(BlockItem, $2, NULL, $1, 0, 0, NULL, NonType); };
/* Statements. int_val records the statement kind that print_stmt() switches
   on, and every alternative yields a real node, so that the block-item
   printer never has to dereference a NULL statement.
   Children: assignment  left = LVal, right = Exp;
             if / while  left = Cond, right = body, mid = else branch;
             others      right = wrapped expression or block, if any. */
Stmt: LVal ASSIGN Exp SEMICOLON { $$ = new_node(Stmt, $1, NULL, $3, AssignStmt, 0, NULL, NonType); }
| Exp SEMICOLON { $$ = new_node(Stmt, NULL, NULL, $1, ExpStmt, 0, NULL, NonType); }
| SEMICOLON { $$ = new_node(Stmt, NULL, NULL, NULL, BlankStmt, 0, NULL, NonType); }
| Block { $$ = new_node(Stmt, NULL, NULL, $1, Block, 0, NULL, NonType); }
/* %prec THEN gives the else-less form a lower precedence than the ELSE
   token, which resolves the dangling-else conflict in favour of shifting. */
| IF LP Cond RP Stmt %prec THEN { $$ = new_node(Stmt, $3, NULL, $5, IfStmt, 0, NULL, NonType); }
| IF LP Cond RP Stmt ELSE Stmt { $$ = new_node(Stmt, $3, $7, $5, IfElseStmt, 0, NULL, NonType); }
| WHILE LP Cond RP Stmt { $$ = new_node(Stmt, $3, NULL, $5, WhileStmt, 0, NULL, NonType); }
| BREAK SEMICOLON { $$ = new_node(Stmt, NULL, NULL, NULL, BreakStmt, 0, NULL, NonType); }
| CONTINUE SEMICOLON { $$ = new_node(Stmt, NULL, NULL, NULL, ContinueStmt, 0, NULL, NonType); }
| RETURN SEMICOLON { $$ = new_node(Stmt, NULL, NULL, NULL, BlankReturnStmt, 0, NULL, NonType); }
| RETURN Exp SEMICOLON { $$ = new_node(Stmt, NULL, NULL, $2, ReturnStmt, 0, NULL, NonType); }
;
/* Expressions. For binary nodes the first operand goes into `right`, the
   rest of the chain into `left`, and int_val holds the operator token. */
Exp: AddExp { $$ = new_node(Exp, NULL, NULL, $1, 0, 0, NULL, NonType); };
/* int_val == MUL marks an AddExp that only wraps a MulExp. */
AddExp: MulExp { $$ = new_node(AddExp, NULL, NULL, $1, MUL, 0, NULL, NonType); }
| MulExp PLUS AddExp { $$ = new_node(AddExp, $3, NULL, $1, PLUS, 0, NULL, NonType); }
| MulExp MINUS AddExp { $$ = new_node(AddExp, $3, NULL, $1, MINUS, 0, NULL, NonType); };
/* int_val == UnaryExp marks a MulExp that only wraps a UnaryExp. */
MulExp: UnaryExp { $$ = new_node(MulExp, NULL, NULL, $1, UnaryExp, 0, NULL, NonType); }
| UnaryExp MUL MulExp { $$ = new_node(MulExp, $3, NULL, $1, MUL, 0, NULL, NonType); }
| UnaryExp DIV MulExp { $$ = new_node(MulExp, $3, NULL, $1, DIV, 0, NULL, NonType); }
| UnaryExp MOD MulExp { $$ = new_node(MulExp, $3, NULL, $1, MOD, 0, NULL, NonType); };
/* int_val: PrimaryExp for a wrapper, FuncRParams for a call (symbol = callee
   name), Plus / Minus / NOT for the prefix operators. */
UnaryExp: PrimaryExp { $$ = new_node(UnaryExp, NULL, NULL, $1, PrimaryExp, 0, NULL, NonType); }
| ID LP RP { $$ = new_node(UnaryExp, NULL, NULL, NULL, FuncRParams, 0, $1, NonType); }
| ID LP FuncRParams RP { $$ = new_node(UnaryExp, NULL, NULL, $3, FuncRParams, 0, $1, NonType); }
| PLUS UnaryExp { $$ = new_node(UnaryExp, NULL, NULL, $2, Plus, 0, NULL, NonType); }
| MINUS UnaryExp { $$ = new_node(UnaryExp, NULL, NULL, $2, Minus, 0, NULL, NonType); }
| NOT UnaryExp { $$ = new_node(UnaryExp, NULL, NULL, $2, NOT, 0, NULL, NonType); };
/* Actual-argument list. */
FuncRParams: Exp { $$ = new_node(FuncRParams, NULL, NULL, $1, 0, 0, NULL, NonType); }
| Exp COMMA FuncRParams { $$ = new_node(FuncRParams, $3, NULL, $1, 0, 0, NULL, NonType); };
/* Literals store their value in int_val / float_val and their type in
   d_type; the wrapper forms use d_type == NonType. */
PrimaryExp: LP Exp RP { $$ = new_node(PrimaryExp, NULL, NULL, $2, Exp, 0, NULL, NonType); }
| LVal { $$ = new_node(PrimaryExp, NULL, NULL, $1, LVal, 0, NULL, NonType); }
| INT_LIT { $$ = new_node(PrimaryExp, NULL, NULL, NULL, $1, 0, NULL, Int); }
| FLOAT_LIT { $$ = new_node(PrimaryExp, NULL, NULL, NULL, 0, $1, NULL, Float); };
LVal: ID ExpArray { $$ = new_node(LVal, NULL, NULL, $2, 0, 0, $1, NonType); };
/* Conditions. Note that LOrExp also builds nodes of type Cond. */
Cond: LOrExp { $$ = new_node(Cond, NULL, NULL, $1, 0, 0, NULL, NonType); };
LOrExp: LAndExp { $$ = new_node(Cond, NULL, NULL, $1, 0, 0, NULL, NonType); }
| LAndExp OR LOrExp { $$ = new_node(Cond, $3, NULL, $1, OR, 0, 0, NonType); }
| LNotExp{ $$ = new_node(Cond, NULL, NULL, $1, 0, 0, NULL, NonType); };
LAndExp: EqExp { $$ = new_node(LAndExp, NULL, NULL, $1, 0, 0, NULL, NonType); }
| EqExp AND LAndExp { $$ = new_node(LAndExp, $3, NULL, $1, AND, 0, NULL, NonType); };
LNotExp: NOT LP EqExp RP { $$ = new_node(LNotExp, NULL, NULL, $3, 0, 0, NULL, NonType);};
EqExp: RelExp { $$ = new_node(EqExp, NULL, NULL, $1, 0, 0, NULL, NonType);}
| RelExp EQ EqExp { $$ = new_node(EqExp, $3, NULL, $1, EQ, 0, NULL, NonType); }
| RelExp NE EqExp { $$ = new_node(EqExp, $3, NULL, $1, NE, 0, NULL, NonType); };
RelExp: AddExp { $$ = new_node(RelExp, NULL, NULL, $1, 0, 0, NULL, NonType); }
| AddExp LT RelExp { $$ = new_node(RelExp, $3, NULL, $1, LT, 0, NULL, NonType); }
| AddExp GT RelExp { $$ = new_node(RelExp, $3, NULL, $1, GT, 0, NULL, NonType);}
| AddExp LE RelExp { $$ = new_node(RelExp, $3, NULL, $1, LE, 0, NULL, NonType); }
| AddExp GE RelExp { $$ = new_node(RelExp, $3, NULL, $1, GE, 0, NULL, NonType); };
/* Possibly empty list of array subscripts. */
ExpArray: { $$ = NULL; }
| LB Exp RB ExpArray { $$ = new_node(ExpArray, $4, NULL, $2, 0, 0, NULL, NonType); };
%%
/*
 * Entry point of the standalone parser: parse the file named by argv[1] and,
 * if no syntax error was reported, print its AST.
 * Returns 1 when no input file is given or it cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argc > 0 ? argv[0] : "parser");
        return 1;
    }
    const char *path = argv[1];
    /* Locate the base name: scan back from the last character to the one
       following the previous '/'. An empty path keeps index at 0. */
    size_t len = strlen(path);
    size_t index = len > 0 ? len - 1 : 0;
    while (index > 0 && path[index - 1] != '/')
        index--;
    /* Bounded copy: filename is a fixed-size buffer. */
    snprintf(filename, sizeof(filename), "%s", path + index);
    if (freopen(path, "r", stdin) == NULL) {
        fprintf(stderr, "%s: %s\n", path, strerror(errno));
        return 1;
    }
    yyparse();
    if (syntax_error == 0 && root != NULL)
        display(root);
    return 0;
}
/*
void yyerror(char *msg) {
printf("%s:%d\n", name, yylineno);
printf("error text: %s\n", yytext);
exit(-1);
}
*/
#include<stdarg.h>
/*
 * Error callback used by the generated parser: records that a syntax error
 * occurred and writes "<file>:<line> <message>." to stderr.
 * fmt and the following arguments are interpreted as by printf.
 */
void yyerror(const char* fmt, ...)
{
    syntax_error = 1;
    va_list ap;
    va_start(ap, fmt);
    fprintf(stderr, "%s:%d ", filename, yylineno);
    vfprintf(stderr, fmt, ap);
    /* Every va_start must be paired with va_end before the function returns. */
    va_end(ap);
    fprintf(stderr, ".\n");
}

@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.21)
# Sources of the RISC-V back end.
set(SOURCE_FILES "riscv.cpp" instruction.cpp optimize.cpp backend.cpp regalloc.cpp)
# Built as a static library that links against the `ir` library and sees its
# headers.
add_library(riscv STATIC ${SOURCE_FILES})
target_link_libraries(riscv PRIVATE ir)
target_include_directories(riscv PRIVATE ${CMAKE_SOURCE_DIR}/src/ir)

@ -0,0 +1,970 @@
#include "backend.h"
#include <cassert>
// Placeholder: currently does nothing.
void RiscvBuilder::initializeRegisterFile() {
// Allocate the register file and initialise its parameters.
// assert(false);
}
// Opcode mapping from IR instructions to RISC-V instruction types.
// Unknown instruction: FNeg (no entry below).
// Note: the IR does not distinguish addi from add, nor integer from floating
// point; that is corrected later by the build step.
const std::map<Instruction::OpID, RiscvInstr::InstrType> toRiscvOp = {
{Instruction::OpID::Add, RiscvInstr::InstrType::ADD},
{Instruction::OpID::Sub, RiscvInstr::InstrType::SUB},
{Instruction::OpID::Mul, RiscvInstr::InstrType::MUL},
{Instruction::OpID::SDiv, RiscvInstr::InstrType::DIV},
{Instruction::OpID::SRem, RiscvInstr::InstrType::REM},
{Instruction::OpID::FAdd, RiscvInstr::InstrType::FADD},
{Instruction::OpID::FSub, RiscvInstr::InstrType::FSUB},
{Instruction::OpID::FMul, RiscvInstr::InstrType::FMUL},
{Instruction::OpID::FDiv, RiscvInstr::InstrType::FDIV},
{Instruction::OpID::Ret, RiscvInstr::InstrType::RET},
{Instruction::OpID::ICmp, RiscvInstr::InstrType::ICMP},
{Instruction::OpID::FCmp, RiscvInstr::InstrType::FCMP},
{Instruction::OpID::Call, RiscvInstr::InstrType::CALL},
{Instruction::OpID::SItoFP, RiscvInstr::InstrType::SITOFP},
{Instruction::OpID::FPtoSI, RiscvInstr::InstrType::FPTOSI},
{Instruction::OpID::Or, RiscvInstr::InstrType::OR},
{Instruction::OpID::And, RiscvInstr::InstrType::AND},
{Instruction::OpID::Shl, RiscvInstr::InstrType::SHL},
{Instruction::OpID::LShr, RiscvInstr::InstrType::LSHR},
{Instruction::OpID::AShr, RiscvInstr::InstrType::ASHR},
{Instruction::OpID::Load, RiscvInstr::InstrType::LW},
{Instruction::OpID::Store, RiscvInstr::InstrType::SW},
};
// Number of labels handed out so far; incremented before each new block label.
int LabelCount = 0;
// Cache: IR basic block -> the RISC-V basic block created for it.
std::map<BasicBlock *, RiscvBasicBlock *> rbbLabel;
// Cache: IR function -> the RISC-V function created for it.
std::map<Function *, RiscvFunction *> functionLabel;
// Build the assembler label for block number `ind`: ".L" followed by the
// decimal number.
std::string toLabel(int ind) {
  std::string label(".L");
  label += std::to_string(ind);
  return label;
}
// Return the RISC-V basic block for `bb`, creating and caching it on first
// use. A null `bb` always yields a fresh block that is not cached.
RiscvBasicBlock *createRiscvBasicBlock(BasicBlock *bb) {
  if (bb != nullptr) {
    auto hit = rbbLabel.find(bb);
    if (hit != rbbLabel.end())
      return hit->second;
  }
  ++LabelCount;
  RiscvBasicBlock *fresh =
      new RiscvBasicBlock(toLabel(LabelCount), LabelCount);
  if (bb != nullptr)
    rbbLabel[bb] = fresh;
  return fresh;
}
// Return the RISC-V function object for `foo`, creating and caching it on
// first use. The operand kind passed to the constructor is derived from
// foo->type_->tid_ and stays Void for any type id other than integer or float.
RiscvFunction *createRiscvFunction(Function *foo) {
  assert(foo != nullptr);
  auto known = functionLabel.find(foo);
  if (known != functionLabel.end())
    return known->second;

  auto ty = RiscvOperand::Void;
  const auto tid = foo->type_->tid_;
  if (tid == Type::IntegerTyID)
    ty = RiscvOperand::IntReg;
  else if (tid == Type::FloatTyID)
    ty = RiscvOperand::FloatReg;

  RiscvFunction *created =
      new RiscvFunction(foo->name_, foo->arguments_.size(), ty);
  functionLabel[foo] = created;
  return created;
}
// Translate an IR binary instruction.
//
// When both operands are integer constants the result is computed here and a
// move of the immediate into the destination register is appended to `rbb`;
// nullptr is returned in that case. Otherwise a BinaryRiscvInst is built and
// returned (it is not appended here).
//
// Differences from a naive fold:
//  * both operands are checked to be ConstantInt before their values are read;
//  * Add/Sub/Mul wrap modulo 2^32 instead of overflowing a signed int;
//  * division or remainder by a constant zero is not folded, so the compiler
//    does not trap; the operation is emitted as a normal instruction.
//    NOTE(review): this relies on findReg accepting constant operands, as it
//    already must when only one operand is constant - confirm.
BinaryRiscvInst *RiscvBuilder::createBinaryInstr(RegAlloca *regAlloca,
                                                 BinaryInst *binaryInstr,
                                                 RiscvBasicBlock *rbb) {
  auto id = toRiscvOp.at(binaryInstr->op_id_);
  // Immediate handling.
  auto *lhsConst = dynamic_cast<ConstantInt *>(binaryInstr->operands_[0]);
  auto *rhsConst = dynamic_cast<ConstantInt *>(binaryInstr->operands_[1]);
  if (lhsConst != nullptr && rhsConst != nullptr) {
    const int lhs = lhsConst->value_;
    const int rhs = rhsConst->value_;
    const unsigned ulhs = static_cast<unsigned>(lhs);
    const unsigned urhs = static_cast<unsigned>(rhs);
    bool folded = true;
    int value_result = 0;
    switch (binaryInstr->op_id_) {
    case Instruction::OpID::Add:
      value_result = static_cast<int>(ulhs + urhs);
      break;
    case Instruction::OpID::Sub:
      value_result = static_cast<int>(ulhs - urhs);
      break;
    case Instruction::OpID::Mul:
      value_result = static_cast<int>(ulhs * urhs);
      break;
    case Instruction::OpID::SDiv:
      if (rhs == 0)
        folded = false;
      else if (rhs == -1)
        // Negate with wrap-around; INT_MIN / -1 would overflow.
        value_result = static_cast<int>(0u - ulhs);
      else
        value_result = lhs / rhs;
      break;
    case Instruction::OpID::SRem:
      if (rhs == 0)
        folded = false;
      else if (rhs == -1)
        value_result = 0; // x % -1 is always 0; avoids the INT_MIN case
      else
        value_result = lhs % rhs;
      break;
    default:
      std::cerr << "[Fatal Error] Binary instruction immediate caculation not "
                   "implemented."
                << std::endl;
      std::terminate();
    }
    if (folded) {
      rbb->addInstrBack(
          new MoveRiscvInst(regAlloca->findReg(binaryInstr, rbb, nullptr, 0, 0),
                            value_result, rbb));
      return nullptr;
    }
  }
  BinaryRiscvInst *instr = new BinaryRiscvInst(
      id, regAlloca->findReg(binaryInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(binaryInstr->operands_[1], rbb, nullptr, 1),
      regAlloca->findReg(binaryInstr, rbb, nullptr, 1, 0), rbb, true);
  return instr;
}
// Lowers an IR unary instruction: the opcode comes from toRiscvOp, the source
// and result registers from the register allocator. The instruction is
// returned without being appended to `rbb`.
UnaryRiscvInst *RiscvBuilder::createUnaryInstr(RegAlloca *regAlloca,
                                               UnaryInst *unaryInstr,
                                               RiscvBasicBlock *rbb) {
  return new UnaryRiscvInst(
      toRiscvOp.at(unaryInstr->op_id_),
      regAlloca->findReg(unaryInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(unaryInstr, rbb, nullptr, 1, 0), rbb);
}
// An IR store is lowered to one of: a move, a floating-point move, an LI
// followed by a store, or a real store instruction.
// Returns the generated instructions in emission order; the caller appends
// them to `rbb`.
std::vector<RiscvInstr *> RiscvBuilder::createStoreInstr(RegAlloca *regAlloca,
                                                         StoreInst *storeInstr,
                                                         RiscvBasicBlock *rbb) {
  auto *src = storeInstr->operands_[0];
  auto *dst = storeInstr->operands_[1];

  // Case 1: integer constant source -> materialise it in t0, then store.
  // The destination is looked up the same way whether or not it is a pointer.
  if (auto *intConst = dynamic_cast<ConstantInt *>(src)) {
    std::vector<RiscvInstr *> ans;
    auto scratch = getRegOperand("t0");
    ans.push_back(
        new MoveRiscvInst(scratch, new RiscvConst(intConst->value_), rbb));
    ans.push_back(new StoreRiscvInst(
        src->type_, scratch, regAlloca->findMem(dst, rbb, nullptr, 0), rbb));
    return ans;
  }

  // Case 2: a real store, whose second operand has pointer type.
  if (dst->type_->tid_ == Type::TypeID::PointerTyID) {
    auto pointerTy = static_cast<PointerType *>(dst->type_);
    // Alignment check for elements wider than 4 bytes.
    if (calcTypeSize(pointerTy->contained_) > 4) {
      auto mem = regAlloca->findMem(dst, rbb, nullptr, false);
      if (static_cast<RiscvIntPhiReg *>(mem)->shift_ & 7) {
        std::cerr << "[Error] Alignment error." << std::endl;
        std::terminate();
      }
    }
    StoreRiscvInst *realStore = new StoreRiscvInst(
        pointerTy->contained_, regAlloca->findReg(src, rbb, nullptr, 1),
        regAlloca->findMem(dst, rbb, nullptr, 0), rbb);
    return {realStore};
  }

  // Case 3: integer or floating-point move.
  // Floating-point constants are expected to have been placed in memory as
  // globals beforehand, so they can be handled like any other value here.
  std::vector<RiscvInstr *> ans;
  auto srcReg = regAlloca->findReg(src, rbb, nullptr, 1);
  ans.push_back(new MoveRiscvInst(regAlloca->findReg(dst, rbb), srcReg, rbb));
  // NOTE(review): the write-back targets the memory slot of operand 0 (the
  // source), not operand 1 — confirm this is intended.
  ans.push_back(
      new StoreRiscvInst(src->type_, srcReg, regAlloca->findMem(src), rbb));
  return ans;
}
// A load has a single operand (the address); the load instruction itself is
// the value being defined. Returns the instructions to append to `rbb`.
std::vector<RiscvInstr *> RiscvBuilder::createLoadInstr(RegAlloca *regAlloca,
                                                        LoadInst *loadInstr,
                                                        RiscvBasicBlock *rbb) {
  assert(loadInstr->operands_[0]->type_->tid_ == Type::TypeID::PointerTyID);
  auto *address = loadInstr->operands_[0];
  auto pointerTy = static_cast<PointerType *>(address->type_);

  // Alignment check for elements wider than 4 bytes.
  if (calcTypeSize(pointerTy->contained_) > 4) {
    auto mem = regAlloca->findMem(address, rbb, nullptr, false);
    if (static_cast<RiscvIntPhiReg *>(mem)->shift_ & 7) {
      std::cerr << "[Error] Alignment error." << std::endl;
      std::terminate();
    }
  }

  // Destination register first, then the memory operand.
  auto destReg =
      regAlloca->findReg(static_cast<Value *>(loadInstr), rbb, nullptr, 1, 0);
  std::vector<RiscvInstr *> ans;
  ans.push_back(new LoadRiscvInst(
      pointerTy->contained_, destReg,
      regAlloca->findMem(address, rbb, nullptr, false), rbb));
  return ans;
}
// Builds a combined compare-and-branch instruction from an integer comparison
// and the conditional branch that consumes it. Operands 1 and 2 of the branch
// are the two target basic blocks. The instruction is returned, not appended.
ICmpRiscvInstr *RiscvBuilder::createICMPInstr(RegAlloca *regAlloca,
                                              ICmpInst *icmpInstr,
                                              BranchInst *brInstr,
                                              RiscvBasicBlock *rbb) {
  // The argument expressions are deliberately left inline: they have side
  // effects and the original leaves their relative order to the compiler.
  return new ICmpRiscvInstr(
      icmpInstr->icmp_op_,
      regAlloca->findReg(icmpInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(icmpInstr->operands_[1], rbb, nullptr, 1),
      createRiscvBasicBlock(static_cast<BasicBlock *>(brInstr->operands_[1])),
      createRiscvBasicBlock(static_cast<BasicBlock *>(brInstr->operands_[2])),
      rbb);
}
// Lowers an integer comparison to a "set" style instruction and appends it to
// `rbb` itself.
//  1. Predicates absent from ICmpOpSName are rewritten in place (on the IR
//     instruction) by swapping the operands and using the ICmpOpEquiv predicate.
//  2. For the >= / <= predicates an extra `XORI reg, reg, 1` is appended to
//     invert the result.
// Returns the comparison instruction (already appended).
ICmpRiscvInstr *RiscvBuilder::createICMPSInstr(RegAlloca *regAlloca,
                                               ICmpInst *icmpInstr,
                                               RiscvBasicBlock *rbb) {
  if (ICmpSRiscvInstr::ICmpOpSName.count(icmpInstr->icmp_op_) == 0) {
    std::swap(icmpInstr->operands_[0], icmpInstr->operands_[1]);
    icmpInstr->icmp_op_ =
        ICmpRiscvInstr::ICmpOpEquiv.find(icmpInstr->icmp_op_)->second;
  }

  const auto predicate = icmpInstr->icmp_op_;
  const bool needsInversion =
      predicate == ICmpInst::ICMP_SGE || predicate == ICmpInst::ICMP_SLE ||
      predicate == ICmpInst::ICMP_UGE || predicate == ICmpInst::ICMP_ULE;

  ICmpSRiscvInstr *instr = new ICmpSRiscvInstr(
      icmpInstr->icmp_op_,
      regAlloca->findReg(icmpInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(icmpInstr->operands_[1], rbb, nullptr, 1),
      regAlloca->findReg(icmpInstr, rbb, nullptr, 1, 0), rbb);
  rbb->addInstrBack(instr);

  if (!needsInversion)
    return instr;

  auto resultReg = regAlloca->findReg(icmpInstr, rbb, nullptr, 1, 0);
  rbb->addInstrBack(new BinaryRiscvInst(RiscvInstr::XORI, resultReg,
                                        new RiscvConst(1), resultReg, rbb));
  return instr;
}
// Lowers a floating-point comparison and appends the result to `rbb` itself.
//  - FCMP_TRUE / FCMP_FALSE become a move of the constant truth value.
//  - Predicates absent from FCmpOpName are rewritten in place by swapping the
//    operands and using the FCmpOpEquiv predicate.
//  - ONE / UNE additionally get an `XORI reg, reg, 1` to invert the result.
//  - Ordered predicates only produce a warning: the FCLASS-based handling is
//    not implemented.
// Returns the primary instruction that was appended.
RiscvInstr *RiscvBuilder::createFCMPInstr(RegAlloca *regAlloca,
                                          FCmpInst *fcmpInstr,
                                          RiscvBasicBlock *rbb) {
  // Constant predicates.
  const bool alwaysTrue = fcmpInstr->fcmp_op_ == fcmpInstr->FCMP_TRUE;
  const bool alwaysFalse = fcmpInstr->fcmp_op_ == fcmpInstr->FCMP_FALSE;
  if (alwaysTrue || alwaysFalse) {
    auto constMove = new MoveRiscvInst(
        regAlloca->findReg(fcmpInstr, rbb, nullptr, 1, 0), alwaysTrue, rbb);
    rbb->addInstrBack(constMove);
    return constMove;
  }

  // Canonicalise predicates that have no direct instruction.
  if (FCmpRiscvInstr::FCmpOpName.count(fcmpInstr->fcmp_op_) == 0) {
    std::swap(fcmpInstr->operands_[0], fcmpInstr->operands_[1]);
    fcmpInstr->fcmp_op_ =
        FCmpRiscvInstr::FCmpOpEquiv.find(fcmpInstr->fcmp_op_)->second;
  }

  const auto predicate = fcmpInstr->fcmp_op_;
  const bool needsInversion =
      predicate == FCmpInst::FCMP_ONE || predicate == FCmpInst::FCMP_UNE;

  bool orderedPredicate = false;
  switch (predicate) {
  case FCmpInst::FCMP_OEQ:
  case FCmpInst::FCMP_OGT:
  case FCmpInst::FCMP_OGE:
  case FCmpInst::FCMP_OLT:
  case FCmpInst::FCMP_OLE:
  case FCmpInst::FCMP_ONE:
  case FCmpInst::FCMP_ORD:
    orderedPredicate = true;
    break;
  default:
    break;
  }
  if (orderedPredicate)
    std::cerr << "[Warning] Not implemented FCLASS yet.\n";

  FCmpRiscvInstr *instr = new FCmpRiscvInstr(
      fcmpInstr->fcmp_op_,
      regAlloca->findReg(fcmpInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(fcmpInstr->operands_[1], rbb, nullptr, 1),
      regAlloca->findReg(fcmpInstr, rbb, nullptr, 1, 0), rbb);
  rbb->addInstrBack(instr);

  if (needsInversion) {
    auto resultReg = regAlloca->findReg(fcmpInstr, rbb, nullptr, 1, 0);
    rbb->addInstrBack(new BinaryRiscvInst(RiscvInstr::XORI, resultReg,
                                          new RiscvConst(1), resultReg, rbb));
  }
  return instr;
}
// Builds the bare call instruction. The callee is the LAST operand of the IR
// call; it is converted to its RISC-V function via createRiscvFunction.
// Argument passing is not handled here.
CallRiscvInst *RiscvBuilder::createCallInstr(RegAlloca *regAlloca,
                                             CallInst *callInstr,
                                             RiscvBasicBlock *rbb) {
  auto *callee = static_cast<Function *>(callInstr->operands_.back());
  return new CallRiscvInst(createRiscvFunction(callee), rbb);
}
// Builds the return instruction. When the IR return carries a value, that
// value is first routed to a0 (integer) or fa0 (float) and a move into that
// register is appended to `rbb`. The returned ReturnRiscvInst itself is NOT
// appended here.
ReturnRiscvInst *RiscvBuilder::createRetInstr(RegAlloca *regAlloca,
                                              ReturnInst *returnInstr,
                                              RiscvBasicBlock *rbb,
                                              RiscvFunction *rfoo) {
  // e.g. `ret i32 %4`
  if (returnInstr->num_ops_ > 0) {
    auto retVal = returnInstr->operands_[0];
    RiscvOperand *abiReg = nullptr;
    switch (retVal->type_->tid_) {
    case Type::TypeID::IntegerTyID:
      abiReg = regAlloca->findSpecificReg(retVal, "a0", rbb);
      break;
    case Type::TypeID::FloatTyID:
      abiReg = regAlloca->findSpecificReg(retVal, "fa0", rbb);
      break;
    default:
      // NOTE(review): any other type leaves the destination null — confirm
      // such returns cannot occur.
      break;
    }
    rbb->addInstrBack(new MoveRiscvInst(
        abiReg, regAlloca->findReg(retVal, rbb, nullptr), rbb));
  }
  return new ReturnRiscvInst(rbb);
}
// Lowers an IR branch.
//  - One operand: unconditional jump to operand 0.
//  - Otherwise: conditional branch on operand 0 with targets operand 1 and 2.
// The instruction is returned, not appended.
BranchRiscvInstr *RiscvBuilder::createBrInstr(RegAlloca *regAlloca,
                                              BranchInst *brInstr,
                                              RiscvBasicBlock *rbb) {
  if (brInstr->num_ops_ == 1)
    return new BranchRiscvInstr(
        nullptr, nullptr,
        createRiscvBasicBlock(static_cast<BasicBlock *>(brInstr->operands_[0])),
        rbb);

  // Argument expressions stay inline: they have side effects and the original
  // leaves their relative order to the compiler.
  return new BranchRiscvInstr(
      regAlloca->findReg(brInstr->operands_[0], rbb, nullptr, 1),
      createRiscvBasicBlock(static_cast<BasicBlock *>(brInstr->operands_[1])),
      createRiscvBasicBlock(static_cast<BasicBlock *>(brInstr->operands_[2])),
      rbb);
}
// Lowers a signed-integer-to-float conversion: source register from operand 0,
// destination register from the instruction's own value. Not appended to `rbb`.
SiToFpRiscvInstr *RiscvBuilder::createSiToFpInstr(RegAlloca *regAlloca,
                                                  SiToFpInst *sitofpInstr,
                                                  RiscvBasicBlock *rbb) {
  SiToFpRiscvInstr *converted = new SiToFpRiscvInstr(
      regAlloca->findReg(sitofpInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(static_cast<Value *>(sitofpInstr), rbb, nullptr, 1, 0),
      rbb);
  return converted;
}
// Lowers a float-to-signed-integer conversion: source register from operand 0,
// destination register from the instruction's own value. Not appended to `rbb`.
FpToSiRiscvInstr *RiscvBuilder::createFptoSiInstr(RegAlloca *regAlloca,
                                                  FpToSiInst *fptosiInstr,
                                                  RiscvBasicBlock *rbb) {
  FpToSiRiscvInstr *converted = new FpToSiRiscvInstr(
      regAlloca->findReg(fptosiInstr->operands_[0], rbb, nullptr, 1),
      regAlloca->findReg(static_cast<Value *>(fptosiInstr), rbb, nullptr, 1, 0),
      rbb);
  return converted;
}
// Lowers a GetElementPtr instruction by computing the element address in a
// fixed scratch register and storing it to the memory slot of the instruction.
//
// Register usage: t2 accumulates the address, t3 holds `index * element size`
// for every non-constant index.
//
// All instructions are appended to `rbb` here; the function always returns
// nullptr, so the caller has nothing to append.
RiscvInstr *RiscvBuilder::solveGetElementPtr(RegAlloca *regAlloca,
                                             GetElementPtrInst *instr,
                                             RiscvBasicBlock *rbb) {
  Value *op0 = instr->get_operand(0);
  RiscvOperand *dest = getRegOperand("t2");

  // Base address.
  if (dynamic_cast<GlobalVariable *>(op0) != nullptr) {
    // Globals: take the symbol address with `la`.
    rbb->addInstrBack(new LoadAddressRiscvInstr(dest, op0->name_, rbb));
  } else if (dynamic_cast<Instruction *>(op0) != nullptr) {
    // Pointer produced by another instruction: copy its register.
    rbb->addInstrBack(new MoveRiscvInst(
        dest, regAlloca->findReg(op0, rbb, nullptr, 1, 1), rbb));
  }
  // NOTE(review): any other kind of base operand leaves t2 unset — confirm
  // that cannot occur.

  // Indices: constant ones are summed into one immediate, variable ones are
  // scaled at run time and added to the base.
  int totalOffset = 0;
  const unsigned int num_operands = instr->num_ops_;
  Type *cur_type = static_cast<PointerType *>(op0->type_)->contained_;
  for (unsigned int i = 1; i < num_operands; i++) {
    // From the second index on, step into the array element type.
    if (i > 1)
      cur_type = static_cast<ArrayType *>(cur_type)->contained_;
    Value *opi = instr->get_operand(i);
    const int curTypeSize = calcTypeSize(cur_type);
    if (auto ci = dynamic_cast<ConstantInt *>(opi)) {
      totalOffset += ci->value_ * curTypeSize;
    } else {
      // A variable takes part in the offset computation.
      RiscvOperand *mulTempReg = getRegOperand("t3");
      rbb->addInstrBack(new MoveRiscvInst(mulTempReg, curTypeSize, rbb));
      rbb->addInstrBack(new BinaryRiscvInst(
          RiscvInstr::InstrType::MUL, regAlloca->findReg(opi, rbb, nullptr, 1),
          mulTempReg, mulTempReg, rbb));
      rbb->addInstrBack(new BinaryRiscvInst(RiscvInstr::InstrType::ADD,
                                            mulTempReg, dest, dest, rbb));
    }
  }

  // The constant part is always added, even when it is zero.
  rbb->addInstrBack(new BinaryRiscvInst(RiscvInstr::InstrType::ADDI, dest,
                                        new RiscvConst(totalOffset), dest,
                                        rbb));
  // Write the computed address back to the slot of the GEP value.
  rbb->addInstrBack(
      new StoreRiscvInst(instr->type_, dest, regAlloca->findMem(instr), rbb));
  return nullptr;
}
// Inserts the function epilogue in front of `returnInstr`:
//  1. reload every saved register that was actually used, walking
//     savedRegister in reverse and starting at fp + querySP();
//  2. reload fp itself — it must be restored last, because the reloads are
//     addressed relative to it;
//  3. release the stack frame by adding -querySP() to sp.
void RiscvBuilder::initRetInstr(RegAlloca *regAlloca, RiscvInstr *returnInstr,
                                RiscvBasicBlock *rbb, RiscvFunction *foo) {
  int slotOffset = foo->querySP();
  auto restoreOrder = regAlloca->savedRegister;
  auto usedRegs = regAlloca->getUsedReg();
  reverse(restoreOrder.begin(), restoreOrder.end());

  for (auto reg : restoreOrder) {
    if (usedRegs.find(reg) == usedRegs.end())
      continue; // never touched by this function, nothing was saved
    // Integer registers are reloaded as pointer-typed values, the rest as float.
    const bool isIntReg = (reg->getType() == reg->IntReg);
    Type *slotType =
        new Type(isIntReg ? Type::PointerTyID : Type::FloatTyID);
    rbb->addInstrBefore(
        new LoadRiscvInst(slotType, reg, new RiscvIntPhiReg("fp", slotOffset),
                          rbb),
        returnInstr);
    slotOffset += VARIABLE_ALIGN_BYTE;
  }

  // Restore fp.
  RiscvInstr *reloadFp = new LoadRiscvInst(
      new Type(Type::PointerTyID), getRegOperand("fp"),
      new RiscvIntPhiReg("fp", slotOffset), rbb);
  rbb->addInstrBefore(reloadFp, returnInstr);

  // Release the stack frame.
  RiscvInstr *releaseFrame = new BinaryRiscvInst(
      RiscvInstr::ADDI, getRegOperand("sp"), new RiscvConst(-foo->querySP()),
      getRegOperand("sp"), rbb);
  rbb->addInstrBefore(releaseFrame, returnInstr);
}
// Translates one IR basic block into its RISC-V basic block.
// Walks bb->instr_list_ in order and dispatches on the opcode; the generated
// instructions are appended to the block obtained from
// createRiscvBasicBlock(bb). All registers are written back before a ret/br,
// or at the end of the block if neither was seen.
// @param bb  IR basic block to translate
// @param foo RISC-V function being built (provides the register allocator and
//            the current stack offset)
// @return the RISC-V basic block for `bb`
RiscvBasicBlock *RiscvBuilder::transferRiscvBasicBlock(BasicBlock *bb,
RiscvFunction *foo) {
// Only referenced by the commented-out debug print at the end of the loop.
int translationCount = 0;
RiscvBasicBlock *rbb = createRiscvBasicBlock(bb);
Instruction *forward = nullptr; // previous instruction, meant for fusing icmp/fcmp with a branch (not used below)
bool brFound = false;
for (Instruction *instr : bb->instr_list_) {
switch (instr->op_id_) {
case Instruction::Ret:
// Before leaving basic block writeback all registers
foo->regAlloca->writeback_all(rbb);
brFound = true;
// During translation only the ret itself is emitted; register restoration
// and related work is inserted later, in a second scan.
rbb->addInstrBack(this->createRetInstr(
foo->regAlloca, static_cast<ReturnInst *>(instr), rbb, foo));
break;
// Branch instruction
case Instruction::Br:
// Before leaving basic block writeback all registers
foo->regAlloca->writeback_all(rbb);
brFound = true;
rbb->addInstrBack(this->createBrInstr(
foo->regAlloca, static_cast<BranchInst *>(instr), rbb));
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::UDiv:
case Instruction::URem:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
// NOTE(review): createBinaryInstr returns nullptr when it folds two
// constants — confirm addInstrBack tolerates a null instruction.
rbb->addInstrBack(this->createBinaryInstr(
foo->regAlloca, static_cast<BinaryInst *>(instr), rbb));
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::FNeg:
rbb->addInstrBack(this->createUnaryInstr(
foo->regAlloca, static_cast<UnaryInst *>(instr), rbb));
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::PHI:
break;
// Instructions that are simply dropped
case Instruction::BitCast:
break;
case Instruction::ZExt:
// Equivalent to a value-merging statement
break;
case Instruction::Alloca:
break;
case Instruction::GetElementPtr: {
this->solveGetElementPtr(foo->regAlloca,
static_cast<GetElementPtrInst *>(instr), rbb);
// Writeback inside solveGetElementPtr().
break;
}
case Instruction::FPtoSI:
rbb->addInstrBack(this->createFptoSiInstr(
foo->regAlloca, static_cast<FpToSiInst *>(instr), rbb));
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::SItoFP:
rbb->addInstrBack(this->createSiToFpInstr(
foo->regAlloca, static_cast<SiToFpInst *>(instr), rbb));
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::Load: {
auto instrSet = this->createLoadInstr(
foo->regAlloca, static_cast<LoadInst *>(instr), rbb);
for (auto x : instrSet)
rbb->addInstrBack(x);
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
}
case Instruction::Store: {
auto instrSet = this->createStoreInstr(
foo->regAlloca, static_cast<StoreInst *>(instr), rbb);
for (auto *x : instrSet)
rbb->addInstrBack(x);
// Store Instruction returns void value.
break;
}
case Instruction::ICmp:
// createICMPSInstr appends its instructions to rbb itself.
createICMPSInstr(foo->regAlloca, static_cast<ICmpInst *>(instr), rbb);
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::FCmp:
// createFCMPInstr appends its instructions to rbb itself.
createFCMPInstr(foo->regAlloca, static_cast<FCmpInst *>(instr), rbb);
// foo->regAlloca->writeback(static_cast<Value *>(instr), rbb);
break;
case Instruction::Call: {
// Note: system/library functions get no special treatment here!
// Whether an argument is float or int is decided below; the placement
// itself is done by the register allocator.
// The call sequence adjusts sp itself so that the callee starts from a
// fresh frame; sp is not adjusted anywhere else.
// After the call sp is restored manually to the caller's frame.
CallInst *curInstr = static_cast<CallInst *>(instr);
RiscvFunction *calleeFoo = createRiscvFunction(
static_cast<Function *>(curInstr->operands_.back()));
// Pass the arguments as required by the calling convention.
int sp_shift_for_paras = 0;
int sp_shift_alignment_padding = 0; // Align sp pointer to 16-byte
int paraShift = 0;
int intRegCount = 0, floatRegCount = 0;
// Compute the extra stack space needed for the arguments
// (one VARIABLE_ALIGN_BYTE slot per argument; the last operand is the callee).
for (int i = 0; i < curInstr->operands_.size() - 1; i++) {
sp_shift_for_paras += VARIABLE_ALIGN_BYTE;
}
// NOTE(review): this yields 16 (not 0) when the sum is already a multiple
// of 16 — confirm that is intended.
sp_shift_alignment_padding =
16 - ((abs(foo->querySP()) + sp_shift_for_paras) & 15);
sp_shift_for_paras += sp_shift_alignment_padding;
// Reserve stack space for the arguments
rbb->addInstrBack(new BinaryRiscvInst(
BinaryRiscvInst::ADDI, getRegOperand("sp"),
new RiscvConst(-sp_shift_for_paras), getRegOperand("sp"), rbb));
// Move the arguments into registers and memory:
// the first 8 non-float arguments go to a0..a7, the first 8 float
// arguments to fa0..fa7.
for (int i = 0; i < curInstr->operands_.size() - 1; i++) {
std::string name = "";
auto operand = curInstr->operands_[i];
if (operand->type_->tid_ != Type::FloatTyID) {
if (intRegCount < 8)
name = "a" + std::to_string(intRegCount);
intRegCount++;
} else if (operand->type_->tid_ == Type::FloatTyID) {
if (floatRegCount < 8)
name = "fa" + std::to_string(floatRegCount);
floatRegCount++;
}
// Arguments beyond the register budget are written straight to memory
// (through t1 / ft1).
if (name.empty()) {
if (operand->type_->tid_ != Type::FloatTyID) {
rbb->addInstrBack(new StoreRiscvInst(
operand->type_,
foo->regAlloca->findSpecificReg(operand, "t1", rbb),
new RiscvIntPhiReg("sp", paraShift), rbb));
} else {
rbb->addInstrBack(new StoreRiscvInst(
operand->type_,
foo->regAlloca->findSpecificReg(operand, "ft1", rbb),
new RiscvIntPhiReg("sp", paraShift), rbb));
}
} else {
foo->regAlloca->findSpecificReg(operand, name, rbb, nullptr);
}
paraShift += VARIABLE_ALIGN_BYTE; // Add operand size lastly
}
// Call the function.
rbb->addInstrBack(this->createCallInstr(foo->regAlloca, curInstr, rbb));
// Release the stack space reserved for the arguments
rbb->addInstrBack(new BinaryRiscvInst(
BinaryRiscvInst::ADDI, getRegOperand("sp"),
new RiscvConst(sp_shift_for_paras), getRegOperand("sp"), rbb));
// At last, save return value (a0) to target value.
if (curInstr->type_->tid_ != curInstr->type_->VoidTyID) {
if (curInstr->type_->tid_ != curInstr->type_->FloatTyID) {
rbb->addInstrBack(
new StoreRiscvInst(new IntegerType(32), getRegOperand("a0"),
foo->regAlloca->findMem(curInstr), rbb));
} else {
rbb->addInstrBack(new StoreRiscvInst(
new Type(Type::FloatTyID), getRegOperand("fa0"),
foo->regAlloca->findMem(curInstr), rbb));
}
}
break;
}
}
// std::cout << "FINISH TRANSFER " << ++translationCount << "Codes\n";
}
// If br instruction is not found, then leave basic block at the block's end.
if (!brFound) {
foo->regAlloca->writeback_all(rbb);
}
return rbb;
}
// Top-level driver: translates a whole IR module into RISC-V assembly text.
// Emits a data section (global variables and floating-point constants)
// followed by a text section (one body per function, or the library stub for
// library functions).
// @param m IR module to translate
// @return the data section concatenated with the text section
std::string RiscvBuilder::buildRISCV(Module *m) {
// NOTE(review): the constructor already allocates a RiscvModule; this
// replaces it without freeing the previous one — confirm.
this->rm = new RiscvModule();
std::string data = ".align 2\n.section .data\n"; // Add align attribute
// Global variables
if (m->global_list_.size()) {
for (GlobalVariable *gb : m->global_list_) {
auto curType = static_cast<PointerType *>(gb->type_)->contained_;
RiscvGlobalVariable *curGB = nullptr;
Type *containedType = nullptr;
switch (curType->tid_) {
case Type::PointerTyID:
assert(false);
break;
case Type::ArrayTyID:
// Peel nested array types to find the scalar element type.
containedType = curType;
while (1) {
if (containedType->tid_ == Type::TypeID::ArrayTyID)
containedType = static_cast<ArrayType *>(containedType)->contained_;
else
break;
}
// The last constructor argument is the total byte size divided by 4.
if (containedType->tid_ == Type::IntegerTyID) {
curGB = new RiscvGlobalVariable(RiscvOperand::IntImm, gb->name_,
gb->is_const_, gb->init_val_,
calcTypeSize(curType) / 4);
rm->addGlobalVariable(curGB);
data += curGB->print();
} else if (containedType->tid_ == Type::FloatTyID) {
curGB = new RiscvGlobalVariable(RiscvOperand::FloatImm, gb->name_,
gb->is_const_, gb->init_val_,
calcTypeSize(curType) / 4);
rm->addGlobalVariable(curGB);
data += curGB->print();
}
break;
case Type::TypeID::IntegerTyID: {
auto curGB =
new RiscvGlobalVariable(RiscvOperand::OpTy::IntImm, gb->name_,
gb->is_const_, gb->init_val_);
assert(curGB != nullptr);
rm->addGlobalVariable(curGB);
data += curGB->print();
break;
}
case Type::TypeID::FloatTyID: {
auto curGB =
new RiscvGlobalVariable(RiscvOperand::OpTy::FloatImm, gb->name_,
gb->is_const_, gb->init_val_);
rm->addGlobalVariable(curGB);
data += curGB->print();
break;
}
}
}
}
// Floating-point constants are placed in memory
int ConstFloatCount = 0;
std::string code = ".section .text\n";
// Function bodies
// Preprocessing: first merge all value-merging statements (PHI, ZExt,
// BitCast) in the DSU; storeOnStack below then works on the merged values.
for (Function *foo : m->function_list_) {
auto rfoo = createRiscvFunction(foo);
rm->addFunction(rfoo);
if (rfoo->is_libfunc()) {
auto *libFunc = createSyslibFunc(foo);
if (libFunc != nullptr)
code += libFunc->print();
continue;
}
for (BasicBlock *bb : foo->basic_blocks_)
for (Instruction *instr : bb->instr_list_)
if (instr->op_id_ == Instruction::OpID::PHI) {
for (auto *operand : instr->operands_)
rfoo->regAlloca->DSU_for_Variable.merge(
operand, static_cast<Value *>(instr));
} else if (instr->op_id_ == Instruction::OpID::ZExt) {
rfoo->regAlloca->DSU_for_Variable.merge(instr->operands_[0],
static_cast<Value *>(instr));
} else if (instr->op_id_ == Instruction::OpID::BitCast) {
// std::cerr << "[Debug] [DSU] Bitcast Instruction: Merge value "
// << static_cast<Value *>(instr)->print() << " to "
// << instr->operands_[0]->print() << " ." << std::endl;
rfoo->regAlloca->DSU_for_Variable.merge(static_cast<Value *>(instr),
instr->operands_[0]);
}
// Collect every floating-point constant used in this function and tell the
// register allocator where it lives
for (BasicBlock *bb : foo->basic_blocks_)
for (Instruction *instr : bb->instr_list_)
for (auto *Operand : instr->operands_)
// Found a float constant: emit it into the data section as a word
if (dynamic_cast<ConstantFloat *>(Operand) != nullptr) {
std::string curFloatName =
"FloatConst" + std::to_string(ConstFloatCount);
ConstFloatCount++;
std::string valString =
dynamic_cast<ConstantFloat *>(Operand)->print32();
// Pad with '0' to 10 characters, then keep exactly the first 10.
while (valString.length() < 10)
valString += "0";
data +=
curFloatName + ":\n\t.word\t" + valString.substr(0, 10) + "\n";
rfoo->regAlloca->setPosition(Operand,
new RiscvFloatPhiReg(curFloatName, 0));
}
// First create one extra basic block that will hold register protection and
// stack space allocation (the prologue)
RiscvBasicBlock *initBlock = createRiscvBasicBlock();
std::map<Value *, int> haveAllocated;
int IntParaCount = 0, FloatParaCount = 0;
int sp_shift_for_paras = 0;
int paraShift = 0;
rfoo->setSP(0); // set sp to 0 initially.
// Lambda function to write value to stack frame.
auto storeOnStack = [&](Value **val) {
if (val == nullptr)
return;
assert(*val != nullptr);
if (haveAllocated.count(*val))
return;
// A few special kinds of value need no stack space
if (dynamic_cast<Function *>(*val) != nullptr)
return;
if (dynamic_cast<BasicBlock *>(*val) != nullptr)
return;
// Note: function arguments are not allocated, their position is assigned
// directly!
// Convention here: names starting with "v" are local variables, names
// starting with "arg" are argument-register variables.
// Unnamed constants
if ((*val)->name_.empty())
return;
// Globals need no stack address: they already have their own memory
// location, so only record it
if (dynamic_cast<GlobalVariable *>(*val) != nullptr) {
auto curType = (*val)->type_;
while (1) {
if (curType->tid_ == Type::TypeID::ArrayTyID)
curType = static_cast<ArrayType *>(curType)->contained_;
else if (curType->tid_ == Type::TypeID::PointerTyID)
curType = static_cast<PointerType *>(curType)->contained_;
else
break;
}
if (curType->tid_ != Type::TypeID::FloatTyID)
rfoo->regAlloca->setPosition(*val,
new RiscvIntPhiReg((*val)->name_, 0, 1));
else
rfoo->regAlloca->setPosition(
*val, new RiscvFloatPhiReg((*val)->name_, 0, 1));
return;
}
// Arguments (anything that is not a global)
if (dynamic_cast<Argument *>(*val) != nullptr) {
// No extra space is allocated
// Integer arguments
if ((*val)->type_->tid_ == Type::TypeID::IntegerTyID ||
(*val)->type_->tid_ == Type::TypeID::PointerTyID) {
// Pointer type's size is set to 8 byte.
if (IntParaCount < 8)
rfoo->regAlloca->setPositionReg(
*val, getRegOperand("a" + std::to_string(IntParaCount)));
rfoo->regAlloca->setPosition(
*val, new RiscvIntPhiReg(NamefindReg("fp"), paraShift));
IntParaCount++;
}
// Floating-point arguments
else {
assert((*val)->type_->tid_ == Type::TypeID::FloatTyID);
// A register is still available
if (FloatParaCount < 8) {
rfoo->regAlloca->setPositionReg(
*val, getRegOperand("fa" + std::to_string(FloatParaCount)));
}
rfoo->regAlloca->setPosition(
*val, new RiscvFloatPhiReg(NamefindReg("fp"), paraShift));
FloatParaCount++;
}
paraShift += VARIABLE_ALIGN_BYTE;
}
// Function-local values
else {
int curSP = rfoo->querySP();
RiscvOperand *stackPos = static_cast<RiscvOperand *>(
new RiscvIntPhiReg(NamefindReg("fp"), curSP - VARIABLE_ALIGN_BYTE));
rfoo->regAlloca->setPosition(static_cast<Value *>(*val), stackPos);
rfoo->addTempVar(stackPos);
}
haveAllocated[*val] = 1;
};
// Associate function arguments with registers and memory
for (Value *arg : foo->arguments_)
storeOnStack(&arg);
for (BasicBlock *bb : foo->basic_blocks_)
for (Instruction *instr : bb->instr_list_)
if (instr->op_id_ != Instruction::OpID::PHI &&
instr->op_id_ != Instruction::OpID::ZExt &&
instr->op_id_ != Instruction::OpID::Alloca) {
// Every function-local value gets a stack slot
Value *tempPtr = static_cast<Value *>(instr);
storeOnStack(&tempPtr);
for (auto *val : instr->operands_) {
tempPtr = static_cast<Value *>(val);
storeOnStack(&tempPtr);
}
}
for (BasicBlock *bb : foo->basic_blocks_)
for (Instruction *instr : bb->instr_list_)
if (instr->op_id_ == Instruction::OpID::Alloca) {
// Allocate the storage and also record the address of the pointer
auto curInstr = static_cast<AllocaInst *>(instr);
int curTypeSize = calcTypeSize(curInstr->alloca_ty_);
rfoo->storeArray(curTypeSize);
int curSP = rfoo->querySP();
RiscvOperand *ptrPos = new RiscvIntPhiReg(NamefindReg("fp"), curSP);
rfoo->regAlloca->setPosition(static_cast<Value *>(instr), ptrPos);
rfoo->regAlloca->setPointerPos(static_cast<Value *>(instr), ptrPos);
}
// Add the initialisation (prologue) block
rfoo->addBlock(initBlock);
// Translate the instructions; this also determines which registers are used
for (BasicBlock *bb : foo->basic_blocks_)
rfoo->addBlock(this->transferRiscvBasicBlock(bb, rfoo));
rfoo->ChangeBlock(initBlock, 0);
// Protect (save) registers
rfoo->shiftSP(-VARIABLE_ALIGN_BYTE);
int fp_sp = rfoo->querySP(); // slot reserved for saving fp
auto &reg_to_save = rfoo->regAlloca->savedRegister;
auto reg_used = rfoo->regAlloca->getUsedReg();
for (auto reg : reg_to_save)
if (reg_used.find(reg) != reg_used.end()) {
rfoo->shiftSP(-VARIABLE_ALIGN_BYTE);
if (reg->getType() == reg->IntReg)
initBlock->addInstrBack(new StoreRiscvInst(
new Type(Type::PointerTyID), reg,
new RiscvIntPhiReg(NamefindReg("fp"), rfoo->querySP()),
initBlock));
else
initBlock->addInstrBack(new StoreRiscvInst(
new Type(Type::FloatTyID), reg,
new RiscvIntPhiReg(NamefindReg("fp"), rfoo->querySP()),
initBlock));
}
// Allocate the whole stack frame and set fp to the original sp.
// The three instructions are pushed to the FRONT in reverse, so they execute
// in the order 1, 2, 3.
initBlock->addInstrFront(new BinaryRiscvInst(
RiscvInstr::ADDI, getRegOperand("sp"), new RiscvConst(-rfoo->querySP()),
getRegOperand("fp"),
initBlock)); // 3: fp <- t0
initBlock->addInstrFront(new StoreRiscvInst(
new Type(Type::PointerTyID), getRegOperand("fp"),
new RiscvIntPhiReg(NamefindReg("sp"), fp_sp - rfoo->querySP()),
initBlock)); // 2: save fp
initBlock->addInstrFront(new BinaryRiscvInst(
RiscvInstr::ADDI, getRegOperand("sp"), new RiscvConst(rfoo->querySP()),
getRegOperand("sp"), initBlock)); // 1: allocate the stack frame
// Scan for return instructions and insert register restoration etc. before
// them (only the first RET found in each block is handled)
for (RiscvBasicBlock *rbb : rfoo->blk)
for (RiscvInstr *rinstr : rbb->instruction)
if (rinstr->type_ == rinstr->RET) {
initRetInstr(rfoo->regAlloca, rinstr, rbb, rfoo);
break;
}
code += rfoo->print();
}
return data + code;
}
/**
 * Computes the size in bytes of an IR type.
 *
 * Nested array types are flattened: the element counts of all array levels
 * are multiplied together and the product is scaled by the size of the scalar
 * element (4 bytes for integer and float, 8 bytes for pointer).
 *
 * @param ty type to measure; after peeling arrays it must be an integer,
 *           float or pointer type (checked with assert)
 * @return total size in bytes
 */
int calcTypeSize(Type *ty) {
  int elementCount = 1;
  for (; ty->tid_ == Type::ArrayTyID;
       ty = static_cast<ArrayType *>(ty)->contained_)
    elementCount *= static_cast<ArrayType *>(ty)->num_elements_;

  assert(ty->tid_ == Type::IntegerTyID || ty->tid_ == Type::FloatTyID ||
         ty->tid_ == Type::PointerTyID);

  switch (ty->tid_) {
  case Type::IntegerTyID:
  case Type::FloatTyID:
    elementCount *= 4;
    break;
  case Type::PointerTyID:
    elementCount *= 8;
    break;
  default:
    break;
  }
  return elementCount;
}

@ -0,0 +1,82 @@
#ifndef BACKENDH
#define BACKENDH
#include "instruction.h"
#include "ir.h"
#include "optimize.h"
#include "regalloc.h"
#include "riscv.h"
#include <memory.h>
// Mapping from IR opcodes to RISC-V instruction types.
const extern std::map<Instruction::OpID, RiscvInstr::InstrType> toRiscvOp;
// Counter used to number generated basic-block labels.
extern int LabelCount;
// Caches of the RISC-V objects already created for IR blocks / functions.
extern std::map<BasicBlock *, RiscvBasicBlock *> rbbLabel;
extern std::map<Function *, RiscvFunction *> functionLabel;
// The function below only creates a label for a basic block; its instruction
// list is empty. The instructions are produced only when the driver actually
// traverses that bb.
RiscvBasicBlock *createRiscvBasicBlock(BasicBlock *bb = nullptr);
// Returns (creating on first use) the RISC-V function for an IR function.
RiscvFunction *createRiscvFunction(Function *foo = nullptr);
// Formats a label index as a label name.
std::string toLabel(int ind);
// Size in bytes of an IR type.
int calcTypeSize(Type *ty);
// Top-level driver: translates an IR module into RISC-V assembly.
class RiscvBuilder {
private:
void initializeRegisterFile();
public:
// Creates an empty RISC-V module and initialises the register file.
RiscvBuilder() {
rm = new RiscvModule();
initializeRegisterFile();
}
// The RISC-V module being built.
RiscvModule *rm;
// Merging of phi statements: a disjoint-set union (DSU_for_Variable) keeps
// track of values that are the same variable.
// Example: if (A) y1=do something else y2=do another thing. Phi y3 y1, y2
std::string buildRISCV(Module *m);
// The functions below generate the RISC-V instruction(s) for one IR
// instruction each.
// Zext is a zero extension, so no instruction is needed for it.
// ZExtRiscvInstr createZextInstr(ZextInst *instr);
// void resolveLibfunc(RiscvFunction *foo);
BinaryRiscvInst *createBinaryInstr(RegAlloca *regAlloca,
BinaryInst *binaryInstr,
RiscvBasicBlock *rbb);
UnaryRiscvInst *createUnaryInstr(RegAlloca *regAlloca, UnaryInst *unaryInstr,
RiscvBasicBlock *rbb);
std::vector<RiscvInstr *> createStoreInstr(RegAlloca *regAlloca,
StoreInst *storeInstr,
RiscvBasicBlock *rbb);
std::vector<RiscvInstr *> createLoadInstr(RegAlloca *regAlloca,
LoadInst *loadInstr,
RiscvBasicBlock *rbb);
ICmpRiscvInstr *createICMPInstr(RegAlloca *regAlloca, ICmpInst *icmpInstr,
BranchInst *brInstr, RiscvBasicBlock *rbb);
ICmpRiscvInstr *createICMPSInstr(RegAlloca *regAlloca, ICmpInst *icmpInstr,
RiscvBasicBlock *rbb);
RiscvInstr *createFCMPInstr(RegAlloca *regAlloca, FCmpInst *fcmpInstr,
RiscvBasicBlock *rbb);
SiToFpRiscvInstr *createSiToFpInstr(RegAlloca *regAlloca,
SiToFpInst *sitofpInstr,
RiscvBasicBlock *rbb);
FpToSiRiscvInstr *createFptoSiInstr(RegAlloca *regAlloca,
FpToSiInst *fptosiInstr,
RiscvBasicBlock *rbb);
CallRiscvInst *createCallInstr(RegAlloca *regAlloca, CallInst *callInstr,
RiscvBasicBlock *rbb);
// Translates one IR basic block into a RISC-V basic block of `foo`.
RiscvBasicBlock *transferRiscvBasicBlock(BasicBlock *bb, RiscvFunction *foo);
ReturnRiscvInst *createRetInstr(RegAlloca *regAlloca, ReturnInst *returnInstr,
RiscvBasicBlock *rbb, RiscvFunction *rfoo);
BranchRiscvInstr *createBrInstr(RegAlloca *regAlloca, BranchInst *brInstr,
RiscvBasicBlock *rbb);
RiscvInstr *solveGetElementPtr(RegAlloca *regAlloca, GetElementPtrInst *instr,
RiscvBasicBlock *rbb);
/**
 * Inserts the epilogue (restoring saved registers and fp, releasing the
 * stack frame) in front of the given return instruction.
 */
void initRetInstr(RegAlloca *regAlloca, RiscvInstr *returnInstr,
RiscvBasicBlock *rbb, RiscvFunction *foo);
};
#endif // !BACKENDH

@ -0,0 +1,360 @@
#include "instruction.h"
#include "riscv.h"
#include <cassert>
#include <string>
// Maps each RISC-V instruction type to the mnemonic printed in the assembly.
std::map<RiscvInstr::InstrType, std::string> instrTy2Riscv = {
{RiscvInstr::ADD, "ADD"}, {RiscvInstr::ADDI, "ADDI"},
{RiscvInstr::ADDIW, "ADDIW"}, {RiscvInstr::SUB, "SUB"},
{RiscvInstr::SUBI, "SUBI"}, {RiscvInstr::FADD, "FADD.S"},
{RiscvInstr::FSUB, "FSUB.S"}, {RiscvInstr::FMUL, "FMUL.S"},
{RiscvInstr::FDIV, "FDIV.S"}, {RiscvInstr::MUL, "MUL"},
{RiscvInstr::DIV, "DIV"}, {RiscvInstr::REM, "REM"},
{RiscvInstr::AND, "AND"}, {RiscvInstr::OR, "OR"},
{RiscvInstr::ANDI, "ANDI"}, {RiscvInstr::ORI, "ORI"},
{RiscvInstr::XOR, "XOR"}, {RiscvInstr::XORI, "XORI"},
{RiscvInstr::RET, "RET"}, {RiscvInstr::FPTOSI, "FCVT.W.S"},
{RiscvInstr::SITOFP, "FCVT.S.W"}, {RiscvInstr::FMV, "FMV.S"},
{RiscvInstr::CALL, "CALL"}, {RiscvInstr::LI, "LI"},
{RiscvInstr::MOV, "MV"}, {RiscvInstr::PUSH, "PUSH"},
{RiscvInstr::POP, "POP"}, {RiscvInstr::SW, "SW"},
{RiscvInstr::LW, "LW"}, {RiscvInstr::FSW, "FSW"},
{RiscvInstr::FLW, "FLW"}, {RiscvInstr::SHL, "SLL"},
{RiscvInstr::ASHR, "SRA"}, {RiscvInstr::SHLI, "SLLI"},
{RiscvInstr::LSHR, "SRL"}, {RiscvInstr::ASHRI, "SRAI"},
{RiscvInstr::LSHRI, "SRLI"},
};
// Instruction from opid to string
// Branch mnemonic for each integer comparison predicate.
const std::map<ICmpInst::ICmpOp, std::string> ICmpRiscvInstr::ICmpOpName = {
{ICmpInst::ICmpOp::ICMP_EQ, "BEQ"}, {ICmpInst::ICmpOp::ICMP_NE, "BNE"},
{ICmpInst::ICmpOp::ICMP_UGE, "BGEU"}, {ICmpInst::ICmpOp::ICMP_ULT, "BLTU"},
{ICmpInst::ICmpOp::ICMP_SGE, "BGE"}, {ICmpInst::ICmpOp::ICMP_SLT, "BLT"},
{ICmpInst::ICmpOp::ICMP_SLE, "BLE"}};
// "Set" style mnemonic for each integer comparison predicate. The >=
// predicates share the mnemonic of the corresponding < predicate.
const std::map<ICmpInst::ICmpOp, std::string> ICmpRiscvInstr::ICmpOpSName = {
{ICmpInst::ICmpOp::ICMP_EQ, "SEQZ"}, {ICmpInst::ICmpOp::ICMP_NE, "SNEZ"},
{ICmpInst::ICmpOp::ICMP_UGE, "SLTU"}, {ICmpInst::ICmpOp::ICMP_ULT, "SLTU"},
{ICmpInst::ICmpOp::ICMP_SGE, "SLT"}, {ICmpInst::ICmpOp::ICMP_SLT, "SLT"}};
// Predicate to use in place of the key once the two operands are swapped.
const std::map<ICmpInst::ICmpOp, ICmpInst::ICmpOp> ICmpRiscvInstr::ICmpOpEquiv =
{{ICmpInst::ICmpOp::ICMP_ULE, ICmpInst::ICmpOp::ICMP_UGE},
{ICmpInst::ICmpOp::ICMP_UGT, ICmpInst::ICmpOp::ICMP_ULT},
{ICmpInst::ICmpOp::ICMP_SLE, ICmpInst::ICmpOp::ICMP_SGE},
{ICmpInst::ICmpOp::ICMP_SGT, ICmpInst::ICmpOp::ICMP_SLT}};
// Mnemonic for each floating-point comparison predicate.
const std::map<FCmpInst::FCmpOp, std::string> FCmpRiscvInstr::FCmpOpName = {
{FCmpInst::FCmpOp::FCMP_OLT, "FLT.S"},
{FCmpInst::FCmpOp::FCMP_ULT, "FLT.S"},
{FCmpInst::FCmpOp::FCMP_OLE, "FLE.S"},
{FCmpInst::FCmpOp::FCMP_ULE, "FLE.S"},
{FCmpInst::FCmpOp::FCMP_ORD, "FCLASS.S"},
{FCmpInst::FCmpOp::FCMP_UNO, "FCLASS.S"}, // result is inverted
{FCmpInst::FCmpOp::FCMP_OEQ, "FEQ.S"},
{FCmpInst::FCmpOp::FCMP_UEQ, "FEQ.S"},
{FCmpInst::FCmpOp::FCMP_ONE, "FEQ.S"}, // result is inverted
{FCmpInst::FCmpOp::FCMP_UNE, "FEQ.S"} // result is inverted
};
// Predicate to use in place of the key once the two operands are swapped.
const std::map<FCmpInst::FCmpOp, FCmpInst::FCmpOp> FCmpRiscvInstr::FCmpOpEquiv =
{{FCmpInst::FCmpOp::FCMP_OGT, FCmpInst::FCmpOp::FCMP_OLT},
{FCmpInst::FCmpOp::FCMP_UGT, FCmpInst::FCmpOp::FCMP_ULT},
{FCmpInst::FCmpOp::FCMP_OGE, FCmpInst::FCmpOp::FCMP_OLE},
{FCmpInst::FCmpOp::FCMP_UGE, FCmpInst::FCmpOp::FCMP_ULE}};
std::string print_as_op(Value *v, bool print_ty);
std::string print_cmp_type(ICmpInst::ICmpOp op);
std::string print_fcmp_type(FCmpInst::FCmpOp op);
// Builds an instruction of kind `type` with `op_nums` operand slots (all
// null) that is not attached to any basic block.
// The result operand starts out null: getOperandResult() tests it, and many
// instruction kinds never call setResult().
RiscvInstr::RiscvInstr(InstrType type, int op_nums)
    : type_(type), parent_(nullptr), result_(nullptr) {
  operand_.resize(op_nums);
}
// Builds an instruction of kind `type` with `op_nums` operand slots (all
// null) whose parent block is `bb`. The instruction is NOT appended to `bb`.
// The result operand starts out null: getOperandResult() tests it, and many
// instruction kinds never call setResult().
RiscvInstr::RiscvInstr(InstrType type, int op_nums, RiscvBasicBlock *bb)
    : type_(type), parent_(bb), result_(nullptr) {
  operand_.resize(op_nums);
}
// 格式op tar, v1, v2->tar=v1 op v2
std::string BinaryRiscvInst::print() {
// 这里需要将每个参数根据当前需要进行upcasting
assert(this->operand_.size() == 2);
std::string riscv_instr = "\t\t";
bool overflow = false;
if (type_ == ADDI &&
std::abs(static_cast<RiscvConst *>(operand_[1])->intval) >= 1024) {
overflow = true;
type_ = ADD;
riscv_instr += "LI\tt6, " + operand_[1]->print();
riscv_instr += "\n\t\t";
}
riscv_instr += instrTy2Riscv.at(this->type_);
if (word && (type_ == ADDI || type_ == ADD || type_ == MUL || type_ == REM ||
type_ == DIV))
riscv_instr += "W"; // Integer word type instruction.
riscv_instr += "\t";
riscv_instr += this->result_->print();
riscv_instr += ", ";
riscv_instr += this->operand_[0]->print();
riscv_instr += ", ";
if (overflow) {
riscv_instr += "t6";
} else {
riscv_instr += this->operand_[1]->print();
}
riscv_instr += "\n";
return riscv_instr;
}
// Format: "op tar, v1".
// The line is terminated with a newline like every other instruction printer
// (it used to end in a dangling ", " with no newline, which glued the next
// instruction onto the same line). The mnemonic is fetched with at(), as in
// BinaryRiscvInst::print(), so printing never inserts into the lookup table.
std::string UnaryRiscvInst::print() {
  assert(this->operand_.size() == 1);
  std::string riscv_instr = "\t\t";
  riscv_instr += instrTy2Riscv.at(this->type_);
  riscv_instr += "\t";
  riscv_instr += this->result_->print();
  riscv_instr += ", ";
  riscv_instr += this->operand_[0]->print();
  riscv_instr += "\n";
  return riscv_instr;
}
// Emits "CALL <function name>"; operand 0 is the callee.
std::string CallRiscvInst::print() {
  auto *callee = static_cast<RiscvFunction *>(this->operand_[0]);
  std::string text = "\t\tCALL\t";
  text += callee->name_;
  text += "\n";
  return text;
}
// Note: printing a return performs no register-convention checks.
std::string ReturnRiscvInst::print() {
  return std::string("\t\tRET\n");
}
// Emits one "SD reg, off(sp)" per operand, in operand order. The offset starts
// at basicShift_ and drops by VARIABLE_ALIGN_BYTE before each store.
std::string PushRiscvInst::print() {
  std::string text;
  int offset = this->basicShift_;
  for (auto reg : this->operand_) {
    offset -= VARIABLE_ALIGN_BYTE;
    text += "\t\tSD\t" + reg->print() + ", " + std::to_string(offset) + "(sp)\n";
  }
  return text;
}
// Emits one "LD reg, off(sp)" per operand, in operand order, followed by an
// ADDI that adjusts sp by the negated final offset. The offset starts at
// basicShift_ and drops by VARIABLE_ALIGN_BYTE before each load.
std::string PopRiscvInst::print() {
  std::string riscv_instr = "";
  int shift = this->basicShift_;
  for (auto x : this->operand_) {
    shift -= VARIABLE_ALIGN_BYTE;
    riscv_instr +=
        "\t\tLD\t" + x->print() + ", " + std::to_string(shift) + "(sp)\n";
  }
  // ADDI takes rd, rs1, imm; the source register was missing before.
  riscv_instr += "\t\tADDI\tsp, sp, " + std::to_string(-shift) + "\n";
  return riscv_instr;
}
std::string ICmpRiscvInstr::print() {
std::string riscv_instr = "\t\t";
// 注意由于RISCV不支持全部的比较运算因而需要根据比较条件对式子进行等价变换
if (ICmpOpName.count(this->icmp_op_) == 0) {
std::swap(this->operand_[0], this->operand_[1]);
this->icmp_op_ = ICmpRiscvInstr::ICmpOpEquiv.find(this->icmp_op_)->second;
}
riscv_instr += ICmpOpName.at(this->icmp_op_) + "\t";
riscv_instr += this->operand_[0]->print();
riscv_instr += ", ";
riscv_instr += this->operand_[1]->print();
riscv_instr += ", ";
riscv_instr += static_cast<RiscvBasicBlock *>(this->operand_[2])->name_;
riscv_instr += "\n";
auto falseLink = dynamic_cast<RiscvBasicBlock *>(this->operand_[3]);
// Force Jump
if (falseLink != nullptr)
riscv_instr += "\t\tJ\t" + falseLink->name_ + "\n";
return riscv_instr;
}
// Emits a "set" style comparison into the result register.
// EQ / NE:   SUB t6, v1, v2 ; SEQZ|SNEZ tar, t6
// otherwise: <ICmpOpSName mnemonic> tar, v1, v2
// NOTE(review): the operands are printed in their given order for every op
// and ops missing from ICmpOpSName make at() throw - confirm that callers
// normalise the comparison before building this instruction.
std::string ICmpSRiscvInstr::print() {
  const bool isEqualityTest =
      icmp_op_ == ICmpInst::ICMP_EQ || icmp_op_ == ICmpInst::ICMP_NE;
  std::string text = "\t\t";
  if (isEqualityTest) {
    // Reduce the test to a comparison of (v1 - v2) against zero.
    text += "SUB\t";
    text += "t6";
    text += ", ";
    text += operand_[0]->print();
    text += ", ";
    text += operand_[1]->print();
    text += "\n\t\t";
  }
  text += ICmpOpSName.at(this->icmp_op_) + "\t";
  text += this->result_->print();
  text += ", ";
  if (isEqualityTest) {
    text += "t6\n";
    return text;
  }
  text += this->operand_[0]->print();
  text += ", ";
  text += this->operand_[1]->print();
  text += "\n";
  return text;
}
std::string FCmpRiscvInstr::print() {
std::string riscv_instr = "\t\t";
riscv_instr += FCmpOpName.at(this->fcmp_op_) + "\t";
riscv_instr += this->result_->print();
riscv_instr += ", ";
riscv_instr += this->operand_[0]->print();
riscv_instr += ", ";
riscv_instr += this->operand_[1]->print();
riscv_instr += "\n";
return riscv_instr;
}
// Emits "FSW|SW|SD src, <address>". When the address operand reports an
// overflowing offset, the address is first built in t6
// (LI t6, shift ; ADD t6, t6, base) and the store goes through "(t6)".
// Terminates the program if the stored type is not float, integer or pointer.
std::string StoreRiscvInst::print() {
  auto *addr = static_cast<RiscvIntPhiReg *>(operand_[1]);
  const bool farOffset = addr->overflow();
  std::string text = "\t\t";
  if (farOffset) {
    text += "LI\tt6, " + std::to_string(addr->shift_);
    text += "\n\t\t";
    text += "ADD\tt6, t6, " + addr->MemBaseName;
    text += "\n\t\t";
  }
  const char *mnemonic = nullptr;
  if (this->type.tid_ == Type::FloatTyID)
    mnemonic = "FSW\t";
  else if (this->type.tid_ == Type::IntegerTyID)
    mnemonic = "SW\t";
  else if (this->type.tid_ == Type::PointerTyID)
    mnemonic = "SD\t";
  if (mnemonic == nullptr) {
    std::cerr << "[Error] Unknown store instruction type." << std::endl;
    std::terminate();
  }
  text += mnemonic;
  text += this->operand_[0]->print();
  text += ", ";
  if (farOffset)
    text += "(t6)";
  else
    text += this->operand_[1]->print();
  text += "\n";
  return text;
}
// Simplified load: prints nothing when either operand is missing.
// Otherwise emits "FLW|LW|LD dest, <address>". When the address operand
// reports an overflowing offset, the address is first built in t6
// (LI t6, shift ; ADD t6, t6, base) and the load goes through "(t6)".
// Terminates the program if the loaded type is not float, integer or pointer.
std::string LoadRiscvInst::print() {
  if (this->operand_[0] == nullptr || this->operand_[1] == nullptr)
    return "";
  auto *addr = static_cast<RiscvIntPhiReg *>(operand_[1]);
  const bool farOffset = addr->overflow();
  std::string text = "\t\t";
  if (farOffset) {
    text += "LI\tt6, " + std::to_string(addr->shift_);
    text += "\n\t\t";
    text += "ADD\tt6, t6, " + addr->MemBaseName;
    text += "\n\t\t";
  }
  const char *mnemonic = nullptr;
  if (this->type.tid_ == Type::FloatTyID)
    mnemonic = "FLW\t";
  else if (this->type.tid_ == Type::IntegerTyID)
    mnemonic = "LW\t";
  else if (this->type.tid_ == Type::PointerTyID)
    mnemonic = "LD\t";
  if (mnemonic == nullptr) {
    std::cerr << "[Error] Unknown load instruction type." << std::endl;
    std::terminate();
  }
  text += mnemonic;
  text += this->operand_[0]->print();
  text += ", ";
  if (farOffset)
    text += "(t6)";
  else
    text += this->operand_[1]->print();
  text += "\n";
  return text;
}
std::string MoveRiscvInst::print() {
// Optmize: 若两个操作数相等则忽略该指令
if (this->operand_[0] == this->operand_[1])
return "";
std::string riscv_instr = "\t\t";
// li 指令
if (this->operand_[1]->tid_ == RiscvOperand::IntImm)
riscv_instr += "LI\t";
// 寄存器传寄存器
else if (this->operand_[1]->tid_ == RiscvOperand::IntReg)
riscv_instr += "MV\t";
// 浮点数
else
riscv_instr += "FMV.S\t";
if (this->operand_[0]->print() == this->operand_[1]->print())
return "";
riscv_instr += this->operand_[0]->print();
riscv_instr += ", ";
riscv_instr += this->operand_[1]->print();
riscv_instr += "\n";
return riscv_instr;
}
std::string SiToFpRiscvInstr::print() {
std::string riscv_instr = "\t\tFCVT.S.W\t";
riscv_instr += this->operand_[1]->print();
riscv_instr += ", ";
riscv_instr += this->operand_[0]->print();
riscv_instr += "\n";
return riscv_instr;
}
std::string FpToSiRiscvInstr::print() {
std::string riscv_instr = "\t\tFCVT.W.S\t";
riscv_instr += this->operand_[1]->print();
riscv_instr += ", ";
riscv_instr += this->operand_[0]->print();
riscv_instr += ", ";
riscv_instr += "rtz"; // round to zero.
riscv_instr += "\n";
return riscv_instr;
}
// Emits "LA dest, symbol".
std::string LoadAddressRiscvInstr::print() {
  std::string text = "\t\tLA\t";
  text += this->operand_[0]->print();
  text += ", ";
  text += this->name_;
  text += "\n";
  return text;
}
std::string BranchRiscvInstr::print() {
std::string riscv_instr = "\t\t";
// If single label operand then force jump
if (operand_[0] != nullptr) {
riscv_instr += "BGTZ\t";
riscv_instr += operand_[0]->print();
riscv_instr += ", ";
riscv_instr += static_cast<RiscvBasicBlock *>(operand_[1])->name_;
riscv_instr += "\n\t\t";
}
riscv_instr += "J\t";
riscv_instr += static_cast<RiscvBasicBlock *>(operand_[2])->name_;
riscv_instr += "\n";
return riscv_instr;
}

@ -0,0 +1,431 @@
#ifndef INSTRUCTIONH
#define INSTRUCTIONH
#include "ir.h"
#include "riscv.h"
#include "regalloc.h"
// A basic block, which is also identified by a label.
// It has to be attached to a function, otherwise its label cannot be
// generated properly.
// Could later be turned into a RISC-V basic block for data-flow analysis
// (interface reserved).
class RiscvBasicBlock : public RiscvLabel {
public:
  RiscvFunction *func_;
  std::vector<RiscvInstr *> instruction; // Instructions in emission order.
  int blockInd_;                         // Position of this block among the blocks.

  // Creates the block and registers it with `func`.
  RiscvBasicBlock(std::string name, RiscvFunction *func, int blockInd)
      : RiscvLabel(Block, name), func_(func), blockInd_(blockInd) {
    func->addBlock(this);
  }
  // Creates a block that is not yet registered with any function.
  RiscvBasicBlock(std::string name, int blockInd)
      : RiscvLabel(Block, name), func_(nullptr), blockInd_(blockInd) {}

  // Registers this block with `func`.
  // NOTE(review): func_ is left unchanged here - confirm that is intended.
  void addFunction(RiscvFunction *func) { func->addBlock(this); }
  std::string printname() { return name_; }
  // void addOutBlock(RiscvBasicBlock *bb) { inB.push_back(bb); }
  // void addInBlock(RiscvBasicBlock *bb) { outB.push_back(bb); }

  // Removes every occurrence of `instr`; a no-op when it is not present.
  // The instruction object itself is not destroyed.
  void deleteInstr(RiscvInstr *instr) {
    instruction.erase(
        std::remove(instruction.begin(), instruction.end(), instr),
        instruction.end());
  }
  // NOTE(review): not implemented - calling this currently changes nothing.
  void replaceInstr(RiscvInstr *oldinst, RiscvInstr *newinst) {}

  // Appends `instr` after all existing instructions; null is ignored.
  void addInstrBack(RiscvInstr *instr) {
    if (instr == nullptr)
      return;
    instruction.push_back(instr);
  }
  // Inserts `instr` before all existing instructions; null is ignored.
  void addInstrFront(RiscvInstr *instr) {
    if (instr == nullptr)
      return;
    instruction.insert(instruction.begin(), instr);
  }
  // Inserts `instr` immediately before `dst`, or appends it when `dst` is not
  // in this block; null is ignored.
  void addInstrBefore(RiscvInstr *instr, RiscvInstr *dst) {
    if (instr == nullptr)
      return;
    auto it = std::find(instruction.begin(), instruction.end(), dst);
    if (it != instruction.end())
      instruction.insert(it, instr);
    else
      addInstrBack(instr);
  }
  // Inserts `instr` immediately after `dst`, or appends it when `dst` is not
  // in this block; null is ignored.
  void addInstrAfter(RiscvInstr *instr, RiscvInstr *dst) {
    if (instr == nullptr)
      return;
    auto it = std::find(instruction.begin(), instruction.end(), dst);
    if (it != instruction.end())
      // Inserting at end() appends, so the last element needs no special case.
      instruction.insert(std::next(it), instr);
    else
      addInstrBack(instr);
  }
  std::string print();
};
// One instruction, built from register operands; basic blocks assemble
// instructions into a body.
class RiscvInstr {
public:
  // Instruction kinds. Apart from branches these correspond to RISC-V
  // instructions; immediate and floating-point variants get their own value.
  enum InstrType {
    ADD = 0,
    ADDI,
    SUB,
    SUBI,
    MUL,
    DIV = 6,
    REM,
    FADD = 8,
    FSUB = 10,
    FMUL = 12,
    FDIV = 14,
    XOR = 16,
    XORI,
    AND,
    ANDI,
    OR,
    ORI,
    SW,
    LW,
    FSW = 30,
    FLW,
    ICMP,
    FCMP,
    PUSH,
    POP,
    CALL,
    RET,
    LI,
    MOV,
    FMV,
    FPTOSI,
    SITOFP,
    JMP,
    SHL,
    LSHR,
    ASHR,
    SHLI = 52,
    LSHRI,
    ASHRI,
    LA,
    ADDIW,
    BGT
  };
  const static std::map<InstrType, std::string> RiscvName;
  InstrType type_;
  RiscvBasicBlock *parent_;
  std::vector<RiscvOperand *> operand_;
  RiscvInstr(InstrType type, int op_nums);
  RiscvInstr(InstrType type, int op_nums, RiscvBasicBlock *bb);
  // Virtual: this is a polymorphic base, so instructions may be destroyed
  // through a RiscvInstr pointer.
  virtual ~RiscvInstr() = default;
  // Returns the assembly text for this instruction.
  virtual std::string print() = 0;
  // Destination operand; null for instructions that produce no result.
  // Initialised here because getOperandResult() tests it and many instruction
  // kinds never call setResult().
  RiscvOperand *result_ = nullptr;
  // Stores `val` in operand slot `ind`, which must already exist.
  void setOperand(int ind, RiscvOperand *val) {
    assert(ind >= 0 && ind < static_cast<int>(operand_.size()));
    operand_[ind] = val;
  }
  void setResult(RiscvOperand *result) { result_ = result; }
  void removeResult() { result_ = nullptr; }
  // Clears one operand slot (the slot itself stays).
  void removeOperand(int i) { operand_[i] = nullptr; }
  // Empties the instruction: drops all operand slots and the result.
  void clear() {
    operand_.clear();
    removeResult();
  }
  // Returns the operands followed by the result (when there is one).
  std::vector<RiscvOperand *> getOperandResult() {
    std::vector<RiscvOperand *> ans(operand_);
    if (result_ != nullptr)
      ans.push_back(result_);
    return ans;
  }
  RiscvOperand *getOperand(int i) const { return operand_[i]; }
};
// Binary instruction: target = v1 op v2.
class BinaryRiscvInst : public RiscvInstr {
public:
  std::string print() override;
  BinaryRiscvInst() = default;
  // Builds target = v1 op v2 with parent block `bb`; the instruction is not
  // appended to `bb`.
  // `flag` selects the word-sized ("W" suffixed) form when print() supports it
  // for the opcode; it is stored in `word`.
  BinaryRiscvInst(InstrType op, RiscvOperand *v1, RiscvOperand *v2,
                  RiscvOperand *target, RiscvBasicBlock *bb, bool flag = 0)
      : RiscvInstr(op, 2, bb), word(flag) {
    setOperand(0, v1);
    setOperand(1, v2);
    setResult(target);
    // Optimisation: an immediate 0 is replaced by register `zero`.
    if (v2->getType() == RiscvOperand::IntImm &&
        static_cast<RiscvConst *>(v2)->intval == 0) {
      switch (op) {
      case AND:
      case ANDI:
        // x & 0 is 0, not x: use the register form instead of ADD.
        type_ = AND;
        break;
      case MUL:
        // x * 0 is 0, not x: keep the multiplication.
        break;
      default:
        // Every other op keeps the established rewrite to "ADD tar, v1, zero"
        // (adding, subtracting, or-ing, xor-ing or shifting by 0 yields v1).
        type_ = ADD;
        break;
      }
      setOperand(1, getRegOperand("zero"));
    }
  }
  bool word; // True when the word-sized form was requested.
};
// Unary instruction: target = op v1.
class UnaryRiscvInst : public RiscvInstr {
public:
  std::string print() override;
  UnaryRiscvInst() = default;
  // Builds target = op v1 with parent block `bb`. When `flag` is set the new
  // instruction is also appended to `bb`.
  UnaryRiscvInst(InstrType op, RiscvOperand *v1, RiscvOperand *target,
                 RiscvBasicBlock *bb, bool flag = 0)
      : RiscvInstr(op, 1, bb) {
    setResult(target);
    setOperand(0, v1);
    if (!flag)
      return;
    parent_->addInstrBack(this);
  }
};
// Move instruction: copies an immediate or a register into a register.
class MoveRiscvInst : public RiscvInstr {
public:
  std::string print() override;
  MoveRiscvInst() = default;
  // Integer immediate form (LI): v1 <- Imm. A new RiscvConst is allocated for
  // the immediate. When `flag` is set the instruction is appended to `bb`.
  MoveRiscvInst(RiscvOperand *v1, int Imm, RiscvBasicBlock *bb, bool flag = 0)
      : RiscvInstr(InstrType::LI, 2, bb) {
    setOperand(0, v1);
    setOperand(1, new RiscvConst(Imm));
    if (!flag)
      return;
    parent_->addInstrBack(this);
  }
  // Register form: v1 <- v2. When `flag` is set the instruction is appended
  // to `bb`.
  MoveRiscvInst(RiscvOperand *v1, RiscvOperand *v2, RiscvBasicBlock *bb,
                bool flag = 0)
      : RiscvInstr(InstrType::MOV, 2, bb) {
    setOperand(0, v1);
    setOperand(1, v2);
    if (!flag)
      return;
    parent_->addInstrBack(this);
  }
};
// 注意压栈顺序问题打印的时候严格按照lists内顺序
class PushRiscvInst : public RiscvInstr {
int basicShift_;
public:
PushRiscvInst(std::vector<RiscvOperand *> &lists, RiscvBasicBlock *bb,
int basicShift)
: RiscvInstr(InstrType::PUSH, lists.size(), bb), basicShift_(basicShift) {
for (int i = 0; i < lists.size(); i++)
setOperand(i, lists[i]);
}
std::string print() override;
};
// 打印的时候严格按照lists内顺序
class PopRiscvInst : public RiscvInstr {
int basicShift_;
public:
// 传入所有要pop的变量
PopRiscvInst(std::vector<RiscvOperand *> &lists, RiscvBasicBlock *bb,
int basicShift)
: RiscvInstr(InstrType::POP, lists.size(), bb), basicShift_(basicShift) {
for (int i = 0; i < lists.size(); i++)
setOperand(i, lists[i]);
}
std::string print() override;
};
// Call instruction. Operand 0 is the callee.
// (Original note: "call statement + push statements; 0 is the function name,
// 1..n are the arguments" - this constructor only reserves the callee slot.)
class CallRiscvInst : public RiscvInstr {
public:
  CallRiscvInst(RiscvFunction *func, RiscvBasicBlock *bb)
      : RiscvInstr(CALL, 1, bb) {
    operand_[0] = func;
  }
  virtual std::string print() override;
};
// Bare return. The return value itself is set up by the function that owns
// the enclosing block, through push statements and LW/SW instructions.
class ReturnRiscvInst : public RiscvInstr {
public:
  ReturnRiscvInst(RiscvBasicBlock *bb) : RiscvInstr(RET, 0, bb) {}
  std::string print() override;
};
// Store, printed as: sw source_value(reg), shift(base reg)
// Effect: source_value -> M[base reg + shift]
// Takes the source register, the destination address operand and an offset
// (0 by default). For direct addressing use register x0 as the base.
class StoreRiscvInst : public RiscvInstr {
public:
  int shift_; // Address offset.
  Type type;  // Whether the stored value is floating-point or integer.
  // Terminates the program when `source` is not a register.
  StoreRiscvInst(Type *ty, RiscvOperand *source, RiscvOperand *target,
                 RiscvBasicBlock *bb, int shift = 0)
      : RiscvInstr(InstrType::SW, 2, bb), shift_(shift), type(ty->tid_) {
    setOperand(0, source);
    setOperand(1, target);
    if (!source->isRegister()) {
      std::cerr << "[Fatal error] Invalid store instruction: " << print()
                << std::endl;
      std::terminate();
    }
  }
  std::string print() override;
};
// Load; arguments follow the same layout as the store: destination register
// first, then the address operand.
// Effect: M[base reg + shift] -> dest reg
// The type says whether the value is floating-point or integer.
class LoadRiscvInst : public RiscvInstr {
public:
  int shift_; // Address offset.
  Type type;  // Whether the loaded value is floating-point or integer.
  // Terminates the program when `target` is null.
  LoadRiscvInst(Type *ty, RiscvOperand *dest, RiscvOperand *target,
                RiscvBasicBlock *bb, int shift = 0)
      : RiscvInstr(InstrType::LW, 2, bb), shift_(shift), type(ty->tid_) {
    setOperand(0, dest);
    setOperand(1, target);
    if (!target) {
      std::cerr << "[Fatal Error] Load Instruction's target is nullptr."
                << std::endl;
      std::terminate();
    }
  }
  std::string print() override;
};
// Integer comparison with branch.
// Printed as: cmpop val1, val2, true_link
// Operands: val1, val2, true_link, false_link (the links are basic-block
// pointers). When no false_link is supplied, no "J false_link" is emitted.
class ICmpRiscvInstr : public RiscvInstr {
public:
  static const std::map<ICmpInst::ICmpOp, std::string> ICmpOpName;
  static const std::map<ICmpInst::ICmpOp, std::string> ICmpOpSName;
  static const std::map<ICmpInst::ICmpOp, ICmpInst::ICmpOp> ICmpOpEquiv;
  // Compare-and-branch with both targets.
  ICmpRiscvInstr(ICmpInst::ICmpOp op, RiscvOperand *v1, RiscvOperand *v2,
                 RiscvBasicBlock *trueLink, RiscvBasicBlock *falseLink,
                 RiscvBasicBlock *bb)
      : RiscvInstr(ICMP, 4, bb), icmp_op_(op) {
    setOperand(0, v1);
    setOperand(1, v2);
    setOperand(2, trueLink);
    setOperand(3, falseLink);
  }
  // Compare-and-branch with only a true target; the false slot stays null.
  ICmpRiscvInstr(ICmpInst::ICmpOp op, RiscvOperand *v1, RiscvOperand *v2,
                 RiscvBasicBlock *trueLink, RiscvBasicBlock *bb)
      : ICmpRiscvInstr(op, v1, v2, trueLink, nullptr, bb) {}
  ICmpInst::ICmpOp icmp_op_;
  std::string print() override;
};
// Integer comparison that writes its outcome to a register instead of
// branching. Both branch-target slots of the base instruction stay null.
class ICmpSRiscvInstr : public ICmpRiscvInstr {
public:
  ICmpSRiscvInstr(ICmpInst::ICmpOp op, RiscvOperand *v1, RiscvOperand *v2,
                  RiscvOperand *target, RiscvBasicBlock *bb)
      : ICmpRiscvInstr(op, v1, v2, nullptr, bb) {
    // The base constructor has already stored v1 and v2 in slots 0 and 1.
    setResult(target);
  }
  std::string print() override;
};
// Floating-point comparison; the outcome is written to `target`.
// (Original note: "cmpop val1, val2, true_link, false_link; assuming basic
// blocks are laid out in order, no 'J false_link' is emitted when false_link
// is the next block" - this class itself stores only the two compared values
// and the result register.)
class FCmpRiscvInstr : public RiscvInstr {
public:
  static const std::map<FCmpInst::FCmpOp, std::string> FCmpOpName;
  static const std::map<FCmpInst::FCmpOp, FCmpInst::FCmpOp> FCmpOpEquiv;
  FCmpRiscvInstr(FCmpInst::FCmpOp op, RiscvOperand *v1, RiscvOperand *v2,
                 RiscvOperand *target, RiscvBasicBlock *bb)
      : RiscvInstr(FCMP, 2, bb), fcmp_op_(op) {
    setResult(target);
    setOperand(0, v1);
    setOperand(1, v2);
  }
  FCmpInst::FCmpOp fcmp_op_;
  std::string print() override;
};
// Float -> signed integer conversion. Slot 0 holds the source, slot 1 the
// target.
class FpToSiRiscvInstr : public RiscvInstr {
public:
  FpToSiRiscvInstr(RiscvOperand *Source, RiscvOperand *Target,
                   RiscvBasicBlock *bb)
      : RiscvInstr(FPTOSI, 2, bb) {
    setOperand(1, Target);
    setOperand(0, Source);
  }
  virtual std::string print() override;
};
// Signed integer -> float conversion. Slot 0 holds the source, slot 1 the
// target.
class SiToFpRiscvInstr : public RiscvInstr {
public:
  SiToFpRiscvInstr(RiscvOperand *Source, RiscvOperand *Target,
                   RiscvBasicBlock *bb)
      : RiscvInstr(SITOFP, 2, bb) {
    setOperand(1, Target);
    setOperand(0, Source);
  }
  virtual std::string print() override;
};
// LA rd, symbol ; x[rd] = &symbol
// `dest` : rd
// `name` : symbol
class LoadAddressRiscvInstr : public RiscvInstr {
public:
  std::string name_; // Symbol whose address is loaded.
  LoadAddressRiscvInstr(RiscvOperand *dest, std::string name,
                        RiscvBasicBlock *bb)
      : RiscvInstr(LA, 1, bb), name_(name) {
    operand_[0] = dest;
  }
  virtual std::string print() override;
};
/**
 * Conditional branch on a boolean register, printed as
 *   BGTZ rs1, label1
 *   J label2
 * When rs1 is null only the jump to label2 is printed.
 */
class BranchRiscvInstr : public RiscvInstr {
public:
  /// @brief Builds a branch instruction.
  /// @param rs1 register holding the boolean value
  /// @param trueLink basic block jumped to when the value is true
  /// @param falseLink basic block jumped to when the value is false
  /// @param bb parent basic block
  BranchRiscvInstr(RiscvOperand *rs1, RiscvBasicBlock *trueLink,
                   RiscvBasicBlock *falseLink, RiscvBasicBlock *bb)
      : RiscvInstr(BGT, 3, bb) {
    setOperand(2, falseLink);
    setOperand(1, trueLink);
    setOperand(0, rs1);
  }
  virtual std::string print() override;
};
#endif // !INSTRUCTIONH

@ -0,0 +1,3 @@
#include "optimize.h"
// Data-flow optimisation hook; currently does nothing.
void OptimizeBlock() {
  return;
}

@ -0,0 +1,11 @@
#ifndef OPTIMIZEH
#define OPTIMIZEH
#include "riscv.h"
#include "ir.h"
// Performs data-flow optimisation.
// Registers have to be allocated before this runs.
// Optional.
void OptimizeBlock();
#endif // !OPTIMIZEH

@ -0,0 +1,380 @@
#include "regalloc.h"
#include "instruction.h"
#include "riscv.h"
// Round-robin cursors over the integer / floating-point register numbers,
// advanced by RegAlloca::findReg() and RegAlloca::findNonuse().
int IntRegID = 32, FloatRegID = 32; // Used during the testing phase.
// Looks a register up by its printed name.
// Returns a newly allocated Register (owned by the caller) whose print()
// equals `reg`, trying integer registers 0..31 first and then floating-point
// registers 0..31. Returns nullptr when the name is longer than 4 characters
// or matches no register.
Register *NamefindReg(std::string reg) {
  if (reg.size() > 4)
    return nullptr;
  Register *reg_to_ret = new Register(Register::Int, 0);
  // Check if int registers
  for (int i = 0; i < 32; i++) {
    reg_to_ret->rid_ = i;
    if (reg_to_ret->print() == reg)
      return reg_to_ret;
  }
  // Else then float registers
  reg_to_ret->regtype_ = Register::Float;
  for (int i = 0; i < 32; i++) {
    reg_to_ret->rid_ = i;
    if (reg_to_ret->print() == reg)
      return reg_to_ret;
  }
  // No match: release the probe object instead of leaking it.
  delete reg_to_ret;
  return nullptr;
}
// Returns the pooled register operand whose printed name equals `reg`.
// Asserts (and returns nullptr when assertions are disabled) if there is none.
RiscvOperand *getRegOperand(std::string reg) {
  for (auto it = regPool.begin(); it != regPool.end(); ++it) {
    auto candidate = *it;
    if (candidate->print() == reg)
      return candidate;
  }
  assert(false);
  return nullptr;
}
// Returns the pooled register operand for register number `id` of bank
// `op_ty_`, matched by printed name.
// Asserts (and returns nullptr when assertions are disabled) if there is none.
RiscvOperand *getRegOperand(Register::RegType op_ty_, int id) {
  // The wanted name is computed once from a temporary; nothing is
  // heap-allocated, so nothing can leak on the not-found path.
  const std::string wanted = Register(op_ty_, id).print();
  for (auto regope : regPool) {
    if (regope->print() == wanted)
      return regope;
  }
  assert(false);
  return nullptr;
}
// Returns a newly allocated Type for storing the contents of `riscvReg`:
// float for a floating-point register, integer for anything else.
Type *getStoreTypeFromRegType(RiscvOperand *riscvReg) {
  if (riscvReg->getType() == RiscvOperand::OpTy::FloatReg)
    return new Type(Type::TypeID::FloatTyID);
  return new Type(Type::TypeID::IntegerTyID);
}
// Returns a register operand for `val`, inserting whatever instruction is
// needed to bring the value into it into `bb` before `instr`.
//
// `val` is first canonicalised through DSU_for_Variable. Then:
//  - if `specified` is non-null, `val` is bound to that register;
//  - else if `val` has no current register, is an AllocaInst or is a
//    constant, registers 18..27 of the matching bank (float for float-typed
//    values, integer otherwise) are scanned round-robin for one that has no
//    time stamp or whose stamp is more than SAFE_FIND_LIMIT lookups old;
//  - else the register already bound to `val` is returned straight away and
//    no instruction is inserted.
// When `load` is non-zero the chosen register is then filled: by a load from
// the value's memory operand if findMem() yields one, by a move for a
// constant, or by "ADDI reg, fp, offset" for an alloca.
//
// `inReg` and `direct` are not read in this body.
// NOTE(review): the scan loop has no exit if all ten candidate registers were
// stamped within the last SAFE_FIND_LIMIT lookups - confirm this cannot
// happen for the configured limit.
RiscvOperand *RegAlloca::findReg(Value *val, RiscvBasicBlock *bb,
RiscvInstr *instr, int inReg, int load,
RiscvOperand *specified, bool direct) {
// Every lookup advances the logical clock used to age register bindings.
safeFindTimeStamp++;
val = this->DSU_for_Variable.query(val);
// isGVar and isPointer are computed but not used below.
bool isGVar = dynamic_cast<GlobalVariable *>(val) != nullptr;
bool isAlloca = dynamic_cast<AllocaInst *>(val) != nullptr;
bool isPointer = val->type_->tid_ == val->type_->PointerTyID;
// If there is no register allocated for value then get a new one
if (specified != nullptr)
setPositionReg(val, specified, bb, instr);
else if (curReg.find(val) == curReg.end() || isAlloca ||
val->is_constant()) { // Alloca and constant value is always unsafe.
bool found = false;
RiscvOperand *cur = nullptr;
// Resetting to 32 makes the first increment below wrap around to 18.
IntRegID = 32;
FloatRegID = 32;
while (!found) {
if (val->type_->tid_ != Type::FloatTyID) {
++IntRegID;
if (IntRegID > 27)
IntRegID = 18;
cur = getRegOperand(Register::Int, IntRegID);
} else {
++FloatRegID;
if (FloatRegID > 27)
FloatRegID = 18;
cur = getRegOperand(Register::Float, FloatRegID);
}
// Accept a register that was never stamped or whose stamp is old enough.
if (regFindTimeStamp.find(cur) == regFindTimeStamp.end() ||
safeFindTimeStamp - regFindTimeStamp[cur] > SAFE_FIND_LIMIT) {
setPositionReg(val, cur, bb, instr);
found = true;
}
}
} else {
// Already in a register: refresh its stamp and reuse it without loading.
regFindTimeStamp[curReg[val]] = safeFindTimeStamp;
return curReg[val];
}
// ! Though all registers are considered unsafe, there is no way
// ! to writeback registers properly in findReg() for now.
// ! Therefore unsafe part below is not being executed for now.
// ! Maybe should consider using writeback() instead.
// For now, all registers are considered unsafe thus registers should always
// load from memory before using and save to memory after using.
auto mem_addr = findMem(val, bb, instr, 1); // Value's direct memory address
auto current_reg = curReg[val]; // Value's current register
auto load_type = val->type_;
regFindTimeStamp[current_reg] = safeFindTimeStamp; // Update time stamp
if (load) {
// Load before usage.
if (mem_addr != nullptr) {
bb->addInstrBefore(
new LoadRiscvInst(load_type, current_reg, mem_addr, bb), instr);
} else if (val->is_constant()) {
// If value is a int constant, create a LI instruction.
auto cval = dynamic_cast<ConstantInt *>(val);
if (cval != nullptr)
bb->addInstrBefore(new MoveRiscvInst(current_reg, cval->value_, bb),
instr);
else if (dynamic_cast<ConstantFloat *>(val) != nullptr)
bb->addInstrBefore(
new MoveRiscvInst(current_reg, this->findMem(val), bb), instr);
else {
std::cerr << "[Warning] Trying to find a register for unknown type of "
"constant value which is not implemented for now."
<< std::endl;
}
} else if (isAlloca) {
// An alloca has no loadable memory operand here; its address is computed
// as fp + the offset recorded in pos[val].
bb->addInstrBefore(
new BinaryRiscvInst(
BinaryRiscvInst::ADDI, getRegOperand("fp"),
new RiscvConst(static_cast<RiscvIntPhiReg *>(pos[val])->shift_),
current_reg, bb),
instr);
// std::cerr << "[Debug] Get a alloca position <" << val->print() << ", "
// << static_cast<RiscvIntPhiReg *>(pos[val])->print()
// << "> into the register <" << current_reg->print() << ">"
// << std::endl;
} else {
std::cerr << "[Error] Unknown error in findReg()." << std::endl;
std::terminate();
}
}
return current_reg;
}
// Returns the memory operand for `val`, or nullptr when there is none.
//
// `val` is first canonicalised through DSU_for_Variable; a warning is printed
// when a non-constant value has no entry in `pos`. Then, in order:
//  - global variables and float constants: an LA of the value's symbol into
//    t5 is inserted into `bb` before `instr` and a t5-based operand is
//    returned (nullptr, with a warning, when `bb` is null);
//  - pointers that are not allocas, when `direct` is false: a pointer-typed
//    load of pos[val] into t4 is inserted and a t4-based operand is returned
//    (nullptr, with a warning, when `bb` is null);
//  - allocas, when `direct` is true: nullptr;
//  - otherwise pos[val], or nullptr when `val` has no entry in `pos`.
RiscvOperand *RegAlloca::findMem(Value *val, RiscvBasicBlock *bb,
RiscvInstr *instr, bool direct) {
val = this->DSU_for_Variable.query(val);
if (pos.count(val) == 0 && !val->is_constant()) {
std::cerr << "[Warning] Value " << std::hex << val << " (" << val->name_
<< ")'s memory map not found." << std::endl;
}
bool isGVar = dynamic_cast<GlobalVariable *>(val) != nullptr;
bool isPointer = val->type_->tid_ == val->type_->PointerTyID;
bool isAlloca = dynamic_cast<AllocaInst *>(val) != nullptr;
// All float constant considered as global variables for now.
isGVar = isGVar || dynamic_cast<ConstantFloat *>(val) != nullptr;
// Always loading global variable's address into t5 when execute findMem().
if (isGVar) {
if (bb == nullptr) {
std::cerr << "[Warning] Trying to add global var addressing "
"instruction, but basic block pointer is null."
<< std::endl;
return nullptr;
}
// pos[val]->print() supplies the symbol name for the LA instruction.
bb->addInstrBefore(
new LoadAddressRiscvInstr(getRegOperand("t5"), pos[val]->print(), bb),
instr);
return new RiscvIntPhiReg("t5");
}
// If not loading pointer's address directly, then use indirect addressing.
// Ignore alloca due to the instruction only being dealt by findReg()
if (isPointer && !isAlloca && !direct) {
if (bb == nullptr) {
std::cerr << "[Warning] Trying to add indirect pointer addressing "
"instruction, but basic block pointer is null."
<< std::endl;
return nullptr;
}
bb->addInstrBefore(new LoadRiscvInst(new Type(Type::PointerTyID),
getRegOperand("t4"), pos[val], bb),
instr);
return new RiscvIntPhiReg("t4");
}
// Cannot access to alloca's memory directly.
else if (direct && isAlloca)
return nullptr;
if (pos.find(val) == pos.end())
return nullptr;
return pos[val];
}
// Convenience overload: direct lookup with no basic block and no anchor
// instruction, so no instruction can be inserted.
RiscvOperand *RegAlloca::findMem(Value *val) {
  RiscvBasicBlock *const noBlock = nullptr;
  RiscvInstr *const noAnchor = nullptr;
  return findMem(val, noBlock, noAnchor, true);
}
// Returns the next register of the bank matching `ty` (integer bank for
// integer and pointer types, floating-point bank otherwise), cycling the
// bank's global cursor through 18..27. `bb` and `instr` are not used.
RiscvOperand *RegAlloca::findNonuse(Type *ty, RiscvBasicBlock *bb,
                                    RiscvInstr *instr) {
  const bool wantsInt =
      ty->tid_ == Type::IntegerTyID || ty->tid_ == Type::PointerTyID;
  int &cursor = wantsInt ? IntRegID : FloatRegID;
  ++cursor;
  if (cursor > 27)
    cursor = 18;
  if (wantsInt)
    return getRegOperand(Register::Int, cursor);
  return getRegOperand(Register::Float, cursor);
}
// Records `riscvVal` as the memory operand of `val` (after canonicalising
// `val` through DSU_for_Variable). An existing mapping is silently replaced.
void RegAlloca::setPosition(Value *val, RiscvOperand *riscvVal) {
  Value *canon = this->DSU_for_Variable.query(val);
  pos[canon] = riscvVal;
}
// Places `val` in the register named `RegName`: forwards to findReg() with
// inReg = 0, load = 1 and that register as the specified target.
RiscvOperand *RegAlloca::findSpecificReg(Value *val, std::string RegName,
                                         RiscvBasicBlock *bb, RiscvInstr *instr,
                                         bool direct) {
  Value *canon = this->DSU_for_Variable.query(val);
  RiscvOperand *wanted = getRegOperand(RegName);
  const int inReg = 0;
  const int load = 1;
  return findReg(canon, bb, instr, inReg, load, wanted, direct);
}
// Binds `val` to `riscvReg`, first writing back whatever the binding
// displaces: the value currently held by `riscvReg` (if it is a different
// value) and the register currently holding `val` (if it is a different
// register). Both facts are sampled before any write-back happens.
void RegAlloca::setPositionReg(Value *val, RiscvOperand *riscvReg,
                               RiscvBasicBlock *bb, RiscvInstr *instr) {
  Value *canon = this->DSU_for_Variable.query(val);
  Value *displaced = getRegPosition(riscvReg);
  RiscvOperand *previousHome = getPositionReg(canon);
  const bool regHoldsOther = displaced != nullptr && displaced != canon;
  if (regHoldsOther)
    writeback(riscvReg, bb, instr);
  const bool valueLivesElsewhere =
      previousHome != nullptr && previousHome != riscvReg;
  if (valueLivesElsewhere)
    writeback(previousHome, bb, instr);
  setPositionReg(canon, riscvReg);
}
// Records the binding val <-> riscvReg in both directions and marks the
// register as used. No write-back is performed. Terminates the program when
// `riscvReg` is not a register operand.
void RegAlloca::setPositionReg(Value *val, RiscvOperand *riscvReg) {
  Value *canon = this->DSU_for_Variable.query(val);
  if (!riscvReg->isRegister()) {
    std::cerr << "[Fatal error] Trying to map value " << std::hex << canon
              << " to not a register operand." << std::endl;
    std::terminate();
  }
  regUsed.insert(riscvReg);
  regPos[riscvReg] = canon;
  curReg[canon] = riscvReg;
}
// Unbinds `riscvReg` from the value it holds and stores that value back to
// its memory operand. The store is inserted before `instr`, or appended to
// `bb` when `instr` is null.
// Returns the store instruction, or nullptr when the register holds nothing
// or the value has no memory operand (for example an immediate, or an alloca
// accessed directly). The binding is dropped even when no store is emitted.
RiscvInstr *RegAlloca::writeback(RiscvOperand *riscvReg, RiscvBasicBlock *bb,
                                 RiscvInstr *instr) {
  Value *held = getRegPosition(riscvReg);
  if (held == nullptr)
    return nullptr; // Register is not bound to any value.
  held = this->DSU_for_Variable.query(held);
  // Forget the binding.
  regPos.erase(riscvReg);
  regFindTimeStamp.erase(riscvReg);
  curReg.erase(held);
  RiscvOperand *slot = findMem(held);
  if (slot == nullptr)
    return nullptr;
  auto store = new StoreRiscvInst(held->type_, riscvReg, slot, bb);
  if (instr == nullptr)
    bb->addInstrBack(store);
  else
    bb->addInstrBefore(store, instr);
  return store;
}
// Fills the shared register pool on first use (32 integer then 32
// floating-point registers) and sets up this allocator's saved-register list:
// ra, s1..s11 and fs0..fs11. fp is protected separately.
RegAlloca::RegAlloca() {
  if (regPool.empty()) {
    for (int id = 0; id < 32; ++id)
      regPool.push_back(new RiscvIntReg(new Register(Register::Int, id)));
    for (int id = 0; id < 32; ++id)
      regPool.push_back(new RiscvFloatReg(new Register(Register::Float, id)));
  }
  // ra is both marked as used and protected.
  RiscvOperand *returnAddress = getRegOperand("ra");
  regUsed.insert(returnAddress);
  savedRegister.push_back(returnAddress);
  // Protect s1-s11.
  for (int n = 1; n <= 11; ++n)
    savedRegister.push_back(getRegOperand("s" + std::to_string(n)));
  // Protect fs0-fs11.
  for (int n = 0; n <= 11; ++n)
    savedRegister.push_back(getRegOperand("fs" + std::to_string(n)));
}
// Writes back the register currently bound to `val`; see the register
// overload for the return value.
RiscvInstr *RegAlloca::writeback(Value *val, RiscvBasicBlock *bb,
                                 RiscvInstr *instr) {
  RiscvOperand *home = getPositionReg(val);
  return writeback(home, bb, instr);
}
// Returns the (canonical) value currently bound to `reg`, or nullptr when the
// register is unbound.
Value *RegAlloca::getRegPosition(RiscvOperand *reg) {
  auto hit = regPos.find(reg);
  if (hit == regPos.end())
    return nullptr;
  return this->DSU_for_Variable.query(hit->second);
}
// Returns the register currently bound to `val` (after canonicalising it), or
// nullptr when the value is not in a register.
RiscvOperand *RegAlloca::getPositionReg(Value *val) {
  Value *canon = this->DSU_for_Variable.query(val);
  auto hit = curReg.find(canon);
  if (hit == curReg.end())
    return nullptr;
  return hit->second;
}
// Returns the pointer position recorded for `val` by setPointerPos().
// Terminates the program when none was recorded. `bb` and `instr` are not
// used.
RiscvOperand *RegAlloca::findPtr(Value *val, RiscvBasicBlock *bb,
                                 RiscvInstr *instr) {
  Value *canon = this->DSU_for_Variable.query(val);
  auto hit = ptrPos.find(canon);
  if (hit != ptrPos.end())
    return hit->second;
  std::cerr << "[Fatal Error] Value's pointer position not found."
            << std::endl;
  std::terminate();
}
// Writes back every register that is currently bound to a value.
void RegAlloca::writeback_all(RiscvBasicBlock *bb, RiscvInstr *instr) {
  // writeback() erases from regPos, so work from a snapshot of its keys.
  std::vector<RiscvOperand *> snapshot;
  for (const auto &binding : regPos)
    snapshot.push_back(binding.first);
  for (RiscvOperand *reg : snapshot)
    writeback(reg, bb, instr);
}
// Records `PointerMem` as the pointer position of `val` (after canonicalising
// it). `val` must have pointer or array type.
void RegAlloca::setPointerPos(Value *val, RiscvOperand *PointerMem) {
  Value *canon = this->DSU_for_Variable.query(val);
  const auto kind = canon->type_->tid_;
  assert(kind == Type::TypeID::PointerTyID || kind == Type::TypeID::ArrayTyID);
  this->ptrPos[canon] = PointerMem;
}
// Drops every value <-> register binding and resets the lookup clock and the
// per-register time stamps. Memory and pointer positions are kept.
void RegAlloca::clear() {
  safeFindTimeStamp = 0;
  regFindTimeStamp.clear();
  regPos.clear();
  curReg.clear();
}

@ -0,0 +1,295 @@
#ifndef REGALLOCH
#define REGALLOCH
#include "riscv.h"
#include <cassert>
// Disjoint-set union (union-find) over values of type T, with path
// compression. Elements are registered lazily, the first time they are
// queried.
template <typename T> class DSU {
private:
  std::map<T, T> father; // element -> parent; a root is its own parent

  // Returns the root of x's set and re-parents every element on the walked
  // path directly to that root. x must already be registered.
  T getfather(T x) {
    T root = x;
    while (!(father[root] == root))
      root = father[root];
    while (!(x == root)) {
      T parent = father[x];
      father[x] = root;
      x = parent;
    }
    return root;
  }

public:
  DSU() = default;

  // Returns the representative of id's set. An element that has not been seen
  // before becomes its own singleton set.
  T query(T id) {
    if (father.count(id) == 0) {
      father[id] = id;
      return id;
    }
    return getfather(id);
  }

  /**
   * Merge DSU's node u to v.
   * @param u child
   * @param v father
   */
  void merge(T u, T v) {
    u = query(u);
    v = query(v);
    assert(u != nullptr && v != nullptr);
    if (!(u == v))
      father[u] = v;
  }
};
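// Notes on emitting extra instructions.
// Example: a specific register (a0, say) is needed to hold a return value.
// 1. If the variable currently lives in memory:
//    a) RegAlloca emits an SW (through the block's addInstrBack) that sends
//       the current contents of a0 back to its memory or stack address.
//    b) RegAlloca emits an LW (through the block's addInstrBack) that moves
//       the variable into a0.
// 2. If the variable lives in register x:
//    a) RegAlloca emits an SW (through the block's addInstrBack) that sends
//       the current contents of a0 back to its memory or stack address.
//    b) RegAlloca emits an MV (through the block's addInstrBack) that moves
//       the variable from x into a0.
// Example: a variable has no designated register, or no register currently
// holds it, and a register is needed for a computation.
// If a free register exists: pick a free, unallocated register and return a
// pointer to it (RiscvOperand*).
// If no free register exists:
//    a) Pick a register.
//    b) RegAlloca emits an SW (through the block's addInstrBack) that sends
//       the value in that register back to its memory or stack address.
//    c) Return a pointer to that register (RiscvOperand*).
// Take care to distinguish pointer types (e.g. *a0) from arithmetic values
// (a0).
// Every variable has a fixed memory or stack address and may additionally be
// assigned a fixed register.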
extern int IntRegID, FloatRegID; // Used during the testing phase.
// Looks a register up by its printed name; see the definition for details.
Register *NamefindReg(std::string reg);
// Helper function.
// Returns the type to use for a store instruction, based on the kind of
// register riscvReg is.
Type *getStoreTypeFromRegType(RiscvOperand *riscvReg);
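// A RegAlloca is placed inside **each function**: every function gets a fresh
// register-allocation object.
// By convention x8-x9, x18-x27, f8-f9 and f18-f27 are registers that every
// function must protect and restore to their original values after use.
// All other registers, apart from those used for function arguments such as
// a0-a7, are considered unsafe and may change during later computation.
// The protected registers used during the lifetime of an instance are kept in
// a vector<Register*>.
// Register allocation (mapping IR variables to assembly variable addresses):
// all temporaries are allocated on the stack (stack addresses are computed
// from the start of the current function, as offsets relative to the stack),
// and all global variables are placed in memory (base address + offset).
// When a register has to be protected, go straight back to the original
// address to do it.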
class RegAlloca {
public:
DSU<Value *> DSU_for_Variable;
/**
* Value Value
*
* @param val Value
* @param bb
* @param instr instr
* @param inReg true
* @param load Value true
* @param specified nullptr
* Value
*
* @param direct Value
* findMem
* @return IntegerReg* FloatReg* rs
* @attention Value Alloca load=1
* Alloca
* @attention Value load=1 LI
*
* @attention 使 direct
*/
RiscvOperand *findReg(Value *val, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr, int inReg = 0,
int load = 1, RiscvOperand *specified = nullptr,
bool direct = true);
/**
* Value offset(rs)
* @param val Value
* @param bb
* @param instr instr
* @param direct direct false 使使
* Value (getElementInstr) findMem()
* t5 0(t5)
* @return IntegerPhiReg* FloatPhiReg* offset(rs)
* (t5)
*/
RiscvOperand *findMem(Value *val, RiscvBasicBlock *bb, RiscvInstr *instr,
bool direct);
/**
* Value offset(rs)
* @attention
* @param val Value
*/
RiscvOperand *findMem(Value *val);
// 实现一个函数,以找到一个当前尚未使用的寄存器以存放某个值。
RiscvOperand *findNonuse(Type *ty, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr);
/**
* Value
* @param val Value
* @param RegName
* @param bb
* @param instr instr
* @param direct Value
* findMem
* @return rs
* @note Value Alloca load=1
* Alloca
* @note Value load=1 LI
*
* @attention 使 direct
*/
RiscvOperand *findSpecificReg(Value *val, std::string RegName,
RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr,
bool direct = true);
/**
* Value offset(rs)
* @param val Value
* @param riscvVal offset(rs)
* @attention Alloca
*/
void setPosition(Value *val, RiscvOperand *riscvVal);
/**
* Value rs
* @param val Value
* @param riscvReg rs
* @param bb
* @param instr instr
* @attention 使使 findReg
* findSpecificReg
*/
void setPositionReg(Value *val, RiscvOperand *riscvReg, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr);
/**
* Value rs
* @param val Value
* @param riscvReg rs
* @attention 使使 findReg
* findSpecificReg
*/
void setPositionReg(Value *val, RiscvOperand *riscvReg);
/**
* getElementPtr Value offset(sp)
*
* @param val Value
* @param PointerMem offset(sp)
* @attention offset
*/
void setPointerPos(Value *val, RiscvOperand *PointerMem);
/**
* pos
* @param riscvReg
* @param bb
* @param instr sw
* @return
*/
RiscvInstr *writeback(RiscvOperand *riscvReg, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr);
/**
* Value pos
* @param val Value
* @param bb
* @param instr sw
* @return
*/
RiscvInstr *writeback(Value *val, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr);
/**
* reg Value
*/
Value *getRegPosition(RiscvOperand *reg);
/**
* Value reg
*/
RiscvOperand *getPositionReg(Value *val);
/**
*
*/
std::vector<RiscvOperand *> savedRegister;
/**
*
*/
RegAlloca();
// 指针所指向的内存地址
std::map<Value *, RiscvOperand *> ptrPos;
/**
* Value offset(sp)
* @attention bb, instr 使
*/
RiscvOperand *findPtr(Value *val, RiscvBasicBlock *bb,
RiscvInstr *instr = nullptr);
/**
*
* @param bb
* @param instr
*/
void writeback_all(RiscvBasicBlock *bb, RiscvInstr *instr = nullptr);
/**
*
*/
void clear();
/**
* 使
*/
std::set<RiscvOperand *> getUsedReg() { return regUsed; }
private:
std::map<Value *, RiscvOperand *> pos, curReg;
std::map<RiscvOperand *, Value *> regPos;
/**
* SAFE_FIND_LIMIT
*
*/
std::map<RiscvOperand *, int> regFindTimeStamp;
int safeFindTimeStamp = 0;
static const int SAFE_FIND_LIMIT = 3;
/**
* 使
*/
std::set<RiscvOperand *> regUsed;
};
/**
 * Register operand pool (presumably caches the operand objects handed out by
 * getRegOperand — confirm).
 * NOTE(review): this variable is declared `static` in a header, so every
 * translation unit that includes this file gets its own separate vector —
 * confirm that this is intended.
 */
static std::vector<RiscvOperand *> regPool;
/**
 * Return the register operand for the register named `reg`.
 */
RiscvOperand *getRegOperand(std::string reg);
/**
 * Return the register operand for the register of kind `op_ty_` with number
 * `id`.
 */
RiscvOperand *getRegOperand(Register::RegType op_ty_, int id);
#endif // !REGALLOCH

@ -0,0 +1,137 @@
#include "riscv.h"
#include "backend.h"
#include "ir.h"
// NOTE(review): presumably the number of registers in one register file —
// confirm against the code that reads it.
const int REG_NUMBER = 32;
// Build the backend representation of a function: a label of kind Function
// named `name`. `Ty` is the return type; a function that returns nothing uses
// the void type. The stack cursor starts one alignment slot below offset 0 and
// the function receives its own, freshly created register allocator.
RiscvFunction::RiscvFunction(std::string name, int num_args, OpTy Ty)
    : RiscvLabel(Function, name), regAlloca(new RegAlloca()),
      num_args_(num_args), resType_(Ty), base_(-VARIABLE_ALIGN_BYTE) {}
// 输出函数对应的全部riscv语句序列
// 由于一个函数可能有若干个出口因而恢复现场的语句根据basic block
// 语句中的ret语句前面附带出现因而不在此出现
std::string RiscvFunction::print() {
// TODO: temporaily add '.global' to declare function
// Don't know if '.type' is needed
std::string riscvInstr =
".global " + this->name_ + "\n" + this->name_ + ":\n"; // 函数标号打印
// 对各个basic block进行拼接
for (auto x : this->blk)
riscvInstr += x->print();
return riscvInstr;
}
// Produce the text of this basic block: its label line followed by the text
// of each of its instructions, in order.
std::string RiscvBasicBlock::print() {
  std::string text = this->name_;
  text += ":\n";
  for (auto instr : this->instruction) {
    text += instr->print();
  }
  return text;
}
// Values are popped in the reverse order of pushing.
// Suggestion: do not use pop statements; read the values straight from the
// stack and simply adjust sp once at the end.
// Use one dedicated return block to avoid returning from several exits.
// NOTE(review): LabelCount is defined in another translation unit; presumably
// a counter used to generate unique labels — confirm.
extern int LabelCount;
// Accessor for the operand-kind tag stored in tid_.
RiscvOperand::OpTy RiscvOperand::getType() {
  return this->tid_;
}
// True when the operand kind is IntReg or FloatReg, i.e. the value is held
// directly in a register.
bool RiscvOperand::isRegister() {
  switch (tid_) {
  case IntReg:
  case FloatReg:
    return true;
  default:
    return false;
  }
}
// Strip every leading pointer layer and then every array layer from `ty`
// and return the type that remains, which must be an integer or a float.
Type *findPtrType(Type *ty) {
  // Peel pointer layers first ...
  for (; ty->tid_ == Type::PointerTyID;
       ty = static_cast<PointerType *>(ty)->contained_) {
  }
  // ... then array layers.
  for (; ty->tid_ == Type::ArrayTyID;
       ty = static_cast<ArrayType *>(ty)->contained_) {
  }
  assert(ty->tid_ == Type::IntegerTyID || ty->tid_ == Type::FloatTyID);
  return ty;
}
/**
 * Emit the assembler data directives for this global variable.
 *
 * @param print_name true on the outermost call: the label line is emitted and
 *                   `initVal` is replaced by the stored initValue_. The
 *                   recursive calls made for array elements pass false.
 * @param initVal    initializer to print (ignored when print_name is true).
 * @return the generated assembler text.
 *
 * Terminates the program when the initializer has an unsupported type.
 */
std::string RiscvGlobalVariable::print(bool print_name, Constant *initVal) {
  std::string code = "";
  // On the first level of the call, emit the label and pick up the stored
  // initializer.
  if (print_name) {
    code += this->name_ + ":\n";
    initVal = initValue_;
  }
  // No initializer at all: reserve zero-filled space. The directive is
  // appended to `code` so that the label emitted above is not lost.
  if (initVal == nullptr) {
    code += "\t.zero\t" + std::to_string(this->elementNum_ * 4) + "\n";
    return code;
  }
  // An all-zero initializer (the IR has a ConstantZero class for it) is also
  // printed with the zero directive.
  if (dynamic_cast<ConstantZero *>(initVal) != nullptr) {
    code += "\t.zero\t" + std::to_string(calcTypeSize(initVal->type_)) + "\n";
    return code;
  }
  // Non-zero initializers follow.
  // Integer
  if (initVal->type_->tid_ == Type::TypeID::IntegerTyID) {
    auto *const_int = dynamic_cast<ConstantInt *>(initVal);
    assert(const_int != nullptr);
    code += "\t.word\t" + std::to_string(const_int->value_) + "\n";
    return code;
  }
  // Float: printed as a .word holding the text produced by print32(), padded
  // with '0' and cut to exactly 10 characters.
  else if (initVal->type_->tid_ == Type::TypeID::FloatTyID) {
    auto *const_float = dynamic_cast<ConstantFloat *>(initVal);
    assert(const_float != nullptr);
    std::string valString = const_float->print32();
    while (valString.length() < 10)
      valString += "0";
    code += "\t.word\t" + valString.substr(0, 10) + "\n";
    return code;
  }
  // Array: print every listed element, then pad the remainder with zeros.
  else if (initVal->type_->tid_ == Type::TypeID::ArrayTyID) {
    ConstantArray *const_arr = dynamic_cast<ConstantArray *>(initVal);
    assert(const_arr != nullptr);
    int zeroSpace = calcTypeSize(initVal->type_);
    for (auto elements : const_arr->const_array) {
      code += print(false, elements);
      // NOTE(review): every element is counted as 4 bytes; if const_array can
      // hold nested arrays this over-estimates the padding — confirm.
      zeroSpace -= 4;
    }
    if (zeroSpace)
      code += "\t.zero\t" + std::to_string(zeroSpace) + "\n";
    return code;
  } else {
    std::cerr
        << "[Fatal Error] Unknown RiscvGlobalVariable::print() initValue type."
        << std::endl;
    std::terminate();
  }
}
std::string RiscvGlobalVariable::print() { return print(true, nullptr); }
/**
 * Build a hand-written RISC-V body for a supported system-library function.
 *
 * Only "__aeabi_memclr4" is handled: a setup block followed by a loop block
 * that stores the zero register word by word.
 *
 * @param foo IR function to translate.
 * @return the generated function, or nullptr when `foo` is not supported.
 */
RiscvFunction *createSyslibFunc(Function *foo) {
  if (foo->name_ != "__aeabi_memclr4")
    return nullptr;

  auto *rfoo = createRiscvFunction(foo);
  // Setup block: copy a0 / a1 into t5 / t6, combine a0 and t6 into t6 with an
  // ADD (presumably the end address — confirm operand order), then load 0
  // into a0.
  auto *bb1 = createRiscvBasicBlock();
  bb1->addInstrBack(
      new MoveRiscvInst(getRegOperand("t5"), getRegOperand("a0"), bb1));
  bb1->addInstrBack(
      new MoveRiscvInst(getRegOperand("t6"), getRegOperand("a1"), bb1));
  bb1->addInstrBack(new BinaryRiscvInst(RiscvInstr::ADD, getRegOperand("a0"),
                                        getRegOperand("t6"),
                                        getRegOperand("t6"), bb1));
  bb1->addInstrBack(
      new MoveRiscvInst(getRegOperand("a0"), new RiscvConst(0), bb1));

  auto *bb2 = createRiscvBasicBlock();
  // Loop block.
  // The memory is cleared to all zeros by default.
  bb2->addInstrBack(new StoreRiscvInst(
      new Type(Type::TypeID::IntegerTyID), getRegOperand("zero"),
      new RiscvIntPhiReg(NamefindReg("t5")), bb2));
  // This instruction lives in bb2, so bb2 (not bb1) is passed as its block.
  bb2->addInstrBack(new BinaryRiscvInst(RiscvInstr::ADDI, getRegOperand("t5"),
                                        new RiscvConst(4),
                                        getRegOperand("t5"), bb2));
  bb2->addInstrBack(new ICmpRiscvInstr(ICmpInst::ICMP_SLT,
                                       getRegOperand("t5"),
                                       getRegOperand("t6"), bb2, bb2));
  bb2->addInstrBack(new ReturnRiscvInst(bb2));

  rfoo->addBlock(bb1);
  rfoo->addBlock(bb2);
  return rfoo;
}

@ -0,0 +1,343 @@
#ifndef RISCVH
#define RISCVH
class RiscvLabel;
class RiscvBasicBlock;
class RiscvInstr;
class RegAlloca;
class Register;
class RiscvOperand;
const int VARIABLE_ALIGN_BYTE = 8;
#include "ir.h"
#include "string.h"
/**
 * Abstract base class of every operand that can appear in the generated
 * RISC-V code. Concrete operands implement print().
 */
class RiscvOperand {
public:
  enum OpTy {
    Void = 0, // empty type, reserved for functions without a return value
    IntImm,   // integer immediate
    FloatImm, // floating-point immediate
    IntReg,   // value held directly in an integer register
    FloatReg, // value held directly in a floating-point register
    IntMem,   // integer at M[R(rd)+shift]; with no register use x0, with no
              // offset use shift=0
    FloatMem, // floating point, same as above
    Function, // called function
    Block     // basic-block label
  };
  OpTy tid_; // kind of this operand
  explicit RiscvOperand(OpTy tid) : tid_(tid) {}
  // Virtual so that destroying a derived operand through a RiscvOperand
  // pointer is well defined.
  virtual ~RiscvOperand() = default;
  // Textual (assembler) form of the operand.
  virtual std::string print() = 0;
  // Returns the operand kind.
  OpTy getType();
  // If this operand is a register, return true.
  bool isRegister();
};
// Register file entry: a register kind plus a register number.
class Register {
public:
  Register() = default;
  ~Register() = default;
  enum RegType {
    Int = 1, // integer
    Float,   // floating point
    Stack,   // reserved for the stack
    Zero     // zero register
  };
  RegType regtype_;
  int rid_; // register number
  Register(RegType regtype, int rid) : regtype_(regtype), rid_(rid) {}

  // Return the ABI name of the register. Floating-point registers use the
  // f-names; every other kind uses the integer names.
  std::string print() {
    static const char *const kIntNames[32] = {
        "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2",
        "fp", // another name: s0
        "s1",   "a0", "a1", "a2", "a3",  "a4",  "a5", "a6",
        "a7",   "s2", "s3", "s4", "s5",  "s6",  "s7", "s8",
        "s9",   "s10", "s11", "t3", "t4", "t5", "t6"};
    static const char *const kFloatNames[32] = {
        "ft0", "ft1", "ft2", "ft3", "ft4", "ft5",  "ft6",  "ft7",
        "fs0", "fs1", "fa0", "fa1", "fa2", "fa3",  "fa4",  "fa5",
        "fa6", "fa7", "fs2", "fs3", "fs4", "fs5",  "fs6",  "fs7",
        "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"};

    const bool inTable = rid_ >= 0 && rid_ <= 31;
    if (regtype_ == Float) {
      if (inTable)
        return kFloatNames[rid_];
      // Out-of-range numbers: negative ids fall into the "ft" range, ids
      // above 31 have no name.
      if (rid_ < 0)
        return "ft" + std::to_string(rid_);
      return "wtf";
    }
    // Integer-style names.
    if (inTable)
      return kIntNames[rid_];
    // Out-of-range numbers use the t-register formula.
    return "t" + std::to_string(rid_ - 25);
  }
};
extern const int REG_NUMBER;
// 常数
class RiscvConst : public RiscvOperand {
public:
int intval;
float floatval;
RiscvConst() = default;
explicit RiscvConst(int val) : RiscvOperand(IntImm), intval(val) {}
explicit RiscvConst(float val) : RiscvOperand(FloatImm), floatval(val) {}
std::string print() {
if (this->tid_ == IntImm)
return std::to_string(intval);
else
return std::to_string(floatval);
}
};
// 整型寄存器直接存储
class RiscvIntReg : public RiscvOperand {
public:
Register *reg_;
RiscvIntReg(Register *reg) : RiscvOperand(IntReg), reg_(reg) {
assert(reg_->regtype_ == Register::Int); // 判断整型寄存器存储
}
std::string print() { return reg_->print(); }
};
class RiscvFloatReg : public RiscvOperand {
public:
Register *reg_;
RiscvFloatReg(Register *reg) : RiscvOperand(FloatReg), reg_(reg) {
assert(reg_->regtype_ == Register::Float); // 判断整型寄存器存储
}
std::string print() { return reg_->print(); }
};
// 需间接寻址得到的数据,整型
class RiscvIntPhiReg : public RiscvOperand {
public:
int shift_;
int isGlobalVariable;
Register *base_;
std::string MemBaseName;
RiscvIntPhiReg(Register *base, int shift = 0, int isGVar = false)
: RiscvOperand(IntMem), base_(base), shift_(shift),
isGlobalVariable(isGVar), MemBaseName(base_->print()) {}
// 内存以全局形式存在的变量(常量)
RiscvIntPhiReg(std::string s, int shift = 0, int isGVar = false)
: RiscvOperand(IntMem), base_(nullptr), shift_(shift), MemBaseName(s),
isGlobalVariable(isGVar) {}
std::string print() {
std::string ans = "";
if (base_ != nullptr)
ans += "(" + base_->print() + ")";
else {
if (isGlobalVariable)
return MemBaseName; // If global variable, use direct addressing
else
ans += "(" + MemBaseName + ")";
}
if (shift_)
ans = std::to_string(shift_) + ans;
return ans;
}
/**
* Return if shift value overflows.
*/
bool overflow() { return std::abs(shift_) >= 1024; }
};
// 需间接寻址得到的数据,浮点
class RiscvFloatPhiReg : public RiscvOperand {
public:
int shift_;
Register *base_;
std::string MemBaseName;
int isGlobalVariable;
RiscvFloatPhiReg(Register *base, int shift = 0, int isGVar = false)
: RiscvOperand(FloatMem), base_(base), shift_(shift),
isGlobalVariable(isGVar), MemBaseName(base_->print()) {}
// 内存以全局形式存在的变量(常量)
RiscvFloatPhiReg(std::string s, int shift = 0, int isGVar = false)
: RiscvOperand(FloatMem), base_(nullptr), shift_(shift), MemBaseName(s),
isGlobalVariable(isGVar) {}
std::string print() {
std::string ans = "";
if (base_ != nullptr)
ans += "(" + base_->print() + ")";
else {
if (isGlobalVariable)
return MemBaseName; // If global variable, use direct addressing
else
ans += "(" + MemBaseName + ")";
}
if (shift_)
ans = std::to_string(shift_) + ans;
return ans;
}
/**
* Return if shift value overflows.
*/
bool overflow() { return std::abs(shift_) >= 1024; }
};
class RiscvLabel : public RiscvOperand {
public:
std::string name_; // 标号名称
~RiscvLabel() = default;
RiscvLabel(OpTy Type, std::string name) : RiscvOperand(Type), name_(name) {
// std::cout << "CREATE A LABEL:" << name << "\n";
}
virtual std::string print() = 0;
};
// Global variable.
// Whether it is floating point or integer has to be stated (through the
// operand kind). The text is assembled at the end.
class RiscvGlobalVariable : public RiscvLabel {
public:
  bool isConst_;
  bool isData; // whether the variable was given an initial value
  int elementNum_;
  Constant *initValue_;

  // Definition of an ordinary, single global variable (one element).
  RiscvGlobalVariable(OpTy Type, std::string name, bool isConst,
                      Constant *initValue)
      : RiscvGlobalVariable(Type, name, isConst, initValue, 1) {}

  // Definition of a global array variable with elementNum elements.
  RiscvGlobalVariable(OpTy Type, std::string name, bool isConst,
                      Constant *initValue, int elementNum)
      : RiscvLabel(Type, name), isConst_(isConst), elementNum_(elementNum),
        initValue_(initValue) {}

  // Print the definition of the global variable, converted from the global
  // variable definition in the IR.
  // Open question noted by the author: how to handle a global array that has
  // initial values.
  std::string print();
  std::string print(bool print_name, Constant *initVal);
};
// A function, identified by a label.
// Functions hang under the module and hold a number of basic blocks; a
// function keeps no pointer back to its module.
// By default no context is saved; values are pushed on the stack temporarily
// only when the registers run out.
// The arguments a caller passes to a callee count as part of the callee's
// stack space; the callee clears that stack space when it finishes.
class RiscvFunction : public RiscvLabel {
public:
  RegAlloca *regAlloca; // register allocator owned by this function
  int num_args_;
  OpTy resType_;
  // NOTE(review): nothing in this class sizes `args`; setArgs / deleteArgs
  // assert that the index is in range, so callers must resize it first.
  std::vector<RiscvOperand *> args;
  RiscvFunction(std::string name, int num_args,
                OpTy Ty); // return type; void is used when nothing is returned
  // Store `op` as argument number `ind`.
  void setArgs(int ind, RiscvOperand *op) {
    assert(ind >= 0 && ind < args.size());
    args[ind] = op;
  }
  // Reset argument number `ind` to nullptr.
  void deleteArgs(int ind) {
    assert(ind >= 0 && ind < args.size());
    args[ind] = nullptr;
  }
  ~RiscvFunction() = default;
  std::string printname() { return name_; }
  std::vector<RiscvBasicBlock *> blk; // basic blocks in emission order
  // True when the function name is one of the known runtime-library names.
  bool is_libfunc() {
    return name_ == "putint" || name_ == "putch" || name_ == "putarray" ||
           name_ == "_sysy_starttime" || name_ == "_sysy_stoptime" ||
           name_ == "__aeabi_memclr4" || name_ == "__aeabi_memset4" ||
           name_ == "__aeabi_memcpy4" || name_ == "getint" ||
           name_ == "getch" || name_ == "getarray" || name_ == "getfloat" ||
           name_ == "getfarray" || name_ == "putfloat" ||
           name_ == "putfarray" || name_ == "llvm.memset.p0.i32";
  }
  // Stack positions of the operands used by the function (call arguments,
  // local variables and the return value). They must be word aligned (a
  // multiple of 4). How far sp moves down is decided later from this data.
  std::map<RiscvOperand *, int> argsOffset;
  // Add a stack mapping for `val` at the current cursor, unless it already
  // has one, and move the cursor down by one slot.
  void addArgs(RiscvOperand *val) {
    if (argsOffset.count(val) == 0) {
      argsOffset[val] = base_;
      base_ -= VARIABLE_ALIGN_BYTE;
    }
  }
  int querySP() { return base_; }
  void setSP(int SP) { base_ = SP; }
  // Register `val` on the stack and grow the temporary range by one slot.
  void addTempVar(RiscvOperand *val) {
    addArgs(val);
    tempRange += VARIABLE_ALIGN_BYTE;
  }
  void shiftSP(int shift_value) { base_ += shift_value; }
  // Reserve stack space for an array; the amount is rounded up to a multiple
  // of 8 before the cursor is moved down.
  void storeArray(int elementNum) {
    if (elementNum & 7) {
      elementNum += 8 - (elementNum & 7); // Align to 8 byte.
    }
    base_ -= elementNum;
  }
  // Remove the stack mapping of one operand.
  void deleteArgs(RiscvOperand *val) { argsOffset.erase(val); }
  // By default no register is protected.
  // If the registers run out, the value of one register is temporarily pushed
  // on the stack and restored when the function ends.
  // Only SP-relative positions inside the function are considered; absolute
  // positions are not computed.
  void saveOperand(RiscvOperand *val) {
    storedEnvironment[val] = base_;
    argsOffset[val] = base_;
    base_ -= VARIABLE_ALIGN_BYTE;
  }
  // Query the stack position of `val`, allocating a slot when it has none.
  int findArgs(RiscvOperand *val) {
    if (argsOffset.count(val) == 0)
      addArgs(val);
    return argsOffset[val];
  }
  // Replace the basic block at index `ind`.
  void ChangeBlock(RiscvBasicBlock *bb, int ind) {
    assert(ind >= 0 && ind < blk.size());
    blk[ind] = bb;
  }
  void addBlock(RiscvBasicBlock *bb) { blk.push_back(bb); }
  // Original note: the function text first pushes to save the context, then
  // pops the needed arguments, then appends each block.
  std::string print();

private:
  int base_; // current stack cursor (offset of the next free slot)
  // Amount of local-variable space; the stack frame is moved down according
  // to it. Initialised to 0 because addTempVar accumulates into it.
  int tempRange = 0;
  // Stack addresses that must be protected; all of them have to be restored
  // when the function ends.
  std::map<RiscvOperand *, int> storedEnvironment;
};
class RiscvModule {
public:
std::vector<RiscvFunction *> func_;
std::vector<RiscvGlobalVariable *> globalVariable_;
void addFunction(RiscvFunction *foo) { func_.push_back(foo); }
void addGlobalVariable(RiscvGlobalVariable *g) {
globalVariable_.push_back(g);
}
};
// Strips the pointer layers and then the array layers of `ty` and returns the
// remaining integer or float type.
Type *findPtrType(Type *ty);
// Builds a hand-written backend function for a supported system-library
// function; returns nullptr for any other function.
RiscvFunction *createSyslibFunc(Function *foo);
#endif // !RISCVH
Loading…
Cancel
Save