From faf16ff4bd2086244fc7d0e5b092e350d3c62c2e Mon Sep 17 00:00:00 2001 From: wqz <1197460504@qq.com> Date: Wed, 26 Apr 2023 20:32:47 +0800 Subject: [PATCH] =?UTF-8?q?=E7=94=9F=E6=88=90=E6=B1=87=E7=BC=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=EF=BC=8C=E5=88=9D=E5=A7=8B=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 5 + src/IR.h | 10 ++ src/backend/codegen.cpp | 331 ++++++++++++++++++++++++++++++++++++++++ src/backend/codegen.hpp | 252 ++++++++++++++++++++++++++++++ src/sysyc.cpp | 32 +++- test/01_add.sy | 10 ++ 6 files changed, 634 insertions(+), 6 deletions(-) create mode 100644 src/backend/codegen.cpp create mode 100644 src/backend/codegen.hpp create mode 100644 test/01_add.sy diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84d5d47..d372566 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,13 +11,18 @@ add_library(SysYParser SHARED ${ANTLR_SysYGen_CXX_OUTPUTS}) target_include_directories(SysYParser PUBLIC ${ANTLR_RUNTIME}/runtime/src) target_link_libraries(SysYParser PUBLIC antlr4_shared) +#backend/aarch32_target.hpp backend/codegen.hpp backend/target_info.hpp add_executable(sysyc sysyc.cpp IR.cpp SysYIRGenerator.cpp Diagnostic.cpp + backend/codegen.cpp ) target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(sysyc PRIVATE "backend") +target_include_directories(sysyc PRIVATE ".") +target_compile_options(sysyc PRIVATE -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable) target_link_libraries(sysyc PRIVATE SysYParser) set(THREADS_PREFER_PTHREAD_FLAG ON) diff --git a/src/IR.h b/src/IR.h index 8280c04..82b37f3 100644 --- a/src/IR.h +++ b/src/IR.h @@ -1053,6 +1053,16 @@ namespace sysy public: void print(std::ostream &os) const; + + public: + std::map *getFunctions() + { + return &functions; + } + std::map *getGlobalValues() + { + return &globals; + } }; // class Module /*! 
diff --git a/src/backend/codegen.cpp b/src/backend/codegen.cpp new file mode 100644 index 0000000..f34d6de --- /dev/null +++ b/src/backend/codegen.cpp @@ -0,0 +1,331 @@ +#include "codegen.hpp" + +namespace backend +{ + using RegId = RegManager::RegId; + + string CodeGen::code_gen() + { + string code; + code += module_gen(module); + return code; + } + + string CodeGen::module_gen(Module *module) + { + string code; + string dataCode; + string textCode; + // reset the label bookkeeping left over from a previous module + clearModuleRecord(module); + // emit assembly for all global values + dataCode += globaldata_gen(); + + code += space + ".arch armv7ve " + endl; + code += space + ".text " + endl; + auto functions = module->getFunctions(); + + for (auto iter = functions->begin(); iter != functions->end(); ++iter) + { + string name = iter->first; + Function *func = iter->second; + auto bblist = func->getBasicBlocks(); + if (bblist.empty()) + continue; + // emit assembly for each function that has at least one basic block + textCode += function_gen(func) + endl; + } + code += (dataCode + textCode + endl); /* NOTE(review): dataCode lands after the .text directive emitted above; confirm globaldata_gen() emits its own section directive */ + return code; + } + + string CodeGen::functionHead_gen(Function *func) + { + string code; + code += space + ".globl " + func->getName() + endl; + code += space + ".p2align " + std::to_string(int_p2align) + endl; + code += space + ".type " + func->getName() + ", %function" + endl; + code += func->getName() + ":" + endl; + return code; + } + /** + * stack structure: + * + * last function stack + * -------------------------- <-- fp point + * callee preserved regs (include fp, sp, lr) + * -------------------------- + * temporary variable(ir inst) + * -------------------------- + * arg0,arg1,arg2,arg3(as temporary variable) + * -------------------------- + * dynamic stack(by AllocaInst) ******** + * -------------------------- + * caller preserved regs + * -------------------------- + * arg4 ~ argX + * -------------------------- <-- sp point + * next function stack + * + */ + /** + * prologue : + * preserve callee-saved register (lr,
fp and other callee-saved regs) + * set new fp + * alloc stack space for local var/ args / return value. + * store args to stack + * */ + string CodeGen::prologueCode_gen(Function *func) + { + string code; + /** + *code in here + */ + return code; + } + + /* epilogue : + * free stack space + * restore sp + * restore used callee-saved register(lr, fp and other callee-saved regs) + * bx lr + */ + string CodeGen::epilogueCode_gen(Function *func) + { + string code; + /** + *code in here + */ + return code; + } + + string CodeGen::function_gen(Function *func) + { + curFunc = func; + clearFunctionRecord(func); + string bbCode; + auto bbs = func->getBasicBlocks(); + for (auto iter = bbs.begin(); iter != bbs.end(); ++iter) + { + auto bb = iter->get(); + bbCode += basicBlock_gen(bb); + } + string code; + string funcHead = functionHead_gen(func); + string prologueCode = prologueCode_gen(func); + string epilogueCode = epilogueCode_gen(func); + string literalPoolsCode = literalPoolsCode_gen(func); + // + code = funcHead + prologueCode + bbCode + + epilogueCode + literalPoolsCode; + return code; + } + + string CodeGen::basicBlock_gen(BasicBlock *bb) + { + curBB = bb; + string bbLabel = getBBLabel(bb); + string code; + code += bbLabel + ":" + endl; + for (auto &instr : bb->getInstructions()) + { + auto instrType = instr->getKind(); + code += instruction_gen(instr.get()); + } + return code; + } + /** + * RegId : the register binaryInst_gen reports as its destination operand + * code : assembly text generated by binaryInst_gen + */ + pair CodeGen::binaryInst_gen(BinaryInst *bInst, RegId dstRegId) + { + string code; + /** + *code in here + */ + return {dstRegId, code}; + } + + pair CodeGen::unaryInst_gen(UnaryInst *uInst, RegId dstRegId) + { + string code; + /** + *code in here + */ + return {dstRegId, code}; + } + pair + CodeGen::allocaInst_gen(AllocaInst *aInst, RegManager::RegId dstRegId) + { + string code; + /** + *code in here + */ + return {dstRegId, code}; + } + + string
CodeGen::storeInst_gen(StoreInst *stInst) + { + string code; + /** + *code in here + */ + return code; + } + pair + CodeGen::loadInst_gen(LoadInst *ldInst, RegId dstRegId) + { + string code; + /** + *code in here + */ + return {dstRegId, code}; + } + string CodeGen::returnInst_gen(ReturnInst *retInst) + { + string code; + /** + *code in here + */ + return code; + } + string CodeGen::uncondBrInst_gen(UncondBrInst *ubInst) + { + string code; + /** + *code in here + */ + return code; + } + string CodeGen::condBrInst_gen(CondBrInst *cbInst) + { + string code; + /** + *code in here + */ + return code; + } + pair + CodeGen::callInst_gen(CallInst *callInst, RegId dstRegId) + { + string code; + /** + *code in here + */ + return {dstRegId, code}; + } + + string CodeGen::instruction_gen(Instruction *instr) + { + string code; + string unkName = instr->getName(); + RegManager::RegId dstRegId = RegManager::RNONE; + auto instrType = instr->getKind(); + pair tmp; + switch (instrType) + { + // binary instructions (add, mul, sub) + case Instruction::kAdd: + case Instruction::kMul: + case Instruction::kSub: + { + BinaryInst *bInst = dynamic_cast(instr); + // registers only hold values while an instruction executes; RANY requests any register that is free for use + tmp = binaryInst_gen(bInst, RegManager::RANY); + code += tmp.second; + dstRegId = tmp.first; + break; + } + case Instruction::kLoad: + { + LoadInst *ldInst = dynamic_cast(instr); + tmp = loadInst_gen(ldInst, RegManager::RANY); + code += M_emitComment("load inst"); + code += tmp.second; + dstRegId = tmp.first; + break; + } + case Instruction::kStore: + { + StoreInst *stInst = dynamic_cast(instr); + code += M_emitComment("store inst"); + code += storeInst_gen(stInst); + return code; + break; + } + case Instruction::kAlloca: + { + AllocaInst *aInst = dynamic_cast(instr); + tmp = allocaInst_gen(aInst, RegManager::RANY); + code += M_emitComment("alloca inst"); + code += tmp.second; + dstRegId = tmp.first; + break; + } + case Instruction::kReturn:
+ { + ReturnInst *retInst = dynamic_cast(instr); + code += M_emitComment("return inst"); + code += returnInst_gen(retInst); + return code; + break; + } + case Instruction::kCall: + { + CallInst *cInst = dynamic_cast(instr); + auto tmp = callInst_gen(cInst, RegManager::RANY); + code += tmp.second; + dstRegId = tmp.first; + if (dstRegId == RegManager::R0) + return code; + break; + } + case Instruction::kBr: + { + UncondBrInst *ubInst = dynamic_cast(instr); + code += uncondBrInst_gen(ubInst); + return code; + break; + } + case Instruction::kCondBr: + { + CondBrInst *cbInst = dynamic_cast(instr); + code += condBrInst_gen(cbInst); + return code; + break; + } + default: + { + code += "ERROR CODE : instruction " + + unkName + " is not implementation" + endl; + break; + } + } + if (!instr->getType()->isVoid()) + { + code += storeRegToStack_gen(dstRegId, instr); + // regm.freeReg(dstRegId);//TODO : code in here. + } + return code; + } + // + string CodeGen::globaldata_gen() + { + string asmCode; + /** + *code in here + */ + return asmCode; + } + + string CodeGen::literalPoolsCode_gen(Function *func) + { + string code; + /** + *code in here + */ + return code; + } + +} // namespace backend diff --git a/src/backend/codegen.hpp b/src/backend/codegen.hpp new file mode 100644 index 0000000..716d485 --- /dev/null +++ b/src/backend/codegen.hpp @@ -0,0 +1,252 @@ +/** + * TODO : simple register allocation scheme; handle the case where no register can be allocated. + * + * Points to watch in the backend design + * + * label allocation : labels for global variables, constants, basic blocks, function entry and function exit + * global data handling + * constant handling + * handling of temporaries (function parameters are temporaries too) + * function parameter handling + * block parameter handling + * design of the function stack frame layout + * basic block traversal order + * the arm32 procedure call convention + * + * Points to watch when testing the backend + * + * how to assemble and link. + * how to run under qemu and on a Raspberry Pi. + */ +#ifndef __CODEGEN_HPP_ +#define __CODEGEN_HPP_ +#include +#include +#include +#include +#include +#include + +using namespace sysy; +using std::find; +using std::map; +using std::pair; +using std::set; +using std::string; +using std::to_string; +using std::vector; + +#define Macro_ERROR_MSG(...)
\ + do \ + { \ + fprintf(stderr, "ERROR in line-%d of %s: ", __LINE__, __FILE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ + exit(1); \ + } while (0); /* NOTE(review): the trailing semicolon after while (0) defeats the do/while(0) idiom; an if/else wrapped around a call to this macro will not compile */ + +#define M_emitInst(STR) (space + STR + endl) +#define M_emitComment(STR) (space + "//" + STR + endl) +#define M_emitLabel(STR) (STR + ":" + endl) + +// #define REG_EXP + +namespace backend +{ + static const string space = string(4, ' '); + static const string endl = "\n"; + // only for armv7 + static const int int_align = 4; + static const int int_size = 4; + static const int int_p2align = 2; + static const int reg_size = 4; + + // default arm32 max imm + // + static const uint32_t maxMovImm = 0xFFF; + static const string stackIndexReg = "fp"; + + static uint16_t getWordHigh(uint32_t cval) + { + return (cval >> 16) & 0xFFFF; + } + static uint16_t getWordLow(uint32_t cval) + { + return cval & 0xFFFF; + } + // + static string + emitInst_1srcR_noDstR(string name, string srcReg) + { + return space + name + " " + srcReg + endl; + } + static string + emitInst_2srcR_1dstR(string name, string srcReg0, string srcReg1, string dstReg) + { + return space + name + " " + dstReg + ", " + + srcReg0 + ", " + srcReg1 + endl; + } + // + class RegManager + { + public: + //{0,1,2,3,4,5,6,7,8,9,10}; + enum RegId : unsigned + { + R0 = 0, + R1 = 1, + R2 = 2, + R3 = 3, + R4 = 4, + R5 = 5, + R6 = 6, + R7 = 7, + R8 = 8, + R9 = 9, + R10 = 10, + RNONE = 1024, + RANY = 2048, + }; + static string toString(RegId reg) + { + if (reg == RNONE) + return "RNONE"; + if (reg == RANY) + return "RANY"; + return "r" + to_string(reg); + } + }; + + class Operand + { + public: + using RegId = RegManager::RegId; + enum Kind + { + kReg, + kImm, + }; + Kind kind; + union + { + uint32_t imm; + RegId regId; + }; + Operand(){}; /* NOTE(review): this default constructor leaves kind and the union uninitialized */ + Operand(uint32_t imm) : kind(kImm), imm(imm) {} + Operand(RegId regId) : kind(kReg), regId(regId) {} + bool isImm(void) { return kind == kImm; } + bool isReg(void) { return kind == kReg; } +
uint32_t getImm(void) + { + assert(kind == kImm); + return imm; + } + RegId getRegId(void) + { + assert(kind == kReg); + return regId; + } + string toString(void) + { + if (kind == kImm) + return "#" + to_string(imm); + else + return RegManager::toString(regId); + } + }; + + class CodeGen + { + public: + using RegId = RegManager::RegId; + + private: + Module *module; + Function *curFunc; + BasicBlock *curBB; + // + RegManager regm; + // global values + bool loadGlobalValByMOVWT = true; + // basic blocks + vector linear_bb; + int bb_no = 0; + // function params, return value and local variables + map paramsStOffset; + map localVarStOffset; + int retValueStOffset = 0; + size_t stOffsetAcc = 0; + // label management + map bb_labels; + uint64_t label_no = 0; + + public: + CodeGen(Module *module) : module(module) {} + // code_gen function list + string code_gen(); + string module_gen(Module *module); + string function_gen(Function *func); + string basicBlock_gen(BasicBlock *bb); + string instruction_gen(Instruction *instr); + string globaldata_gen(); + string prologueCode_gen(Function *func); + string epilogueCode_gen(Function *func); + string literalPoolsCode_gen(Function *func); + string functionHead_gen(Function *func); + // per-module reset: restarts label numbering at 0 + void clearModuleRecord(Module *module) + { + label_no = 0; + } + // instruction gen function list + // + pair loadInst_gen(LoadInst *ldInst, RegId dstRegId); + string storeInst_gen(StoreInst *stInst); + pair allocaInst_gen(AllocaInst *aInst, RegId dstRegId); + string returnInst_gen(ReturnInst *retInst); + pair callInst_gen(CallInst *retInst, RegId dstRegId); + pair binaryInst_gen(BinaryInst *bInst, RegId dstRegId); + pair unaryInst_gen(UnaryInst *bInst, RegId dstRegId); + string uncondBrInst_gen(UncondBrInst *ubInst); + string condBrInst_gen(CondBrInst *ubInst); + + // + string storeRegToStack_gen(RegId regId, Instruction *inst) + { + string code; + /** + *code in here + */ + return code; + } + // per-function reset: clears the offset tables and the block-label map; label_no is not reset here + void clearFunctionRecord(Function *func) +
{ + localVarStOffset.clear(); + paramsStOffset.clear(); + retValueStOffset = 0; + bb_labels.clear(); + // + stOffsetAcc = 0; + } + string getBBLabel(BasicBlock *bb) + { + auto t = bb_labels.find(bb); + string label; + if (t == bb_labels.end()) + { + label = ".LBB_" + to_string(label_no++); + bb_labels.emplace(bb, label); + } + else + { + label = t->second; + } + return label; + } + }; +} + +#endif //__CODEGEN_HPP_ diff --git a/src/sysyc.cpp b/src/sysyc.cpp index a865a7e..239c51a 100644 --- a/src/sysyc.cpp +++ b/src/sysyc.cpp @@ -8,15 +8,25 @@ using namespace std; using namespace antlr4; // #include "SysYFormatter.h" #include "SysYIRGenerator.h" +#include "backend/codegen.hpp" using namespace sysy; +using backend::CodeGen; -int main(int argc, char **argv) { - if (argc != 2) { - cerr << "Usage: " << argv[0] << "inputfile\n"; +int main(int argc, char **argv) +{ + if (argc > 3) /* NOTE(review): argc == 1 (no input file) is no longer rejected, so argv[1] used below is a null pointer in that case */ + { + cerr << "Usage: " << argv[0] << "inputfile [ir]\n"; return EXIT_FAILURE; } + bool genir = false; + if (argc > 2) + { + genir = true; + } ifstream fin(argv[1]); - if (not fin) { + if (not fin) + { cerr << "Failed to open file " << argv[1]; return EXIT_FAILURE; } @@ -29,7 +39,17 @@ int main(int argc, char **argv) { SysYIRGenerator generator; generator.visitModule(moduleAST); auto moduleIR = generator.get(); - moduleIR->print(cout); - + // IR-only mode: print the SysY IR and exit without running the backend + if (genir) + { + moduleIR->print(cout); + return EXIT_SUCCESS; + } + + CodeGen codegen(moduleIR); + string asmCode = codegen.code_gen(); + cout << asmCode << endl; + ; + return EXIT_SUCCESS; } \ No newline at end of file diff --git a/test/01_add.sy b/test/01_add.sy new file mode 100644 index 0000000..dc2bbf9 --- /dev/null +++ b/test/01_add.sy @@ -0,0 +1,10 @@ +//test add + + +int main(){ + int a, b; + a = 10; + b = 2; + int c = a; + return a + b + c; +}