From b7dca4cc3809de0e3e6676fb1317ad50af227e3d Mon Sep 17 00:00:00 2001
From: huangjielun <2872405629@qq.com>
Date: Wed, 24 Aug 2022 11:28:35 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BA=86Getword()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 词法分析/.vscode/c_cpp_properties.json |  18 ++
 词法分析/.vscode/launch.json           |  24 +++
 词法分析/.vscode/settings.json         |  34 ++++
 词法分析/gets.h                        |  35 ++++
 词法分析/lexer.cpp                     | 247 ++++++++++++++++++++++
 5 files changed, 358 insertions(+)
 create mode 100644 词法分析/.vscode/c_cpp_properties.json
 create mode 100644 词法分析/.vscode/launch.json
 create mode 100644 词法分析/.vscode/settings.json
 create mode 100644 词法分析/gets.h
 create mode 100644 词法分析/lexer.cpp

diff --git a/词法分析/.vscode/c_cpp_properties.json b/词法分析/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..5d0d605
--- /dev/null
+++ b/词法分析/.vscode/c_cpp_properties.json
@@ -0,0 +1,18 @@
+{
+  "configurations": [
+    {
+      "name": "windows-gcc-x64",
+      "includePath": [
+        "${workspaceFolder}/**"
+      ],
+      "compilerPath": "D:/computer/C++/Dev-Cpp/MinGW64/bin/gcc.exe",
+      "cStandard": "${default}",
+      "cppStandard": "${default}",
+      "intelliSenseMode": "windows-gcc-x64",
+      "compilerArgs": [
+        ""
+      ]
+    }
+  ],
+  "version": 4
+}
\ No newline at end of file
diff --git a/词法分析/.vscode/launch.json b/词法分析/.vscode/launch.json
new file mode 100644
index 0000000..8faaa78
--- /dev/null
+++ b/词法分析/.vscode/launch.json
@@ -0,0 +1,24 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "C/C++ Runner: Debug Session",
+      "type": "cppdbg",
+      "request": "launch",
+      "args": [],
+      "stopAtEntry": false,
+      "externalConsole": true,
+      "cwd": "e:/编译原理/compiler/compiler/词法分析",
+      "program": "e:/编译原理/compiler/compiler/词法分析/build/Debug/outDebug",
+      "MIMode": "gdb",
+      "miDebuggerPath": "gdb",
+      "setupCommands": [
+        {
+          "description": "Enable pretty-printing for gdb",
+          "text": "-enable-pretty-printing",
+          "ignoreFailures": true
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/词法分析/.vscode/settings.json b/词法分析/.vscode/settings.json
new file mode 100644
index 0000000..00d85fe
--- /dev/null
+++ b/词法分析/.vscode/settings.json
@@ -0,0 +1,34 @@
+{
+  "C_Cpp_Runner.cCompilerPath": "gcc",
+  "C_Cpp_Runner.cppCompilerPath": "g++",
+  "C_Cpp_Runner.debuggerPath": "gdb",
+  "C_Cpp_Runner.cStandard": "",
+  "C_Cpp_Runner.cppStandard": "",
+  "C_Cpp_Runner.msvcBatchPath": "",
+  "C_Cpp_Runner.useMsvc": false,
+  "C_Cpp_Runner.warnings": [
+    "-Wall",
+    "-Wextra",
+    "-Wpedantic"
+  ],
+  "C_Cpp_Runner.enableWarnings": true,
+  "C_Cpp_Runner.warningsAsError": false,
+  "C_Cpp_Runner.compilerArgs": [],
+  "C_Cpp_Runner.linkerArgs": [],
+  "C_Cpp_Runner.includePaths": [],
+  "C_Cpp_Runner.includeSearch": [
+    "*",
+    "**/*"
+  ],
+  "C_Cpp_Runner.excludeSearch": [
+    "**/build",
+    "**/build/**",
+    "**/.*",
+    "**/.*/**",
+    "**/.vscode",
+    "**/.vscode/**"
+  ],
+  "files.associations": {
+    "*.tcc": "cpp"
+  }
+}
\ No newline at end of file
diff --git a/词法分析/gets.h b/词法分析/gets.h
new file mode 100644
--- /dev/null
+++ b/词法分析/gets.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <fstream>
+
+// Capacity in bytes of the buffer that getfdstr() fills. The `data` array
+// passed by the caller must hold at least this many bytes.
+const int Max = 4000;
+
+/**
+ * Reads the text file `filename` and stores its characters, with all
+ * whitespace removed, in `data` as a NUL-terminated string.
+ *
+ * At most Max - 1 characters are stored; any further input is ignored.
+ * When the file cannot be opened, `data` becomes the empty string.
+ *
+ * @param filename path of the file to read
+ * @param data     output buffer with room for at least Max bytes
+ * @return the first character stored in `data` ('\0' if nothing was read)
+ */
+inline char getfdstr(char filename[], char data[])
+{
+    int length = 0;
+    char w;
+    // A dedicated stream keeps the process's stdin untouched.
+    std::ifstream input(filename);
+    // operator>> skips whitespace by default, so only non-blank characters
+    // ever reach `w` and no explicit blank test is needed.
+    while (length < Max - 1 && input >> w)
+    {
+        data[length] = w;
+        length++;
+    }
+    data[length] = '\0';
+    return data[0];
+}
diff --git a/词法分析/lexer.cpp b/词法分析/lexer.cpp
new file mode 100644
--- /dev/null
+++ b/词法分析/lexer.cpp
@@ -0,0 +1,247 @@
+#include <conio.h>
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include "gets.h"
+
+#define MAX 4000
+
+using namespace std;
+
+// Values 0-3 are the character classes Getword() dispatches on; values 4-5
+// are the error kinds passed to error().
+enum
+{
+    UNDERLINE = 0,       // underscore
+    LETTER = 1,          // letter
+    NUMBER = 2,          // digit
+    SYMBOL = 3,          // symbol
+    SYMBOLERROR = 4,     // invalid symbol
+    IDENTIFIERERROR = 5  // invalid identifier
+};
+
+char data[MAX];    // input text with whitespace removed (filled by getfdstr)
+char letter[MAX];
+
+bool isfloat = false;     // floating-point check
+bool isbool = false;      // boolean check
+bool notes = false;       // comment check
+bool isvariable = false;  // identifier check
+bool isiderror = false;   // invalid-identifier check
+
+// Keywords with their category codes and mnemonics
+string key[32]={"char","double","enum","float","int","long","short","signed",
+    "struct","union","unsigned","void","for","do","while","break","continue",
+    "if","else","goto","switch","case","default","return","auto","extern","register",
+    "static","const","sizeof","typedef","volatile"};
+int keyNum[32]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+string keyword[32]={"CHAR","DOUBLE","ENUM","FLOAT","INT","LONG","SHORT","SIGNED",
+    "STRUCT","UNION","UNSIGNED","VOID","FOR","DO","WHILE","BREAK","CONTINUE",
+    "IF","ELSE","GOTO","SWITCH","CASE","DEFAULT","RETURN","AUTO","EXTERN","REGISTER",
+    "STATIC","CONST","SIZEOF","TYPEDEF","VOLATILE"};
+
+// Symbols with their category codes and mnemonics
+string symbol[33]={"+","-","*","/","%","++","--",">","<","==",
+    "!=",">=","<=","&&","||","!","=","+=","-=",
+    "*=","/=","%=",",","(",")","[","]","{","}",
+    ";","/*","*/","'"};
+int symbolNum[33]={33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66};
+string symbolword[33]={"PLUS","MINUS","MULTI","RDIV","MODULO","INC","DEC","GT","LT","EQ","NEQ",
+    "GE","LE","AND","OR","NOT","ASSIGN","PLUS_A","MINUS_A","MUL_A","DIV_A",
+    "MOD_A","COMMA","LR_BRAC","RR_BRAC","LS_BRAC","RS_BRAC","L_PRA","R_PRA",
+    "SEMIC","L_ANNO","R_ANNO","QMARK"};
+
+int num;     // index into data of the next character to process
+int length;  // number of characters stored in data
+
+void Getword(); // processes every kind of character found in data
+void error(FILE *fp,int type,string word);
+
+string identify(char s,int n);  // returns an identifier
+string Number(char s,int n);    // returns a number
+string symbolstr(char s,int n); // returns a symbol
+string variable(char s,int n);  // returns an identifier
+
+string Keyword(int n);    // returns the keyword for a keyword category code
+string Symbolword(int n); // returns the symbol for a symbol category code
+
+bool isNum(char s);    // tells whether s is a digit
+bool isLetter(char s); // tells whether s is a letter
+bool issymbol(char s); // tells whether s is a symbol
+// The tables have no "bool" keyword although a boolean type exists, hence
+// this extra check.
+bool isBool(string s);
+
+int wordtype(char str);  // classifies a character
+int iskeyword(string s); // returns the keyword category code
+int isSymbol(string s);  // returns the symbol category code
+
+// Number of leading entries of symbol[] that are operators; the entries
+// after them are delimiters.
+static const int OPERATOR_COUNT = 22;
+
+// Tells whether word is one of the operators listed in symbol[].
+static bool isOperator(const string &word)
+{
+    for (int i = 0; i < OPERATOR_COUNT; i++)
+    {
+        if (symbol[i] == word)
+            return true;
+    }
+    return false;
+}
+
+// Writes one printf-style formatted line to both stdout and fp.
+static void report(FILE *fp, const char *format, ...)
+{
+    va_list args;
+
+    va_start(args, format);
+    vprintf(format, args);
+    va_end(args);
+
+    va_start(args, format);
+    vfprintf(fp, format, args);
+    va_end(args);
+}
+
+int main()
+{
+    char filename[MAX] = "input.txt"; // change to match the input file name
+    // Preprocessing: load the input into data with whitespace removed.
+    // data is qualified with :: because `using namespace std` also brings
+    // in std::data (C++17), which makes the unqualified name ambiguous.
+    getfdstr(filename, ::data);
+    length = static_cast<int>(strlen(::data));
+    Getword();
+    getch(); // wait for a key press before exiting
+    return 0;
+}
+
+/**
+ * Scans data[0 .. length) and reports every token on stdout and in
+ * output.txt.
+ *
+ * Each iteration classifies the character at the global cursor num with
+ * wordtype() and passes it to the matching helper. The helpers receive num
+ * by value, so they are expected to advance the global cursor themselves
+ * past the token they return.
+ */
+void Getword()
+{
+    FILE *fp = fopen("output.txt", "w");
+    if (fp == NULL)
+    {
+        printf("Error: Couldn't open output file\n");
+        system("pause");
+        exit(EXIT_FAILURE);
+    }
+
+    // Set once the opening delimiter of the current comment has been
+    // reported; cleared again when the closing "*/" is reported.
+    bool commentOpenerReported = false;
+
+    for (num = 0; num < length;)
+    {
+        const char str = ::data[num];
+        string word;
+
+        switch (wordtype(str))
+        {
+        case UNDERLINE:
+            word = variable(str, num);
+            report(fp, "%s (%s,70) 标识符\n", word.c_str(), word.c_str());
+            isvariable = false;
+            break;
+
+        case LETTER:
+            word = identify(str, num);
+            if (notes)
+                break; // text inside a comment is not reported
+            if (isvariable)
+            {
+                const int code = iskeyword(word);
+                if (code)
+                    report(fp, "%s (%s,%d) 关键字\n", word.c_str(), Keyword(code).c_str(), code);
+                else
+                    report(fp, "%s (IDE,70) 标识符\n", word.c_str());
+            }
+            else if (word == "bool")
+            {
+                report(fp, "%s (BOOL,%d) 关键字\n", word.c_str(), 67);
+            }
+            else if (isbool)
+            {
+                report(fp, "%s (CONST_BOOL,%d) 布尔型\n", word.c_str(), 67);
+                isbool = false;
+            }
+            else
+            {
+                report(fp, "%s (%s,70) 标识符\n", word.c_str(), word.c_str());
+            }
+            break;
+
+        case NUMBER:
+            word = Number(str, num);
+            if (notes)
+                break;
+            if (isiderror)
+            {
+                error(fp, IDENTIFIERERROR, word);
+            }
+            else if (isfloat)
+            {
+                report(fp, "%s (CONST_FLOAT,69) 浮点型\n", word.c_str());
+                isfloat = false;
+            }
+            else
+            {
+                report(fp, "%s (CONST_INT,68) 整型\n", word.c_str());
+            }
+            break;
+
+        case SYMBOL:
+        {
+            word = symbolstr(str, num);
+            const int code = isSymbol(word);
+            if (notes)
+            {
+                // Inside a comment only its opening delimiter is reported.
+                if (!commentOpenerReported)
+                {
+                    report(fp, "%s (%s,%d) 界限符\n", word.c_str(), Symbolword(code).c_str(), code);
+                    commentOpenerReported = true;
+                }
+                break;
+            }
+            if (code == SYMBOLERROR)
+            {
+                error(fp, SYMBOLERROR, word);
+                break;
+            }
+            if (isOperator(word))
+            {
+                report(fp, "%s (%s,%d) 运算符\n", word.c_str(), Symbolword(code).c_str(), code);
+                break;
+            }
+            if (word == "*/")
+            {
+                report(fp, "————内容被注释————\n");
+                commentOpenerReported = false;
+            }
+            report(fp, "%s (%s,%d) 界限符\n", word.c_str(), Symbolword(code).c_str(), code);
+            break;
+        }
+
+        default:
+            // No helper consumed the character; step over it so the loop
+            // cannot stall on a class that has no case above.
+            num++;
+            break;
+        }
+    }
+    fclose(fp);
+}