From b7dca4cc3809de0e3e6676fb1317ad50af227e3d Mon Sep 17 00:00:00 2001
From: huangjielun <2872405629@qq.com>
Date: Wed, 24 Aug 2022 11:28:35 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BA=86Getword()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 词法分析/.vscode/c_cpp_properties.json |  18 ++
 词法分析/.vscode/launch.json           |  24 +++
 词法分析/.vscode/settings.json         |  34 ++++
 词法分析/gets.h                        |  22 ++
 词法分析/lexer.cpp                     | 221 +++++++++++++++++++++
 5 files changed, 319 insertions(+)
 create mode 100644 词法分析/.vscode/c_cpp_properties.json
 create mode 100644 词法分析/.vscode/launch.json
 create mode 100644 词法分析/.vscode/settings.json
 create mode 100644 词法分析/gets.h
 create mode 100644 词法分析/lexer.cpp

diff --git a/词法分析/.vscode/c_cpp_properties.json b/词法分析/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..5d0d605
--- /dev/null
+++ b/词法分析/.vscode/c_cpp_properties.json
@@ -0,0 +1,18 @@
+{
+  "configurations": [
+    {
+      "name": "windows-gcc-x64",
+      "includePath": [
+        "${workspaceFolder}/**"
+      ],
+      "compilerPath": "D:/computer/C++/Dev-Cpp/MinGW64/bin/gcc.exe",
+      "cStandard": "${default}",
+      "cppStandard": "${default}",
+      "intelliSenseMode": "windows-gcc-x64",
+      "compilerArgs": [
+        ""
+      ]
+    }
+  ],
+  "version": 4
+}
\ No newline at end of file
diff --git a/词法分析/.vscode/launch.json b/词法分析/.vscode/launch.json
new file mode 100644
index 0000000..8faaa78
--- /dev/null
+++ b/词法分析/.vscode/launch.json
@@ -0,0 +1,24 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "C/C++ Runner: Debug Session",
+      "type": "cppdbg",
+      "request": "launch",
+      "args": [],
+      "stopAtEntry": false,
+      "externalConsole": true,
+      "cwd": "e:/编译原理/compiler/compiler/词法分析",
+      "program": "e:/编译原理/compiler/compiler/词法分析/build/Debug/outDebug",
+      "MIMode": "gdb",
+      "miDebuggerPath": "gdb",
+      "setupCommands": [
+        {
+          "description": "Enable pretty-printing for gdb",
+          "text": "-enable-pretty-printing",
+          "ignoreFailures": true
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/词法分析/.vscode/settings.json b/词法分析/.vscode/settings.json
new file mode 100644
index 0000000..00d85fe
--- /dev/null
+++ b/词法分析/.vscode/settings.json
@@ -0,0 +1,34 @@
+{
+  "C_Cpp_Runner.cCompilerPath": "gcc",
+  "C_Cpp_Runner.cppCompilerPath": "g++",
+  "C_Cpp_Runner.debuggerPath": "gdb",
+  "C_Cpp_Runner.cStandard": "",
+  "C_Cpp_Runner.cppStandard": "",
+  "C_Cpp_Runner.msvcBatchPath": "",
+  "C_Cpp_Runner.useMsvc": false,
+  "C_Cpp_Runner.warnings": [
+    "-Wall",
+    "-Wextra",
+    "-Wpedantic"
+  ],
+  "C_Cpp_Runner.enableWarnings": true,
+  "C_Cpp_Runner.warningsAsError": false,
+  "C_Cpp_Runner.compilerArgs": [],
+  "C_Cpp_Runner.linkerArgs": [],
+  "C_Cpp_Runner.includePaths": [],
+  "C_Cpp_Runner.includeSearch": [
+    "*",
+    "**/*"
+  ],
+  "C_Cpp_Runner.excludeSearch": [
+    "**/build",
+    "**/build/**",
+    "**/.*",
+    "**/.*/**",
+    "**/.vscode",
+    "**/.vscode/**"
+  ],
+  "files.associations": {
+    "*.tcc": "cpp"
+  }
+}
\ No newline at end of file
diff --git a/词法分析/gets.h b/词法分析/gets.h
new file mode 100644
index 0000000..9265d41
--- /dev/null
+++ b/词法分析/gets.h
@@ -0,0 +1,22 @@
+// Loads the text file `filename` (by redirecting stdin to it) and stores every
+// non-whitespace character in `data` as a NUL-terminated string.
+//   filename - path of the file to read
+//   data     - destination buffer with room for at least `capacity` chars
+//   capacity - size of `data`; the default matches the 4000-char buffer in lexer.cpp
+// Returns the first character stored, or '\0' when nothing could be read.
+inline char getfdstr(char filename[], char data[], int capacity = 4000)
+{
+    if (data == NULL || capacity <= 0)
+        return '\0';
+    data[0] = '\0';
+    if (filename == NULL || freopen(filename, "r", stdin) == NULL)
+        return '\0'; // leave `data` empty when the file cannot be opened
+    int length = 0;
+    char w;
+    // operator>> skips whitespace by default, so only visible characters are
+    // stored; the bound always leaves one slot for the terminator.
+    while (length < capacity - 1 && std::cin >> w)
+        data[length++] = w;
+    data[length] = '\0';
+    return data[0];
+}
\ No newline at end of file
diff --git a/词法分析/lexer.cpp b/词法分析/lexer.cpp
new file mode 100644
index 0000000..ecd2627
--- /dev/null
+++ b/词法分析/lexer.cpp
@@ -0,0 +1,221 @@
+#include<iostream>
+#include<stdio.h>
+#include<string.h>
+#include<conio.h>
+#include<windows.h>
+#include"gets.h"
+
+#define MAX 4000
+
+using namespace std;
+
+enum
+{
+    UNDERLINE = 0,// underscore
+    LETTER = 1,// letter
+    NUMBER = 2,// number
+    SYMBOL = 3,// symbol
+    SYMBOLERROR = 4,// invalid symbol
+    IDENTIFIERERROR = 5// invalid identifier
+};
+
+char data[MAX];// NOTE(review): with `using namespace std;` this name may clash with std::data under C++17 - confirm
+char letter[MAX];
+
+bool isfloat = false;// floating-point number flag
+bool isbool = false;// boolean flag
+bool notes = false;// comment flag
+bool isvariable = false;// identifier flag
+bool isiderror = false;// invalid-identifier flag
+
+// Keywords with their corresponding kind codes and mnemonics
+string key[32]={"char","double","enum","float","int","long","short","signed",
+                "struct","union","unsigned","void","for","do","while","break","continue",
+                "if","else","goto","switch","case","default","return","auto","extern","register",
+                "static","const","sizeof","typedef","volatile"};
+int keyNum[32]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
+string keyword[32]={"CHAR","DOUBLE","ENUM","FLOAT","INT","LONG","SHORT","SIGNED",
+                "STRUCT","UNION","UNSIGNED","VOID","FOR","DO","WHILE","BREAK","CONTINUE",
+                "IF","ELSE","GOTO","SWITCH","CASE","DEFAULT","RETURN","AUTO","EXTERN","REGISTER",
+                "STATIC","CONST","SIZEOF","TYPEDEF","VOLATILE"};
+
+// Kind codes and mnemonics of the symbols
+string symbol[33]={"+","-","*","/","%","++","--",">","<","==",
+                    "!=",">=","<=","&&","||","!","=","+=","-=",
+                    "*=","/=","%=",",","(",")","[","]","{","}",
+                    ";","/*","*/","'"};
+int symbolNum[33]={33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66};
+string symbolword[33]={"PLUS","MINUS","MULTI","RDIV","MODULO","INC","DEC","GT","LT","EQ","NEQ",
+	                    "GE","LE","AND","OR","NOT","ASSIGN","PLUS_A","MINUS_A","MUL_A","DIV_A",
+                        "MOD_A","COMMA","LR_BRAC","RR_BRAC","LS_BRAC","RS_BRAC","L_PRA","R_PRA",
+                        "SEMIC","L_ANNO","R_ANNO","QMARK"};
+
+int num;
+int length;
+
+void Getword();// processes the various kinds of characters held in data
+void error(FILE *fp,int type,string word);
+
+string identify(char s,int n);// returns an identifier
+string Number(char s,int n);// returns a number
+string symbolstr(char s,int n);// returns a symbol
+string variable(char s,int n);// returns an identifier
+
+string Keyword(int n);// returns the keyword for a keyword kind code
+string Symbolword(int n);// returns the symbol for a symbol kind code
+
+bool isNum(char s);// tests whether s is a digit
+bool isLetter(char s);// tests whether s is a letter
+bool issymbol(char s);// tests whether s is a symbol
+bool isBool(string s);// extra check: the table has no bool keyword, but a boolean type exists
+ 
+int wordtype(char str);// classifies the type of a character
+int iskeyword(string s);// returns the keyword kind code
+int isSymbol(string s);// returns the symbol kind code
+
+int main() {  // Entry point: tokenize input.txt, then wait for a key press.
+    char filename[MAX] = "input.txt";  // change this to match the input file name
+    getfdstr(filename, data);  // pre-process: copy the file into data without whitespace
+    length = static_cast<int>(strlen(data));
+    Getword();
+    getch();  // blocks until a key is pressed
+    return 0;
+}
+
+// Prints one token record, "<lexeme>  (<mnemonic>,<code>)   <category>", to the
+// console and to `fp` from a single format string so both outputs always match.
+static void reportToken(FILE *fp, const string &lexeme, const string &mnemonic,
+                        int code, const char *category)
+{
+    printf("%s  (%s,%d)   %s\n", lexeme.c_str(), mnemonic.c_str(), code, category);
+    fprintf(fp, "%s  (%s,%d)   %s\n", lexeme.c_str(), mnemonic.c_str(), code, category);
+}
+
+// Returns true when `word` is one of the lexemes Getword() reports as an
+// operator ("运算符"); other recognised symbols are delimiters ("界限符").
+static bool isOperatorLexeme(const string &word)
+{
+    static const char *const kOperators[] = {
+        "+", "-", "*", "/", "%", "++", "--", ">", "<", "==", "!=",
+        ">=", "<=", "&&", "||", "!", "=", "+=", "-=", "*=", "/=", "%="};
+    for (size_t i = 0; i < sizeof(kOperators) / sizeof(kOperators[0]); ++i)
+    {
+        if (word == kOperators[i])
+            return true;
+    }
+    return false;
+}
+
+// Tokenizes the global `data` buffer (`length` characters) and writes one record
+// per token to the console and to output.txt.
+//
+// The scanner helpers (variable, identify, Number, symbolstr) are defined
+// elsewhere; they are assumed to advance the global cursor `num` past the lexeme
+// they return and to maintain the notes / isvariable / isbool / isfloat /
+// isiderror flags -- TODO confirm against their definitions.
+void Getword()
+{
+    FILE *fp = fopen("output.txt", "w");
+    if (fp == NULL)
+    {
+        printf("Error: Couldn't open output file\n");
+        system("pause");
+        exit(1); // non-zero status: being unable to write the result is a failure
+    }
+
+    // Set once the opening comment delimiter has been reported so that further
+    // symbols inside the same comment are not reported again.
+    bool commentOpenReported = false;
+
+    num = 0;
+    while (num < length)
+    {
+        const char str = data[num];
+        string word;
+        switch (wordtype(str))
+        {
+            case UNDERLINE:
+                word = variable(str, num);
+                reportToken(fp, word, word, 70, "标识符");
+                isvariable = false;
+                break;
+
+            case LETTER:
+                word = identify(str, num);
+                if (notes)
+                    break; // text inside a comment is not reported
+                if (isvariable)
+                {
+                    const int keywordCode = iskeyword(word);
+                    if (keywordCode)
+                        reportToken(fp, word, Keyword(keywordCode), keywordCode, "关键字");
+                    else
+                        reportToken(fp, word, "IDE", 70, "标识符");
+                }
+                else if (!word.compare("bool"))
+                {
+                    reportToken(fp, word, "BOOL", 67, "关键字");
+                }
+                else if (isbool)
+                {
+                    reportToken(fp, word, "CONST_BOOL", 67, "布尔型");
+                    isbool = false;
+                }
+                else
+                {
+                    reportToken(fp, word, word, 70, "标识符");
+                    isvariable = false;
+                }
+                break;
+
+            case NUMBER:
+                word = Number(str, num);
+                if (notes)
+                    break;
+                if (isiderror)
+                    error(fp, IDENTIFIERERROR, word);
+                else if (isfloat)
+                {
+                    reportToken(fp, word, "CONST_FLOAT", 69, "浮点型");
+                    isfloat = false;
+                }
+                else
+                    reportToken(fp, word, "CONST_INT", 68, "整型");
+                break;
+
+            case SYMBOL:
+            {
+                word = symbolstr(str, num);
+                if (notes && commentOpenReported)
+                    break; // still inside a comment whose opener was already reported
+                const int symbolCode = isSymbol(word);
+                if (notes)
+                {
+                    reportToken(fp, word, Symbolword(symbolCode), symbolCode, "界限符");
+                    commentOpenReported = true;
+                }
+                else if (symbolCode == SYMBOLERROR)
+                    error(fp, SYMBOLERROR, word);
+                else if (isOperatorLexeme(word))
+                    reportToken(fp, word, Symbolword(symbolCode), symbolCode, "运算符");
+                else
+                {
+                    if (!word.compare("*/"))
+                    {
+                        printf("————内容被注释————\n");
+                        fprintf(fp, "————内容被注释————\n");
+                        commentOpenReported = false;
+                    }
+                    reportToken(fp, word, Symbolword(symbolCode), symbolCode, "界限符");
+                }
+                break;
+            }
+
+            default:
+                // Unclassified character: skip it so the scan cannot stall on it.
+                ++num;
+                break;
+        }
+    }
+    fclose(fp);
+}
\ No newline at end of file