大家好,下面介绍的是我当时上编译原理所做的实验,主要内容就是先对源程序进行预处理,然后再对处理过的程序进行词法分析。用的是最基本的C语言写的,如有不足,欢迎大家批评指正!
一、实验目的:设计并实现一个包含预处理功能的词法分析程序,加深对编译中词法分析过程的理解。
二、实验要求
1、实现预处理功能
源程序中可能包含有对程序执行无意义的符号,要求将其剔除。
首先编制一个源程序的输入过程,从键盘、文件或文本框输入若干行语句,依次存入输入缓冲区(字符型数据);然后编制一个预处理子程序,去掉输入串中的回车符、换行符和跳格符等编辑性文字;把多个空白符合并为一个;去掉注释。
2、实现词法分析功能
输入:所给文法的源程序字符串。
输出:二元组(syn,token或sum)构成的序列。其中,
syn为单词种别码。
Token为存放的单词自身字符串。
Sum为整型常量。
具体实现时,可以将单词的二元组用结构进行处理。
1)首先编写一个预处理子程序,用于读取文件并且去除文件中的回车换行、将多个空格合并成一个空格
2)然后再编写一个分析子程序,用于分析经过处理后的程序,并且识别枚举类型和共用体类型,识别struct、enum,识别&&和||、++和--、==,识别!=、-=、+=,识别浮点数、识别指数、识别指针变量、识别字符串、去掉多行注释、识别错误信息(数字后面跟有字母,例如123fds;这里有个bug:识别不了16进制的数字,比如123efh,它其实是一个16进制数,但是也会被当作错误信息)、对处理程序的大小没有限制
3)显示处理过的程序并保存在相应的文件中
运行效果
代码由三个文件构成,如下图所示
其中,Hong.h中主要是对一些常用的变量进行宏定义,fun.h中是对各种方法的实现,main.c则是对fun.h中实现的方法的简单调用,一些需要注意的地方都在代码中以注释的形式展现,话不多说,上代码!
Hong.h
#ifndef HONG_H
#define HONG_H

#include <stdio.h>

#define SizeRes 60 /* number of entries in ReservedWord */
#define Sizestr 20 /* capacity of strToken, including the terminating NUL */

/*
 * Shared lexer state.
 * NOTE: these are definitions, not extern declarations, so this header is
 * meant to be included by exactly one translation unit.
 */
char ch;                /* most recently read source character */
char strToken[Sizestr]; /* characters of the token currently being built */

int GetBC(char str[], int i);  /* skip blanks in str starting at i; returns the new index */
int IsLetter(void);            /* non-zero when ch is an ASCII letter */
int IsDigit(void);             /* non-zero when ch is a decimal digit */
char const *FindRes(char str[]); /* type code of a reserved word/operator, or "ID" */
int IsIdentifier(void);        /* non-zero when ch may appear inside an identifier */
int IsFloat(void);             /* non-zero when ch may appear inside a numeric literal */
char *IsIntorFloatorExponent(char string[]); /* "int", "float" or "Exponent" */
int IsError(void);             /* non-zero when ch continues a malformed number such as 123abc */
FILE *InserRes(char const *type, char const *value, FILE *p); /* write one "type,\tvalue" line; returns p */
void Yu(char F_open[], char F_out[]); /* preprocessing pass */
void Process(char F_open[], char Result_file[], char error_file[]); /* tokenising pass */

/* Table mapping every reserved word and operator to its type code. */
struct s1
{
    char const *type;
    char const *name;
} ReservedWord[SizeRes] = {
    {"1", "main"}, {"2", "if"}, {"3", "then"}, {"4", "while"},
    {"5", "do"}, {"6", "static"}, {"7", "int"}, {"8", "double"},
    {"9", "struct"}, {"10", "break"}, {"11", "else"}, {"12", "long"},
    {"13", "switch"}, {"14", "case"}, {"15", "typedef"}, {"16", "char"},
    {"17", "return"}, {"18", "const"}, {"19", "float"}, {"20", "short"},
    {"21", "continue"}, {"22", "for"}, {"23", "void"}, {"24", "default"},
    {"25", "sizeof"},
    /* 1. enumeration and union keywords */
    {"26", "enum"}, {"27", "union"},
    {"28", "+"}, {"29", "-"}, {"30", "*"}, {"31", "/"}, {"32", ":"},
    {"33", ":="}, {"34", "<"}, {"35", "<>"}, {"36", "<="}, {"37", ">"},
    {"38", ">="}, {"39", "="}, {"40", ";"}, {"41", "("}, {"42", ")"},
    /* 2. logical or / logical and */
    {"43", "||"}, {"44", "&&"}, {"45", "{"}, {"46", "}"},
    /* 3. ++, -- and == plus the remaining punctuation and compound operators */
    {"47", "++"}, {"48", "--"}, {"49", ","}, {"50", "=="}, {"51", "["},
    {"52", "]"}, {"53", "#"}, {"54", "include"}, {"55", "define"},
    {"56", "&"}, {"57", "."}, {"58", "+="}, {"59", "-="}, {"60", "!="}
};

#endif /* HONG_H */
fun.h
#include#include "Hong.h" //确定是否为关键字,如果是关键字返回其类型 char const *FindRes(char str[]) { for (int i = 0; i < SizeRes; i++) { if (strcmp(str, ReservedWord[i].name) == 0) return ReservedWord[i].type; } return "ID"; } //判断ch是否是字母 int IsLetter() { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return 1; return 0; } //判断是否是标识符的组成 int IsIdentifier() { if (IsLetter() || IsDigit() || ch == '_') return 1; return 0; } //判断是否是浮点数的组成 int IsFloat() { if (IsDigit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '-') return 1; return 0; } //判断是整数还是浮点数还是指数 char *IsIntorFloatorExponent(char string[]) { for (int i = 0; string[i] != ''; i++) { if (string[i] == '.') return "float"; if (string[i] == 'e' || string[i] == 'E') return "Exponent"; } return "int"; } //判断ch是否是数字 int IsDigit() { if (ch >= '0' && ch <= '9') return 1; return 0; } //判断数字后面的东西 int IsError() { if (IsLetter() || IsDigit()) return 1; return 0; } //检查ch中是否为空白,如果是,将指针移到第一个不是的地方 int GetBC(char str[], int i) { while (str[i] != '' && str[i] == ' ') { i++; } return i; } //向result文件中输出信息,并返回文件指针 FILE *InserRes(char const *type, char const *value, FILE *p) { fputs(type, p); fputc(',', p); fputc('t', p); fputs(value, p); fputc('n', p); return p; } //进行单词分割和区分 void Process(char F_open[], char Result_file[], char error_file[]) { FILE *fp; fp = fopen(F_open, "r"); FILE *res_file; res_file = fopen(Result_file, "w"); FILE *err_file; err_file = fopen(error_file, "w"); int size = 20; char L1[size]; char temp[size]; int index = 0; int WritedFlag = 0; while (fgets(L1, size, fp) != NULL) { int i = 0; while (L1[i] != '') { ch = L1[i]; if (IsDigit()) { strToken[index] = ch; i++; index++; if (L1[i] == '') //读到第一个数字,但是是最后一个字符的情况 { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; while (IsFloat()) { strToken[index] = ch; i++; index++; if (L1[i] == '') //如果找到最后,数字被截断了,就把标志状态改一下 { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; } if (IsLetter()) { strToken[index] = ch; i++; index++; if (L1[i] == '') { 
fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; while (IsError()) { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; } strToken[index] = ''; printf("Errors! "); printf("%sn", strToken); fputs(strToken, err_file); fputc('n', err_file); res_file = InserRes("error", strToken, res_file); WritedFlag = 1; } else { strToken[index] = ''; printf("<%st,t%s>n", IsIntorFloatorExponent(strToken), strToken); res_file = InserRes(IsIntorFloatorExponent(strToken), strToken, res_file); //向结果result文件中输出信息 WritedFlag = 1; } } else if (IsLetter()) { ch = L1[GetBC(L1, i)]; strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; while (IsIdentifier()) { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; } strToken[index] = ''; } else if (ch == '#') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == '+') { strToken[index] = ch; index++; i++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '+') { strToken[index] = ch; i++; index++; ch = L1[i]; } if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else if (ch == '-') { strToken[index] = ch; index++; i++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '-') { strToken[index] = ch; i++; index++; ch = L1[i]; } if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else if (ch == '!') { strToken[index] = ch; index++; i++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else if (ch == '*') //识别乘号和指针变量 { if (L1[i - 1] == ' ' || L1[i - 1] == ';') { ch = L1[GetBC(L1, i)]; strToken[index] = ch; i++; index++; if (L1[i] == '') { 
fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; while (IsIdentifier()) { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; } strToken[index] = ''; } else { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } } else if (ch == '/') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == ':') { strToken[index] = ch; i++; index++; ch = L1[i]; if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; strToken[index] = ''; } else { strToken[index] = ''; } } else if (ch == '<') { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '>') { strToken[index] = ch; i++; index++; ch = L1[i]; //strToken[index] = ''; } else if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; // strToken[index] = ''; } strToken[index] = ''; } else if (ch == '>') { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else if (ch == '=') { strToken[index] = ch; index++; i++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '=') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else if (ch == ';') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == '(') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == ')') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == '{') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == '}') { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } else if (ch == '|') { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 
0; } ch = L1[i]; if (ch == '|') { strToken[index] = ch; i++; index++; ch = L1[i]; strToken[index] = ''; } } else if (ch == '"') { i++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; while (ch != '"') { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; } strToken[index] = ''; res_file = InserRes("string", strToken, res_file); WritedFlag = 1; i++; ch = L1[i]; } else if (ch == '&') { strToken[index] = ch; i++; index++; if (L1[i] == '') { fgets(temp, size, fp); strcpy(L1, temp); i = 0; } ch = L1[i]; if (ch == '&') { strToken[index] = ch; i++; index++; ch = L1[i]; } strToken[index] = ''; } else { strToken[index] = ch; index++; strToken[index] = ''; i++; ch = L1[i]; } if ((!WritedFlag) && (strcmp(" ", strToken) != 0)) { res_file = InserRes(FindRes(strToken), strToken, res_file); printf("<%st,t%s>n", FindRes(strToken), strToken); } WritedFlag = 0; //找出一个字符串就要置0 index = 0; memset(strToken, 0, sizeof strToken); } } printf("finish!n"); } //预处理程序 void Yu(char F_open[], char F_out[]) { FILE *fp; fp = fopen(F_open, "r"); //从该文件中读取源程序 FILE *op; op = fopen(F_out, "w"); //将处理源程序的结果放在该文件中 if (fp != NULL) { char ch = fgetc(fp); while (ch != EOF) { switch (ch) { case 'n': //去掉换行,读取文件的时候,自动将'r''n'合并成了'n',所以下面的case 'r'可以省略 ch = fgetc(fp); break; case 'r': //去掉回车 ch = fgetc(fp); break; case ' ': //将多个空格合并成一个 printf("%c", ch); fputc(ch, op); ch = fgetc(fp); while (ch != EOF && ch == ' ') { ch = fgetc(fp); } break; case '/': //去掉多行注释和单行注释 ch = fgetc(fp); if (ch == '/') //去掉单行注释 { ch = fgetc(fp); while (ch != EOF && ch != 'n') { ch = fgetc(fp); } ch = fgetc(fp); //while结束时ch等于回车换行,所以再往后读一个字符 } else if (ch == '*') //去掉多行注释 { ch = fgetc(fp); while (1) { while (ch != '*') { ch = fgetc(fp); } ch = fgetc(fp); if (ch == '/') { ch = fgetc(fp); //向后读一个字符,然后退出while循环 break; } } } else { printf("%c", '/'); fputc('/', op); } break; case 't': //去掉tab ch = getc(fp); break; default: printf("%c", ch); 
fputc(ch, op); ch = fgetc(fp); break; } } } else { printf("the file can not openn"); } fclose(fp); fclose(op); printf("nnYu finish!nn"); }
main.c
大家可以自行新建文本文件,只需要把对应的文件地址改一下即可,具体说明在注释中
#include "fun.h"

/*
 * Runs the two passes in order: Yu() cleans the source program, then
 * Process() tokenises the cleaned output. Adjust the paths to your own files.
 * Backslashes in Windows paths must be doubled inside C string literals;
 * a single "\t" would be read as a tab character.
 */
int main(void)
{
    /* 1st file: program to process; 2nd file: preprocessed program. */
    Yu("E:\\test\\shiyan1\\test.txt", "E:\\test\\shiyan1\\out.txt");
    /* 1st file: preprocessed program; 2nd file: token list; 3rd file: error messages. */
    Process("E:\\test\\shiyan1\\out.txt", "E:\\test\\shiyan1\\result.txt", "E:\\test\\shiyan1\\errors.txt");
    return 0;
}
写在最后,如果大家觉得这篇文章对你有帮助的话,还请大家赞一下下啦 : )
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)