首先分析
// Simple hand-written lexical analyzer.
//
// Reads source text from "data.txt" (up to the first '#', at most 254
// characters), splits it into tokens and prints every token together with its
// category. All scanner state lives in the globals declared below.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#define _KEY_WORDEND "waiting for your expanding"

using namespace std;

// A token, stored as a pair (kind code, text).
struct WORD
{
    int typenum;       // token kind code
    const char *word;  // token text; may point into the global `token` buffer
};

char input[255];       // text being scanned (NUL-terminated)
char token[255] = "";  // spelling of the identifier/number currently being built
int p_input;           // index of the next unread character in `input`
int p_token;           // index of the next free slot in `token`
char ch;               // most recently read character

// Reserved words. A word's kind code is its index in this table plus one, so
// the table must keep at most 9 entries (code 10 means "identifier").
const char *rwtab[] = {
    "if", "while", "int", "main", "else", "float", "double", "return", "cout"
};

char m_getch();
void getbc();
void concat();
int letter();
int digit();
int reserve();
void retract();
WORD *scanner();

// True for the kind codes of ":=", "=", "+", "-", "/", "*", ">=", ">", "<=",
// "<", "==" and "!=".
static bool is_operator(int typenum)
{
    return typenum == 18 ||
           (typenum >= 21 && typenum <= 25) ||
           (typenum >= 35 && typenum <= 40);
}

// True for the kind codes of "(", ")", "[", "]", "{", "}", ",", ":", ";"
// and the double quote.
static bool is_delimiter(int typenum)
{
    return (typenum >= 26 && typenum <= 34) || typenum == 41;
}

// Prints one token as "[ <category>\t,<text> ]". Comments (999), the
// end-of-input marker (1000), '#' (0) and errors (-1) belong to no category
// and are not printed.
static void print_token(const WORD &w)
{
    const int t = w.typenum;
    const char *label = NULL;

    if (t == 10)
        label = "标识符";
    else if (t == 20)
        label = "数值 ";
    else if (t >= 1 && t <= 9)
        label = "关键字";
    else if (is_operator(t))
        label = "运算符";
    else if (is_delimiter(t))
        label = "分界符";

    if (label != NULL)
        cout << "[ " << label << "\t," << w.word << " ]" << "\n";
}

// Entry point: redirects stdin to data.txt, reads the text in front of the
// first '#' and prints its tokens. Scanning stops at the end of the text or
// at the first lexical error; no error recovery is attempted.
int main()
{
    cout << "read something from data.txt" << endl;

    if (freopen("data.txt", "r", stdin) == NULL) {
        printf("Not found file!\n");
        return 0;
    }

    // The field width keeps the read inside `input` (254 characters + NUL).
    // Anything other than 1 means there is no text in front of the first '#'.
    if (scanf("%254[^#]", input) != 1)
        return 0;

    p_input = 0;
    printf("your words:\n%s\n", input);

    for (;;) {
        WORD *oneword = scanner();
        const int type = oneword->typenum;
        print_token(*oneword);
        delete oneword;  // scanner() hands ownership to the caller

        // -1 is a lexical error, 1000 is the end of the input text.
        if (type == -1 || type >= 1000)
            break;
    }
    return 0;
}

// Reads the next character of `input` into `ch` and returns it. Reads outside
// the buffer yield '\0'. The position always advances so that retract() stays
// symmetrical with this function.
char m_getch()
{
    if (p_input >= 0 && p_input < static_cast<int>(sizeof(input)))
        ch = input[p_input];
    else
        ch = '\0';
    p_input++;
    return ch;
}

// Skips blanks: while `ch` is a space, newline, carriage return or tab, reads
// the next character.
void getbc()
{
    while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t')
        m_getch();
}

// Appends `ch` to `token` and keeps it NUL-terminated. Characters that would
// not fit into the buffer are dropped.
void concat()
{
    if (p_token < 0 || p_token + 1 >= static_cast<int>(sizeof(token)))
        return;
    token[p_token] = ch;
    p_token++;
    token[p_token] = '\0';
}

// Returns 1 when `ch` is an ASCII letter, 0 otherwise.
int letter()
{
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
        return 1;
    return 0;
}

// Returns 1 when `ch` is a decimal digit, 0 otherwise.
int digit()
{
    if (ch >= '0' && ch <= '9')
        return 1;
    return 0;
}

// Looks `token` up in the reserved-word table. Returns the keyword's kind code
// (table index + 1), or 10 when `token` is an ordinary identifier.
int reserve()
{
    const size_t count = sizeof(rwtab) / sizeof(rwtab[0]);
    for (size_t i = 0; i < count; ++i) {
        if (strcmp(rwtab[i], token) == 0)
            return static_cast<int>(i) + 1;
    }
    return 10;
}

// Pushes the most recently read character back onto the input.
void retract()
{
    p_input--;
}

// Allocates a token with the given kind code and text.
static WORD *make_word(int typenum, const char *text)
{
    WORD *w = new WORD;
    w->typenum = typenum;
    w->word = text;
    return w;
}

// Consumes the body of a block comment whose opening "/*" has already been
// read. Returns the comment token (999), or an error token (-1) when the input
// ends before the closing "*/".
static WORD *skip_block_comment()
{
    char prev = '\0';
    for (;;) {
        m_getch();
        if (ch == '\0') {
            retract();  // leave the terminator for the next read
            return make_word(-1, "ERROR");
        }
        if (prev == '*' && ch == '/')
            return make_word(999, "注释");
        prev = ch;
    }
}

// Scans the next token from `input`, starting at `p_input`.
//
// Returns a heap-allocated WORD that the caller must delete. For identifiers,
// keywords and numbers, `word` points into the global `token` buffer and is
// only valid until the next call.
//
// Kind codes: 1-9 keyword, 10 identifier, 20 number, 18 and 21-41 operators
// and delimiters, 999 comment, 1000 end of input, 0 '#', -1 error.
WORD *scanner()
{
    p_token = 0;

    m_getch();
    getbc();

    if (letter()) {
        // Identifier or keyword: a letter followed by letters and digits.
        while (letter() || digit()) {
            concat();
            m_getch();
        }
        retract();  // the character that ended the word belongs to the next token
        return make_word(reserve(), token);
    }

    if (digit()) {
        // Unsigned integer literal; every number has kind code 20.
        while (digit()) {
            concat();
            m_getch();
        }
        retract();
        return make_word(20, token);
    }

    switch (ch) {
    case '=':
        m_getch();
        if (ch == '=')
            return make_word(39, "==");
        retract();
        return make_word(21, "=");
    case '+':  // compound assignments such as += are not recognised
        return make_word(22, "+");
    case '-':
        return make_word(23, "-");
    case '/':
        m_getch();
        if (ch == '*')
            return skip_block_comment();
        retract();
        return make_word(24, "/");
    case '*':
        return make_word(25, "*");
    case '(':
        return make_word(26, "(");
    case ')':
        return make_word(27, ")");
    case '[':
        return make_word(28, "[");
    case ']':
        return make_word(29, "]");
    case '{':
        return make_word(30, "{");
    case '}':
        return make_word(31, "}");
    case ',':
        return make_word(32, ",");
    case ':':
        m_getch();
        if (ch == '=')
            return make_word(18, ":=");
        retract();
        return make_word(33, ":");
    case ';':
        return make_word(34, ";");
    case '>':
        m_getch();
        if (ch == '=')
            return make_word(35, ">=");
        retract();
        return make_word(36, ">");
    case '<':
        m_getch();
        if (ch == '=')
            return make_word(37, "<=");
        retract();
        return make_word(38, "<");
    case '!':
        m_getch();
        if (ch == '=')
            return make_word(40, "!=");
        retract();  // a lone '!' is not a token of this language
        return make_word(-1, "ERROR");
    case '"':
        return make_word(41, "\"");
    case '\0':
        return make_word(1000, "OVER");
    case '#':
        return make_word(0, "#");
    default:
        return make_word(-1, "ERROR");
    }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)