我最近正在学习编译原理,这里有一个用 C 语言实现的词法分析程序,不知道是否可行;它识别的是 TEST 语言的单词。
#include<stdio.h>
#include<ctype.h>
#include<string.h>
/* Number of reserved words in the keyword table below. */
#define keywordSum 8
/* Reserved words. They are listed in ascending strcmp() order because the
 * scanner looks them up with a binary search. */
char * keyword[keywordSum] = {"do", "else", "for", "if", "int", "read", "while", "write"};
/* Characters that always form a one-character token.
 * NOTE(review): the ';' entry is a reconstruction. Every semicolon was stripped
 * from this listing, including, presumably, the one inside this literal.
 * Confirm against the original program. */
char singleword[50] = "+-*(){};,:";
/* Characters that may start a two-character operator (>=, <=, ==, !=, ||, &&). */
char doubleword[10] = "><=!|&";
/* Input and output file names typed in by the user. */
char Scanin[300], Scanout[300];
/* Source file being scanned and the token file being written. */
FILE * fin, * fout;
/*
 * Recursive binary search for the string c1 in c2[low..high].
 *
 * c2 must be sorted in ascending strcmp() order.
 * Returns the index of the matching entry, or -1 when c1 is not present
 * (including when the range is empty, i.e. low > high).
 */
int binaryFind(int low, int high, char * c1, char ** c2) {
    int mid, cmp;

    if (low > high) return -1;
    /* Same midpoint as (low+high)/2 for non-negative bounds, without the overflow. */
    mid = low + (high - low) / 2;
    /* Compare once and reuse the result for all three outcomes. */
    cmp = strcmp(c1, c2[mid]);
    if (cmp == 0) return mid;
    if (cmp > 0) return binaryFind(mid + 1, high, c1, c2);
    return binaryFind(low, mid - 1, c1, c2);
}
int TESTscan() {
char ch, token[40]
int es = 0, j, n
printf("请输入源文件名(包括路径):")
scanf("%s", Scanin)
printf("请输入词法分析输出文件名(包括路径):")
scanf("%s", Scanout)
if((fin=fopen(Scanin, "r")) == NULL) {
printf("\n打开词法分析输入文件出错!\n")
return 1
}
if((fout=fopen(Scanout, "w")) == NULL) {
printf("\n创建词法分析输出文件出错!\n")
return 2
}
// printf("%c", getc(fin))
ch = getc(fin)
while(ch != EOF) {
while(ch==' ' || ch=='\n' || ch=='\t') {
ch = getc(fin)
}
if(isalpha(ch)) { //标识符
token[0] = ch
j = 1
ch = getc(fin)
while(isalnum(ch)) { //判断当前字符是否是字母或数字
token[j++] = ch
ch = getc(fin)
}
token[j] = '\0'
// printf("%s", token)
n = binaryFind(0, keywordSum-1, token, keyword)
if(n <0 ) {
fprintf(fout, "%s\t%s\n", "ID", token)
} else {
fprintf(fout, "%s\t%s\n", token, token)
}
} else if(isdigit(ch)) { //数字
token[0] = ch
j = 1
ch = getc(fin)
while(isdigit(ch)) {
token[j++] = ch
ch = getc(fin)
}
token[j] = '\0'
fprintf(fout, "%s\t%s\n", "NUM", token)
} else if(strchr(singleword, ch) >0) { //singleword
token[0] = ch
token[1] = '\0'
ch = getc(fin)
fprintf(fout, "%s\t%s\n", token, token)
} else if(strchr(doubleword, ch) >0) { //doubleword
token[0] = ch
ch = getc(fin)
if(ch=='=' &&(token[0]=='>'||token[0]=='<' || token[0] == '!')) {
token[1] = ch
token[2] = '\0'
ch = getc(fin)
} else if((ch=='&')||(ch=='|')||(ch=='=') &&ch==token[0]) {
token[1] = ch
token[2] = '\0'
ch = getc(fin)
} else {
token[1] = '\0'
}
fprintf(fout, "%s\t%s\n", token, token)
} else if(ch == '/') { //注释
ch = getc(fin)
if(ch == '*') {
char ch1
ch1 = getc(fin)
do {
ch = ch1
ch1 = getc(fin)
} while((ch!='*'||ch1!='/') &&ch1!=EOF)
ch = getc(fin)
} else {
token[0] = '/'
token[1] = '\0'
fprintf(fout, "%s\t%s\n", token, token)
}
} else {
token[0] = ch
token[1] = '\0'
ch = getc(fin)
es = 3
fprintf(fout, "%s\t%s\n", "ERROR", token)
}
}
fclose(fin)
fclose(fout)
return es
}
/*
 * Program entry point: runs the scanner once and reports the outcome.
 * The scanner's status (0 on success, non-zero on failure) is also returned
 * as the process exit code.
 */
int main(void) {
    int es = TESTscan();

    if (es > 0) {
        printf("词法分析有错, 编译停止!\n");
    } else {
        printf("词法分析成功!\n");
    }
    return es;
}
我也做过这个作业:
package source
import java.util.LinkedList
public class LexicalAnalysis
{
//私有变量声明
private LinkedList<Word> optr = new LinkedList<Word>()
private String exp
//词法分析
public LinkedList<Word> lexical_analysis(String exp)
{
char ch = '\0' //当前文件指针内容
int index = 0 //文件指针
StringBuffer strToken = new StringBuffer("")
//扫描处理字符串
while(true)
{
ch = exp.charAt(index)
index++
//标识符(字母开头,数字或字符组成)
if(Character.isLetter(ch))
{
while(Character.isLetter(ch) || Character.isDigit(ch))
{
strToken.append(ch)
ch = exp.charAt(index)
index++
}
index--
String str = strToken.toString()
if(str.equals("if"))
optr.add(new Word(str, 13))
else if(str.equals("else"))
optr.add(new Word(str, 14))
else if(str.equals("then"))
optr.add(new Word(str, 15))
else
optr.add(new Word(str, 26))
}
//数字
else if(Character.isDigit(ch))
{
while(Character.isDigit(ch))
{
strToken.append(ch)
ch = exp.charAt(index)
index++
}
index--
optr.add(new Word(strToken.toString(), 26))
}
//加号或自加
else if(ch == '+')
{
ch = exp.charAt(index)
index++
if(ch == '+')
optr.add(new Word("++", 21))
else if(ch == '=')
optr.add(new Word("+=", 16))
else
{
index--
optr.add(new Word("+", 19))
}
}
//加号或自加
else if(ch == '-')
{
ch = exp.charAt(index)
index++
if(ch == '-')
optr.add(new Word("--", 21))
else if(ch == '=')
optr.add(new Word("-=", 16))
else
{
index--
optr.add(new Word("-", 19))
}
}
//乘法或乘幂
else if(ch == '*')
{
ch = exp.charAt(index)
index++
if(ch == '*')
optr.add(new Word("**", 20))
else if(ch == '=')
optr.add(new Word("*=", 16))
else
{
index--
optr.add(new Word("*", 20))
}
}
//除法或注释
else if(ch == '/')
{
ch = exp.charAt(index)
index++
//多行注释
if(ch == '*')
{
while(true)
{
ch = exp.charAt(index)
index++
if(ch == '*')
{
ch = exp.charAt(index)
index++
if(ch == '/') break
else if(ch == '\n')
{
exp = Input.newLine()
index = 0
ch = exp.charAt(index)
index++
}
else index--
}
else if(ch == '#')
{
int tIndex = index - 1
if(exp.length() > tIndex+9)
{
String end = exp.substring(tIndex, tIndex+9)
if(end.equals("#?e_N_d?#")) break
}
else
{
System.out.println("非法符号\'#\'后的语句忽略!")
exp = Input.newLine()
index = 0
break
}
}
else if(ch == '\n')
{
exp = Input.newLine()
index = 0
}
}
}
//单行注释
else if(ch == '/')
break
else if(ch == '=')
optr.add(new Word("/=", 16))
else
{
index--
optr.add(new Word("/", 20))
}
}
//大于或大于等于或右移
else if(ch == '>')
{
ch = exp.charAt(index)
index++
if(ch == '=')
optr.add(new Word(">=", 18))
else if(ch == '>')
optr.add(new Word(">>", 20))
else
{
index--
optr.add(new Word(">", 18))
}
}
//小于或小于等于或左移
else if(ch == '<')
{
ch = exp.charAt(index)
index++
if(ch == '=')
optr.add(new Word("<=", 18))
else if(ch == '<')
optr.add(new Word("<<", 20))
else
{
index--
optr.add(new Word("<", 18))
}
}
//赋值或等于
else if(ch == '=')
{
ch = exp.charAt(index)
index++
if(ch == '=')
optr.add(new Word("==", 18))
else
{
index--
optr.add(new Word("=", 16))
}
}
//或运算按位或
else if(ch == '|')
{
ch = exp.charAt(index)
index++
if(ch == '|')
optr.add(new Word("||", 17))
else
{
index--
optr.add(new Word("|", 20))
}
}
//与运算
else if(ch == '&')
{
ch = exp.charAt(index)
index++
if(ch == '&')
optr.add(new Word("&&", 17))
else
{
index--
optr.add(new Word("&", 20))
}
}
//非运算或不等于
else if(ch == '!')
{
ch = exp.charAt(index)
index++
if(ch == '=')
optr.add(new Word("!=", 18))
else
{
index--
optr.add(new Word("!", 21))
}
}
//按位亦或
else if(ch == '^')
optr.add(new Word("^", 20))
//取模运算
else if(ch == '%')
optr.add(new Word("%", 20))
//左括号
else if(ch == '(')
optr.add(new Word("(", 22))
//右括号
else if(ch == ')')
optr.add(new Word(")", 23))
//左大括号
else if(ch == '{')
optr.add(new Word("{", 24))
//右大括号
else if(ch == '}')
optr.add(new Word("}", 25))
//结束扫描标志为:#?e_N_d?#
else if(ch == '\n')
{
break
}
else if(ch == '#')
{
int tIndex = index - 1
if(exp.length() > tIndex+9)
{
String end = exp.substring(tIndex, tIndex+9)
if(end.equals("#?e_N_d?#"))
{
optr.add(new Word("#", 27))
break
}
}
else
{
System.out.println("非法符号\'#\'后的语句忽略!")
optr.add(new Word("#", 27))
break
}
}
//清空扫描串
strToken.setLength(0)
}
return optr
}
}
首先看下我们要分析的代码段如下:
输出结果如下:
输出结果(a).PNG
输出结果(b).PNG
输出结果(c).PNG
括号里是一个二元式:(单词类别编码,单词位置编号)
代码如下:
?
1234567891011121314package Yue.LexicalAnalyzer import java.io.* /* * 主程序 */public class Main { public static void main(String[] args) throws IOException { Lexer lexer = new Lexer() lexer.printToken() lexer.printSymbolsTable() }}?
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283package Yue.LexicalAnalyzer import java.io.*import java.util.* /* * 词法分析并输出 */public class Lexer { /*记录行号*/ public static int line = 1 /*存放最新读入的字符*/ char character = ' ' /*保留字*/ Hashtable<String, KeyWord>keywords = new Hashtable<String, KeyWord>() /*token序列*/ private ArrayList<Token>tokens = new ArrayList<Token>() /*符号表*/ private ArrayList<Symbol>symtable = new ArrayList<Symbol>() /*读取文件变量*/ BufferedReader reader = null /*保存当前是否读取到了文件的结尾*/ private Boolean isEnd = false /* 是否读取到文件的结尾 */ public Boolean getReaderState() { return this.isEnd } /*打印tokens序列*/ public void printToken() throws IOException { FileWriter writer = new FileWriter("E:\\lex.txt") System.out.println("词法分析结果如下:") System.out.print("杜悦-2015220201031\r\n\n") writer.write("杜悦-2015220201031\r\n\r\n") while (getReaderState() == false) { Token tok = scan() String str = "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t" + tok.name + ": " + tok.toString() + "\r\n" writer.write(str) System.out.print(str) } writer.flush() } /*打印符号表*/ public void printSymbolsTable() throws IOException { FileWriter writer = new FileWriter("E:\\symtab1.txt") System.out.print("\r\n\r\n符号表\r\n") System.out.print("编号\t行号\t名称\r\n") writer.write("符号表\r\n") writer.write("编号 " + "\t行号 " + "\t名称 
\r\n") Iterator<Symbol>e = symtable.iterator() while (e.hasNext()) { Symbol symbol = e.next() String desc = symbol.pos + "\t" + symbol.line + "\t" + symbol.toString() System.out.print(desc + "\r\n") writer.write(desc + "\r\n") } writer.flush() } /*打印错误*/ public void printError(Token tok) throws IOException{ FileWriter writer = new FileWriter("E:\\error.txt") System.out.print("\r\n\r\n错误词法如下:\r\n") writer.write("错误词法如下:\r\n") String str = "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t" + tok.name + ": " + tok.toString() + "\r\n" writer.write(str) } /*添加保留字*/ void reserve(KeyWord w) { keywords.put(w.lexme, w) } public Lexer() { /*初始化读取文件变量*/ try { reader = new BufferedReader(new FileReader("E:\\输入.txt")) } catch (IOException e) { System.out.print(e) } /*添加保留字*/ this.reserve(KeyWord.begin) this.reserve(KeyWord.end) this.reserve(KeyWord.integer) this.reserve(KeyWord.function) this.reserve(KeyWord.read) this.reserve(KeyWord.write) this.reserve(KeyWord.aIf) this.reserve(KeyWord.aThen) this.reserve(KeyWord.aElse) } /*按字符读*/ public void readch() throws IOException { character = (char) reader.read() if ((int) character == 0xffff) { this.isEnd = true } } /*判断是否匹配*/ public Boolean readch(char ch) throws IOException { readch() if (this.character != ch) { return false } this.character = ' ' return true } /*数字的识别*/ public Boolean isDigit() throws IOException { if (Character.isDigit(character)) { int value = 0 while (Character.isDigit(character)) { value = 10 * value + Character.digit(character, 10) readch() } Num n = new Num(value) n.line = line tokens.add(n) return true } else return false } /*保留字、标识符的识别*/ public Boolean isLetter() throws IOException { if (Character.isLetter(character)) { StringBuffer sb = new StringBuffer() /*首先得到整个的一个分割*/ while (Character.isLetterOrDigit(character)) { sb.append(character) readch() } /*判断是保留字还是标识符*/ String s = sb.toString() KeyWord w = keywords.get(s) /*如果是保留字的话,w不应该是空的*/ if (w != null) { w.line = line tokens.add(w) } else { 
/*否则就是标识符,此处多出记录标识符编号的语句*/ Symbol sy = new Symbol(s) Symbol mark = sy //用于标记已存在标识符 Boolean isRepeat = false sy.line = line for (Symbol i : symtable) { if (sy.toString().equals(i.toString())) { mark = i isRepeat = true } } if (!isRepeat) { sy.pos = symtable.size() + 1 symtable.add(sy) } else if (isRepeat) { sy.pos = mark.pos } tokens.add(sy) } return true } else return false } /*符号的识别*/ public Boolean isSign() throws IOException { switch (character) { case '#': readch() AllEnd.allEnd.line = line tokens.add(AllEnd.allEnd) return true case '\r': if (readch('\n')) { readch() LineEnd.lineEnd.line = line tokens.add(LineEnd.lineEnd) line++ return true } case '(': readch() Delimiter.lpar.line = line tokens.add(Delimiter.lpar) return true case ')': readch() Delimiter.rpar.line = line tokens.add(Delimiter.rpar) return true case '': readch() Delimiter.sem.line = line tokens.add(Delimiter.sem) return true case '+': readch() CalcWord.add.line = line tokens.add(CalcWord.add) return true case '-': readch() CalcWord.sub.line = line tokens.add(CalcWord.sub) return true case '*': readch() CalcWord.mul.line = line tokens.add(CalcWord.mul) return true case '/': readch() CalcWord.div.line = line tokens.add(CalcWord.div) return true case ':': if (readch('=')) { readch() CalcWord.assign.line = line tokens.add(CalcWord.assign) return true } break case '>': if (readch('=')) { readch() CalcWord.ge.line = line tokens.add(CalcWord.ge) return true } break case '<': if (readch('=')) { readch() CalcWord.le.line = line tokens.add(CalcWord.le) return true } break case '!': if (readch('=')) { readch() CalcWord.ne.line = line tokens.add(CalcWord.ne) return true } break } return false } /*下面开始分割关键字,标识符等信息*/ public Token scan() throws IOException { Token tok while (character == ' ') readch() if (isDigit() || isSign() || isLetter()) { tok = tokens.get(tokens.size() - 1) } else { tok = new Token(character) printError(tok) } return tok }}欢迎分享,转载请注明来源:内存溢出
评论列表(0条)