编译原理实验(一)—— 源程序的预处理及词法分析程序的设计与实现(C语言实现)

编译原理实验(一)—— 源程序的预处理及词法分析程序的设计与实现(C语言实现),第1张

编译原理实验(一)—— 源程序预处理及词法分析程序的设计与实现(C语言实现)

大家好,下面介绍的是我当时上编译原理所做的实验,主要内容就是先对源程序进行预处理,然后再对处理过的程序进行词法分析。用的是最基本的C语言写的,如有不足,欢迎大家批评指正!

一、实验目的

设计并实现一个包含预处理功能的词法分析程序,加深对编译中词法分析过程的理解。

二、 实验要求

1、实现预处理功能

源程序中可能包含有对程序执行无意义的符号,要求将其剔除。
首先编制一个源程序的输入过程,从键盘、文件或文本框输入若干行语句,依次存入输入缓冲区(字符型数据);然后编制一个预处理子程序,去掉输入串中的回车符、换行符和跳格符等编辑性文字;把多个空白符合并为一个;去掉注释。
2、实现词法分析功能

输入:所给文法的源程序字符串。
输出:二元组(syn,token或sum)构成的序列。其中,
syn为单词种别码。
Token为存放的单词自身字符串。
Sum为整型常量。
具体实现时,可以将单词的二元组用结构进行处理。

三、实验设计

1)首先编写一个预处理子程序,用于读取文件并且去除文件中的回车换行、将多个空格合并成一个空格
2)然后再编写一个分析子程序,用于分析经过处理后的程序,并且识别枚举类型和共用体类型,识别struct、enum,识别&&和||、++和--、==,识别!=、-=、+=,识别浮点数、识别指数、识别指针变量、识别字符串、去掉多行注释、识别错误信息(数字后面跟有字母,如123fds;这里有个bug:识别不了16进制的数字,比如123efh,它其实是一个16进制数,但是也会被当作错误信息)、对处理程序的大小没有限制
3)显示处理过的程序并保存在相应的文件中

四、代码实现及运行效果

运行效果

代码由三个文件构成,如下图所示

其中,Hong.h中主要是对一些常用的变量进行宏定义和声明,fun.h中是对各种方法的实现,main.c则是对fun.h中实现的方法的简单调用,一些需要注意的地方都在代码中以注释的形式展现,话不多说,上代码!

Hong.h

#ifndef HONG_H
#define HONG_H

#include <stdio.h>

#define SizeRes 60 /* number of entries in ReservedWord */
#define Sizestr 20 /* capacity of strToken, including the terminating '\0' */

/*
 * NOTE: the objects below are defined (not just declared) in this header,
 * so it must be included from a single translation unit only.
 */
char ch;                /* most recently read source character */
char strToken[Sizestr]; /* characters of the token currently being built */

/* Returns the index of the first non-space character of str at or after i. */
int GetBC(char str[], int i);
/* Returns 1 if ch is an ASCII letter, 0 otherwise. */
int IsLetter(void);
/* Returns 1 if ch is a decimal digit, 0 otherwise. */
int IsDigit(void);
/* Returns the type code of str if it is in ReservedWord, otherwise "ID". */
char const *FindRes(char str[]);
/* Returns 1 if ch may appear inside an identifier, 0 otherwise. */
int IsIdentifier(void);
/* Returns 1 if ch may appear inside a numeric literal, 0 otherwise. */
int IsFloat(void);
/* Classifies a numeric literal as "int", "float" or "Exponent". */
char *IsIntorFloatorExponent(char string[]);
/* Returns 1 if ch is a letter or digit (used to swallow a malformed number). */
int IsError(void);
/* Writes one "type,<TAB>value" line to p and returns p. */
FILE *InserRes(char const *type, char const *value, FILE *p);
/* Preprocessor: copies F_open to F_out without comments and layout characters. */
void Yu(char F_open[], char F_out[]);
/* Lexer: tokenises F_open into Result_file, logging bad numbers to error_file. */
void Process(char F_open[], char Result_file[], char error_file[]);

/* Table mapping every reserved word and operator to its type code. */
struct s1
{
    char const *type; /* type code, stored as a decimal string */
    char const *name; /* spelling of the keyword or operator */
} ReservedWord[SizeRes] = {
    {"1", "main"}, {"2", "if"}, {"3", "then"}, {"4", "while"}, {"5", "do"},
    {"6", "static"}, {"7", "int"}, {"8", "double"}, {"9", "struct"}, {"10", "break"},
    {"11", "else"}, {"12", "long"}, {"13", "switch"}, {"14", "case"}, {"15", "typedef"},
    {"16", "char"}, {"17", "return"}, {"18", "const"}, {"19", "float"}, {"20", "short"},
    {"21", "continue"}, {"22", "for"}, {"23", "void"}, {"24", "default"}, {"25", "sizeof"},
    {"26", "enum"}, {"27", "union"}, /* 1: enum and union were added later */
    {"28", "+"},{"29", "-"},{"30", "*"},{"31", "/"},{"32", ":"},{"33", ":="},{"34", "<"},
    {"35", "<>"},{"36", "<="},{"37", ">"},{"38", ">="},{"39", "="},{"40", ";"},{"41", "("},
    {"42", ")"},{"43", "||"},{"44", "&&"},{"45", "{"},{"46", "}"}, /* 2: logical or / and */
    {"47", "++"},{"48", "--"},{"49", ","},{"50", "=="},{"51", "["},{"52", "]"},{"53", "#"},
    {"54", "include"},{"55", "define"},{"56", "&"},{"57", "."},{"58", "+="},{"59", "-="},
    {"60", "!="} /* 3: ++, -- and == */
};

#endif /* HONG_H */

fun.h

#include <string.h>
#include "Hong.h"
/*
 * Looks str up in the ReservedWord table.
 * Returns the matching entry's type code, or "ID" when str is not listed.
 */
char const *FindRes(char str[])
{
  int slot = 0;

  while (slot < SizeRes)
  {
    if (!strcmp(str, ReservedWord[slot].name))
    {
      return ReservedWord[slot].type;
    }
    slot++;
  }
  return "ID";
}

/* Returns 1 when the global ch is an ASCII letter (a-z or A-Z), else 0. */
int IsLetter()
{
  int lower = ('a' <= ch && ch <= 'z');
  int upper = ('A' <= ch && ch <= 'Z');

  return lower || upper;
}

/* Returns 1 when the global ch is a letter, a digit or '_', else 0. */
int IsIdentifier()
{
  return IsLetter() || IsDigit() || ch == '_';
}

/*
 * Returns 1 when the global ch is a digit or one of '.', 'e', 'E', '-',
 * i.e. a character this lexer allows inside a numeric literal; else 0.
 */
int IsFloat()
{
  switch (ch)
  {
  case '.':
  case 'e':
  case 'E':
  case '-':
    return 1;
  default:
    return IsDigit() ? 1 : 0;
  }
}
/*
 * Classifies a numeric literal by the first marker character it contains.
 *
 * string: NUL-terminated text of the literal.
 * Returns "float" if a '.' is met first, "Exponent" if an 'e'/'E' is met
 * first, and "int" when neither occurs. The returned pointer refers to a
 * string literal and must not be modified or freed.
 */
char *IsIntorFloatorExponent(char string[])
{
  /* Scan up to the terminating '\0'. */
  for (int i = 0; string[i] != '\0'; i++)
  {
    if (string[i] == '.')
      return "float";
    if (string[i] == 'e' || string[i] == 'E')
      return "Exponent";
  }
  return "int";
}

/* Returns 1 when the global ch is a decimal digit ('0'..'9'), else 0. */
int IsDigit()
{
  return ('0' <= ch && ch <= '9') ? 1 : 0;
}

/*
 * Returns 1 when the global ch is a letter or a digit, else 0.
 * Used to keep consuming the tail of a malformed number such as 123fds.
 */
int IsError()
{
  if (IsLetter())
  {
    return 1;
  }
  return IsDigit() ? 1 : 0;
}

/*
 * Skips space characters in str starting at index i.
 *
 * str: NUL-terminated buffer.
 * i:   index at which to start looking.
 * Returns the index of the first character at or after i that is not ' '
 * (this is the index of the terminating '\0' when only spaces remain).
 */
int GetBC(char str[], int i)
{
  /* '\0' differs from ' ', so the loop also stops at the end of the string. */
  while (str[i] == ' ')
  {
    i++;
  }
  return i;
}

/*
 * Appends one result line of the form "type,<TAB>value<NEWLINE>" to p.
 *
 * type:  token type code or category name.
 * value: token text.
 * p:     open, writable stream.
 * Returns p so the call can be chained or assigned back by the caller.
 */
FILE *InserRes(char const *type, char const *value, FILE *p)
{
  fputs(type, p);
  fputc(',', p);
  fputc('\t', p); /* tab separator, not the letter 't' */
  fputs(value, p);
  fputc('\n', p); /* line terminator, not the letter 'n' */
  return p;
}

//进行单词分割和区分
void Process(char F_open[], char Result_file[], char error_file[])
{
  FILE *fp;
  fp = fopen(F_open, "r");
  FILE *res_file;
  res_file = fopen(Result_file, "w");
  FILE *err_file;
  err_file = fopen(error_file, "w");
  int size = 20;
  char L1[size];
  char temp[size];
  int index = 0;
  int WritedFlag = 0;
  while (fgets(L1, size, fp) != NULL)
  {
    int i = 0;
    while (L1[i] != '')
    {
      ch = L1[i];
      if (IsDigit())
      {
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '') //读到第一个数字,但是是最后一个字符的情况
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        while (IsFloat())
        {
          strToken[index] = ch;
          i++;
          index++;
          if (L1[i] == '') //如果找到最后,数字被截断了,就把标志状态改一下
          {
            fgets(temp, size, fp);
            strcpy(L1, temp);
            i = 0;
          }
          ch = L1[i];
        }
        if (IsLetter())
        {
          strToken[index] = ch;
          i++;
          index++;
          if (L1[i] == '')
          {
            fgets(temp, size, fp);
            strcpy(L1, temp);
            i = 0;
          }
          ch = L1[i];
          while (IsError())
          {
            strToken[index] = ch;
            i++;
            index++;
            if (L1[i] == '')
            {
              fgets(temp, size, fp);
              strcpy(L1, temp);
              i = 0;
            }
            ch = L1[i];
          }
          strToken[index] = '';
          printf("Errors!    ");
          printf("%sn", strToken);
          fputs(strToken, err_file);
          fputc('n', err_file);
          res_file = InserRes("error", strToken, res_file);
          WritedFlag = 1;
        }
        else
        {
          strToken[index] = '';
          printf("<%st,t%s>n", IsIntorFloatorExponent(strToken), strToken);
          res_file = InserRes(IsIntorFloatorExponent(strToken), strToken, res_file); //向结果result文件中输出信息
          WritedFlag = 1;
        }
      }
      else if (IsLetter())
      {
        ch = L1[GetBC(L1, i)];
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        while (IsIdentifier())
        {
          strToken[index] = ch;
          i++;
          index++;
          if (L1[i] == '')
          {
            fgets(temp, size, fp);
            strcpy(L1, temp);
            i = 0;
          }
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == '#')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == '+')
      {
        strToken[index] = ch;
        index++;
        i++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '+')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == '-')
      {
        strToken[index] = ch;
        index++;
        i++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '-')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == '!')
      {
        strToken[index] = ch;
        index++;
        i++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == '*') //识别乘号和指针变量
      {
        if (L1[i - 1] == ' ' || L1[i - 1] == ';')
        {
          ch = L1[GetBC(L1, i)];
          strToken[index] = ch;
          i++;
          index++;
          if (L1[i] == '')
          {
            fgets(temp, size, fp);
            strcpy(L1, temp);
            i = 0;
          }
          ch = L1[i];
          while (IsIdentifier())
          {
            strToken[index] = ch;
            i++;
            index++;
            if (L1[i] == '')
            {
              fgets(temp, size, fp);
              strcpy(L1, temp);
              i = 0;
            }
            ch = L1[i];
          }
          strToken[index] = '';
        }
        else
        {
          strToken[index] = ch;
          index++;
          strToken[index] = '';
          i++;
          ch = L1[i];
        }
      }
      else if (ch == '/')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == ':')
      {
        strToken[index] = ch;
        i++;
        index++;
        ch = L1[i];
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
          strToken[index] = '';
        }
        else
        {
          strToken[index] = '';
        }
      }
      else if (ch == '<')
      {
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '>')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
          //strToken[index] = '';
        }
        else if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
          //  strToken[index] = '';
        }
        strToken[index] = '';
      }
      else if (ch == '>')
      {
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == '=')
      {
        strToken[index] = ch;
        index++;
        i++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '=')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else if (ch == ';')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == '(')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == ')')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == '{')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == '}')
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      else if (ch == '|')
      {
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '|')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
          strToken[index] = '';
        }
      }
      else if (ch == '"')
      {
        i++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        while (ch != '"')
        {
          strToken[index] = ch;
          i++;
          index++;
          if (L1[i] == '')
          {
            fgets(temp, size, fp);
            strcpy(L1, temp);
            i = 0;
          }
          ch = L1[i];
        }
        strToken[index] = '';
        res_file = InserRes("string", strToken, res_file);
        WritedFlag = 1;
        i++;
        ch = L1[i];
      }
      else if (ch == '&')
      {
        strToken[index] = ch;
        i++;
        index++;
        if (L1[i] == '')
        {
          fgets(temp, size, fp);
          strcpy(L1, temp);
          i = 0;
        }
        ch = L1[i];
        if (ch == '&')
        {
          strToken[index] = ch;
          i++;
          index++;
          ch = L1[i];
        }
        strToken[index] = '';
      }
      else
      {
        strToken[index] = ch;
        index++;
        strToken[index] = '';
        i++;
        ch = L1[i];
      }
      if ((!WritedFlag) && (strcmp(" ", strToken) != 0))
      {
        res_file = InserRes(FindRes(strToken), strToken, res_file);
        printf("<%st,t%s>n", FindRes(strToken), strToken);
      }
      WritedFlag = 0; //找出一个字符串就要置0
      index = 0;
      memset(strToken, 0, sizeof strToken);
    }
  }
  printf("finish!n");
}

/*
 * Preprocessor: copies the program in F_open to F_out (echoing it to stdout)
 * while removing line feeds, carriage returns, tabs and both comment styles,
 * and collapsing each run of spaces into a single space.
 *
 * F_open: path of the source program to read.
 * F_out:  path that receives the preprocessed program.
 *
 * If either file cannot be opened a message is printed and nothing is copied.
 * NOTE(review): line breaks are dropped without inserting a space, so words
 * separated only by a newline end up adjacent in the output.
 */
void Yu(char F_open[], char F_out[])
{
  FILE *fp = fopen(F_open, "r"); /* source program */
  FILE *op = fopen(F_out, "w");  /* preprocessed result */

  if (fp != NULL && op != NULL)
  {
    /* int, not char: EOF must stay distinguishable from every byte value. */
    int c = fgetc(fp);
    while (c != EOF)
    {
      switch (c)
      {
      case '\n': /* drop line feeds */
      case '\r': /* drop carriage returns */
      case '\t': /* drop tabs */
        c = fgetc(fp);
        break;
      case ' ': /* keep one space, skip the rest of the run */
        printf("%c", c);
        fputc(c, op);
        do
        {
          c = fgetc(fp);
        } while (c == ' ');
        break;
      case '/': /* possible start of a comment */
        c = fgetc(fp);
        if (c == '/') /* single-line comment: skip through the newline */
        {
          do
          {
            c = fgetc(fp);
          } while (c != EOF && c != '\n');
          if (c != EOF)
            c = fgetc(fp);
        }
        else if (c == '*') /* multi-line comment: skip through the closing star-slash */
        {
          c = fgetc(fp);
          for (;;)
          {
            while (c != EOF && c != '*')
              c = fgetc(fp);
            if (c == EOF) /* unterminated comment: stop at end of input */
              break;
            c = fgetc(fp);
            if (c == '/')
            {
              c = fgetc(fp); /* first character after the comment */
              break;
            }
          }
        }
        else /* a plain '/'; c already holds the following character */
        {
          printf("%c", '/');
          fputc('/', op);
        }
        break;
      default:
        printf("%c", c);
        fputc(c, op);
        c = fgetc(fp);
        break;
      }
    }
  }
  else
  {
    printf("the file can not open\n");
  }
  if (fp != NULL)
    fclose(fp);
  if (op != NULL)
    fclose(op);
  printf("\n\nYu finish!\n\n");
}

main.c

大家可以自行新建文本文件,只需要把对应的文件地址改一下即可,具体说明在注释中

#include "fun.h"

/*
 * Runs the preprocessor and then the lexer on fixed file paths.
 * Adjust the paths to your own files; backslashes in Windows paths must be
 * written as "\\" inside a C string literal.
 */
int main(void)
{
  /* 1st path: program to process; 2nd path: preprocessed program. */
  Yu("E:\\test\\shiyan1\\test.txt", "E:\\test\\shiyan1\\out.txt");
  /* 1st path: preprocessed program; 2nd: token list; 3rd: error report. */
  Process("E:\\test\\shiyan1\\out.txt", "E:\\test\\shiyan1\\result.txt", "E:\\test\\shiyan1\\errors.txt");
  return 0;
}

写在最后,如果大家觉得这篇文章对你有帮助的话,还请大家赞一下下啦 : )

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5702644.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-17
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存