您所在位置：网站首页 > 建筑/环境 > 施工组织 > java词法分析器实验报告

java词法分析器实验报告.pdf

15页

卖家[上传人]：M****1

文档编号：574503538

上传时间：2024-08-16

文档格式：PDF

文档大小：575.06KB

文档加载中……请稍候！
如果长时间未打开，您也可以点击刷新试试。

下载文档到电脑，查找使用更方便

15金贝

下载

/ 15 举报版权申诉马上下载

文本预览

下载提示

常见问题

Java 词法分析器实验报告 --07111101 --奥特曼一．词法分析器功能概述： 1. 使用DFA实现词法分析器的设计； 2. 实现对Java源程序中注释和空格(空行)的过滤； 3. 利用两对半缓冲区从文件中逐一读取单词； 4. 词法分析结果属性字流存放在独立文件(c:\words.txt)中； 5. 统计源程序所有单词数以、错误单词数、单词所在的行数； 6. 具有报告词法错误和出错位置（源程序行号）的功能；二．源程序设计实现： //程序大部分参照网络，自己做了小部分改动 #include #include #include #include #include #include "const.h" using namespace std; char rbuf[RBUFSIZE]; //读文件缓冲区 int rp; //读文件缓冲区指针 char ch; //当前扫描到的字符 int type; //单词的类型 char sbuf[SBUFSIZE]; //单词字符串缓冲区 int sp; //单词字符串缓冲区指针 ifstream inFile; //输入文件 ofstream outFile; //输出文件 void clear_rbuf()//清空读文件缓冲区 { int i; for(i=0;i='0'&&c<='7') result = (int)(c - '0'); else if(c>='8'&&c<='9') { if (base > 8) result=(int)(c-'0'); else result = -1; } else if(c>='a'&&c<= 'f') { if (base>10) result=(int)(c-'a'+10); else result=-1; } else if (c>='A'&&c<='F') { if (base>10) result=(int)(c-'A'+10); else result=-1; } else result=-1; return result; } void scan_fraction()//扫描指数 { while(digit(10)>=0) { put_ch(ch); get_ch(); } if(ch=='e'||ch=='E') { put_ch(ch); get_ch(); if(ch=='+'||ch=='-') { put_ch(ch); get_ch(); } while(digit(10)>=0) { put_ch(ch); get_ch(); } return; } return; } void scan_suffix() //扫描浮点数后缀 { scan_fraction(); if(ch=='f'||ch=='F'||ch=='d'||ch=='D') { put_ch(ch); get_ch(); } type=T_FLOAT; return; } bool is_spectial(char &ch)//判断字符是否是特殊字符 { if(ch=='!'||ch=='%'||ch=='&'||ch=='*'||ch=='?'||ch=='+'||ch=='-'||ch==':'||ch=='<'||ch=='='||ch=='>'||ch=='^'||ch=='|'||ch=='~') return true; else return false; } void scan_operator()//扫描运算符 { while (is_spectial(ch)) { put_ch(ch); get_ch(); } get_type(sbuf); if(type==0) type=T_ERROR; return; } void scan_number(int radix)//扫描8、 10、 16进制数值 { while(digit(radix)>=0) { put_ch(ch); get_ch(); } if(radix!=10&&ch=='.') { put_ch(ch); get_ch(); type=T_ERROR; } else if(radix==10&&ch=='.') { put_ch('.'); get_ch(); if(digit(10)>=0) scan_suffix(); } else if(radix==10&&(ch=='e'||ch=='E'||ch=='f'||ch=='F'||ch=='d'||ch=='D')) scan_suffix(); else if(ch == 'l' || ch == 'L') { put_ch(ch); get_ch(); type=T_INT; } else 
type=T_INT; return; } void skip_comment()//跳过注释内容 { while(ch!='\0') { switch(ch) { case '*': get_ch(); if (ch=='/') { get_ch(); return; } break; default: get_ch(); break; } } } bool is_idchar(char &ch)//判断字符是否标识符首字符 { return ((ch>='0'&&ch<='9')||(ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch=='$'||ch=='_'); } void scan_ident()//搜索关键字、标识符 { bool id_or_key = true; bool tem=true;//是否仍是标识符或关键字 while(ch!=C_TAB&&ch!=C_FF&&ch!=C_CR&&ch!=C_LF&&ch!='\0') { if(is_idchar(ch)) { put_ch(ch); get_ch(); if(is_idchar(ch)) continue; else get_type(sbuf); if(type!=0) return; else { type=T_IDENTIFIER; return; } } } } void scan_char()//转义字符搜索字符 { int oct = 0; int hex = 0; if(ch=='\\') { get_ch(); if(ch=='\\') put_ch('\\');get_ch(); if(ch=='\'') put_ch('\'');get_ch(); if(ch=='\"') put_ch('\"');get_ch(); if(ch=='b') put_ch('\b');get_ch(); if(ch=='t') put_ch('\t');get_ch(); if(ch=='n') put_ch('\n');get_ch(); if(ch=='f') put_ch('\f');get_ch(); if(ch=='r') put_ch('\r');get_ch(); if('0'<=ch&&ch<='7') { oct=digit(8); get_ch(); if('0'<=ch&&ch<='7') { oct=oct*8+digit(8); get_ch(); if('0'<=ch&&ch<='7') { oct=oct*8+digit(8); get_ch(); } } put_ch((char)oct); } if(ch=='u') { get_ch(); if(('0'<=ch&&ch<='9')||('a'<=ch&&ch<='f')||('A'<=ch&&ch<='F')) { hex=hex*16+digit(16); get_ch(); if(('0'<=ch&&ch<='9')||('a'<=ch&&ch<='f')||('A'<=ch&&ch<='F')) { hex=hex*16+digit(16); get_ch(); if(('0'<=ch&&ch<='9')||('a'<=ch&&ch<='f')||('A'<=ch&&ch<='F')) { hex=hex*16+digit(16); get_ch(); if(('0'<=ch&&ch<='9')||('a'<=ch&&ch<='f')||('A'<=ch&&ch<='F')) { hex=hex*16+digit(16); get_ch(); } } } } put_ch((char)hex); } } else { put_ch(ch); get_ch(); } } void get_word()//获取下一个单词及属性 { clear_sbuf(); type=0; while (ch!='\0') { if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch=='$'||ch=='_')//关键字、标识符 { scan_ident(); return; } else if(ch=='\'')//字符 { get_ch(); if(ch=='\'') { type=T_ERROR; strcpy(sbuf,"''"); get_ch(); } else { scan_char(); if(ch=='\'') { type=T_CHAR; get_ch(); } else type=T_ERROR; } return; } else if(ch=='\"')//字符串 { 
get_ch(); if(ch=='\"') { type=T_ERROR; strcpy(sbuf,"\"\""); get_ch(); } else { do { scan_char(); } while(ch!='\"'&&ch!=C_TAB&&ch!=C_FF&&ch!=C_CR&&ch!=C_LF); if(ch=='\"') { type=T_STRING; get_ch(); } else type=T_ERROR; } return; } else if(ch=='.')//.开头数字 { put_ch(ch); get_ch(); if(digit(10)>=0) scan_suffix(); else type=T_BOUND; return; } else if(ch=='0')//0开头数字 { put_ch('0'); get_ch(); if(ch=='x'||ch=='X') { put_ch(ch); get_ch(); if(digit(16)>=0&&ch!='0') scan_number(16); } else if(digit(8)>=0&&ch!='0') scan_number(8); else if(ch=='.') { put_ch('.'); get_ch(); if(digit(10)>=0) scan_suffix(); } else if(ch==' ') { get_ch(); type=T_INT; } else type=T_ERROR; return; } else if('1'<=ch&&ch<='9')//1-9开头数字 { scan_number(10); return; } else if((ch=='(')||(ch==')')||(ch=='[')||(ch==']'))//9个界限符中的8个 { put_ch(ch); get_ch(); type = T_BOUND; return; } else if(ch==',') { put_ch(ch); get_ch(); type = T_COMMA; return; } else if((ch=='{')||(ch=='}')) { put_ch(ch); get_ch(); type = T_BRACKET; return; } else if(ch==';') { put_ch(ch); get_ch(); type = T_SEMICOLON; return; } else if(ch=='/')//注释、'/'运算符、 '/='运算符 { get_ch(); if(ch=='/') { while(ch!=C_CR&&ch!=C_LF&&ch!='\0') get_ch(); break; } else if(ch=='*') { get_ch(); skip_comment(); } else if(ch=='=') { strcpy(sbuf, "/="); type=T_ASSIGN; get_ch(); } else { strcpy(sbuf, "/"); type=T_MULDIV; } return; } else if(is_spectial(ch))//特殊字符 { scan_operator(); return; } else get_ch();//间隔符 } } void readfile(char * fn_in)//将源文件读入缓冲区 { rp = 0; inFile.open(fn_in); if (!inFile.is_open()) return; while(inFile.get(rbuf[rp])) rp++; inFile.close(); rp = 0; } void writefile()//向输出文件写字符 { sp = 0; outFile << "(0x" << hex << type << ") "; outFile << "["; while(sbuf[sp]!='\0') { outFile << sbuf[sp]; sp++; } outFile << "]"; outFile << endl; sp = 0; } int main(int argc, char * argv[]) { char fn_in[NAMESIZE]; char fn_out[NAMESIZE]; cout << "Input the name of Java source file: "; cin >> fn_in; readfile(fn_in); cout << "Input name of testing result file: "; cin 
>> fn_out; outFile.open(fn_out); get_ch(); while(ch!='\0') { get_word(); if(strlen(sbuf)!=0) writefile(); } outFile.close(); cout << "The analysis has been completed!" << endl; system("pause"); return 0; } 三.程序执行流程 a.首先从 Java 文件中读取半个缓冲区的字符串读入预处理缓冲区中，将缓冲区中的注释、空行、空格全部处理，最后预处理缓冲区里面只剩下单词、一个空格、换行； b.将预处理缓冲区里面的数据分两次读入两对半缓冲区ScanBuffer 中，送入词法分析器 wordScanner 进行逐个单词分析，由wordScanner调用相应的转换函数进行单词属性的分析。

四．心得体会：不知经历了几个不眠之夜，前后一共花了将近两星期时间，Java 词法分析器终于横空出世。回想这两个礼拜的艰苦历程，一开始真不知道如何下手，真是块烫手的芋头，徘徊挣扎了两天，最后横下一条心：拼了！！从预处理缓冲区开始着手，一个模块就花了一天时间，BUG 仍旧阴魂不散。郁闷之中，只能另辟蹊径，直入主题，先实现分析一类单词的功能——标识符 char *keyWordOrIdentifierOrBool( char *word )。首先想到的是这么多字母还有数字怎么处理，总不能一个个 switch…case 吧，于是我想到把所有字母和数字先用函数 dealChar() 处理一下，把所有字母统一归为 'a'，数字归为 '8'，这样就好办多了。期间又花了将近两天时间，改了又改，测了又测，终于喜上眉梢：“哇咔咔！”接下来就是依葫芦画瓢了，相继攻克了其它各个功能，花了三天时间。差不多是时候了，最后压轴的是数字的处理，我处理得比较简单。最后的调试阶段是比较揪心的，程序各个功能的实现基本正常，最要命的还是那个预处理缓冲区：我是一串串读入的，所以处理起来比挨个读入要困难得多，悔不该当初用 fgets，不过路已经选择了就没有退路了。定于 2010 年 5 月 28 日这个黑色星期五与魔鬼决战，泡信息楼一天终于将其勉强降服，杀青时间为 2010 年 5 月 28 日 19:30，凯歌在耳旁奏响。

再回首，往事如梦，虽然整个程序仍有 BUG 作怪，只因本人编程水平有限，但构成生命的细胞元——单词，完全是由本人经过深思熟虑挨个敲出来的，算对得起祖国、对得起老师、对得起父母、对得起我的电脑！五．DFA 略图（原图在文本提取过程中已损坏，以下仅为残留的状态编号与转移标签）：数字|字母|下划线|$ 0 1 2 字母|$|_ 其它运算符运算符其它界限符！=’‛’ ‚ !=’ \’’ ‘ \’’ 数字|.|e|f|L 数字其它 Other 4 5 6 7 8 14 13 。

点击阅读更多内容