编译器DIY——词法分析
在上一篇文章中已经介绍了读文件的操作,那么这一篇文章中将会细致解释词法分析。
在源文件里解析出的单词流必须识别为保留字,标识符,常量,操作符和界符五大类
1.显然我们需要列举出全部的保留字;与保留字形式相似的是标识符。在C语言中,保留字都以小写字母开头,并且其中的字母只能是小写字母;而标识符的第一个字符必须为字母(大小写皆可),后面可以接大小写字母、数字和字符 ‘_’。在我写的这个编译器中,标识符的长度不能超过100个字符,而C语言本身允许的标识符长度远远大于此。
2.对于常量,这里须要注意的是整型和浮点型常量。
3.运算符依照的是以下的表:
C语言运算符表
运算符依照优先级大小由上向下排列,在同一行的运算符具有同样优先级。第二行是全部的一元运算符。
运算符 | 解释 | 结合方式 |
() [] -> . | 括号(函数等),数组,两种结构成员访问 | 由左向右 |
! ~ ++ -- + - * & | 否定,按位否定,增量,减量,正负号,间接,取地址 | 由右向左 |
* / % | 乘,除,取模 | 由左向右 |
+ - | 加,减 | 由左向右 |
<< >> | 左移,右移 | 由左向右 |
< <= >= > | 小于,小于等于,大于等于,大于 | 由左向右 |
== != | 等于,不等于 | 由左向右 |
& | 按位与 | 由左向右 |
^ | 按位异或 | 由左向右 |
| | 按位或 | 由左向右 |
&& | 逻辑与 | 由左向右 |
|| | 逻辑或 | 由左向右 |
? : | 条件 | 由右向左 |
= += -= *= /= &= ^= |= <<= >>= | 各种赋值 | 由右向左 |
, | 逗号(顺序) | 由左向右 |
4.界符:“;”“{}”,单引号,双引号
接下来我介绍的是对保留字的归类,为了查找方便,将保留字依照a-z的顺序排好,根据数组的下标定位,降低寻找的时间
/*
 * keyword.h
 *
 * Created on: Jun 12, 2014
 *
 * Reserved-word tables for the lexer.  The words are grouped into one
 * bucket per leading character so that a lookup only has to scan the few
 * entries that share the candidate's first character.
 *
 * Every table is `static`, so each translation unit that includes this
 * header gets its own private copy.
 */
#ifndef KEYWORD_H_
#define KEYWORD_H_

/*
 * One reserved word.  keyName always points at a string literal, hence the
 * const qualifier: the text must never be written through this pointer.
 */
struct keyword {
    const char *keyName;
};

/*
 * Buckets.  Each bucket is terminated by the sentinel entry {"end"}; a
 * scan stops as soon as it reaches that entry.  Letters that start no
 * reserved word still get a bucket holding only the sentinel so that the
 * index table below stays dense.
 */

/* Words starting with '_'. */
static struct keyword key__[] = {
    {"__int64"},
    {"end"}
};

static struct keyword key_A[] = {
    {"auto"},
    {"end"}
};

static struct keyword key_B[] = {
    {"break"},
    {"end"}
};

static struct keyword key_C[] = {
    {"case"},
    {"char"},
    {"const"},
    {"continue"},
    {"end"}
};

static struct keyword key_D[] = {
    {"default"},
    {"do"},
    {"double"},
    {"end"}
};

static struct keyword key_E[] = {
    {"else"},
    {"enum"},
    {"extern"},
    {"end"}
};

static struct keyword key_F[] = {
    {"float"},
    {"for"},
    {"end"}
};

static struct keyword key_G[] = {
    {"goto"},
    {"end"}
};

static struct keyword key_H[] = {
    {"end"}
};

static struct keyword key_I[] = {
    {"if"},
    {"int"},
    {"end"}
};

static struct keyword key_J[] = {
    {"end"}
};

static struct keyword key_K[] = {
    {"end"}
};

static struct keyword key_L[] = {
    {"long"},
    {"end"}
};

static struct keyword key_M[] = {
    {"end"}
};

static struct keyword key_N[] = {
    {"end"}
};

static struct keyword key_O[] = {
    {"end"}
};

static struct keyword key_P[] = {
    {"end"}
};

static struct keyword key_Q[] = {
    {"end"}
};

static struct keyword key_R[] = {
    {"register"},
    {"return"},
    {"end"}
};

static struct keyword key_S[] = {
    {"short"},
    {"signed"},
    {"sizeof"},
    {"static"},
    {"struct"},
    {"switch"},
    {"end"}
};

static struct keyword key_T[] = {
    {"typedef"},
    {"end"}
};

static struct keyword key_U[] = {
    {"union"},
    {"unsigned"},
    {"end"}
};

static struct keyword key_V[] = {
    {"void"},
    {"volatile"},
    {"end"}
};

static struct keyword key_W[] = {
    {"while"},
    {"end"}
};

static struct keyword key_X[] = {
    {"end"}
};

static struct keyword key_Y[] = {
    {"end"}
};

static struct keyword key_Z[] = {
    {"end"}
};

/*
 * Bucket index, 27 entries: slot 0 is the '_' bucket and slot
 * (c - 'a' + 1) is the bucket for the lower-case letter c.
 */
static struct keyword *keywords[] = {
    key__, key_A, key_B, key_C, key_D, key_E,
    key_F, key_G, key_H, key_I, key_J, key_K,
    key_L, key_M, key_N, key_O, key_P, key_Q,
    key_R, key_S, key_T, key_U, key_V, key_W,
    key_X, key_Y, key_Z
};

#endif /* KEYWORD_H_ */
以下是词法分析的源代码;
/*
* lex.h
*
* Created on: Jun 13, 2014
*
*/
#include "input.h"
#include "keyword.h"

/*
 * Non-zero when c is one of the characters '0'..'9'.
 * c is evaluated twice, so do not pass an expression with side effects.
 */
#define isDigit(c) ((c) >= '0' && (c) <= '9')
/*
 * Character-class helpers.  Each macro evaluates its argument more than
 * once, so do not pass an expression with side effects.
 */

/* Non-zero when c is one of 'A'..'Z'. */
#define isUpperLetter(c) ((c) >= 'A' && (c) <= 'Z')
/* Non-zero when c is one of 'a'..'z'. */
#define isLowerLetter(c) ((c) >= 'a' && (c) <= 'z')
/* Non-zero when c is an upper-case or a lower-case letter. */
#define isLetter(c) (isUpperLetter(c) || isLowerLetter(c))
/*
* lex.c
*
* Created on: Jun 13, 2014
*
*/
#include "zcc.h"
#include "lex.h"

#define curr source.cursor

/* Capacity of the lexeme buffer, including the terminating '\0'. */
enum { LEX_TEXT_MAX = 100 };

/*
 * Copy the character under the cursor into text and advance the cursor.
 * Once the buffer has room only for the terminator, further characters are
 * still consumed but dropped, so an over-long lexeme is truncated instead
 * of overflowing the buffer.
 */
static void lexTake(char *text, int *length)
{
    if (*length < LEX_TEXT_MAX - 1) {
        text[*length] = *curr;
        (*length)++;
    }
    curr++;
}

/*
 * Scan an identifier or reserved word starting at the cursor and print it.
 * The caller guarantees that the cursor is on a letter or '_'.
 * Always returns 1.
 */
static int lexWord(char *text)
{
    int length = 0;
    int bucket;
    int i;

    do {
        lexTake(text, &length);
    } while (isDigit(*curr) || isUpperLetter(*curr) || isLowerLetter(*curr)
            || *curr == '_');
    text[length] = '\0';

    /*
     * keywords[0] holds the words starting with '_', keywords[1..26] those
     * starting with 'a'..'z'.  No reserved word starts with an upper-case
     * letter, so such a word is an identifier without any lookup.
     */
    if (text[0] == '_') {
        bucket = 0;
    } else if (isLowerLetter(text[0])) {
        bucket = text[0] - 'a' + 1;
    } else {
        bucket = -1;
    }

    if (bucket >= 0) {
        /* Every bucket ends with the sentinel entry "end". */
        for (i = 0; strcmp(keywords[bucket][i].keyName, "end") != 0; i++) {
            if (strcmp(keywords[bucket][i].keyName, text) == 0) {
                printf("keyword is %s\n", text);
                return 1;
            }
        }
    }
    printf("Identify is %s\n", text);
    return 1;
}

/*
 * Scan an integer or floating-point constant starting at the cursor and
 * print it.  The caller guarantees that the cursor is on a digit.
 * Always returns 1.
 */
static int lexNumber(char *text)
{
    int length = 0;

    do {
        lexTake(text, &length);
    } while (isDigit(*curr));

    if (*curr == '.') {
        /* Fraction: the '.' followed by zero or more digits. */
        do {
            lexTake(text, &length);
        } while (isDigit(*curr));
        text[length] = '\0';
        printf("float number is %s\n", text);
    } else {
        text[length] = '\0';
        printf("number is %s\n", text);
    }
    return 1;
}

/*
 * Read the next lexeme at source.cursor, print its class and text to
 * stdout, and leave the cursor just behind it.
 *
 * White space and both comment forms are skipped without any output.
 * Quotes are reported as delimiters and the text between them is scanned
 * like ordinary source; that is why "%d"-style conversions and "\n" are
 * reported as operators.
 *
 * Returns 1 after reporting a lexeme (or an unrecognised character, which
 * is consumed so that the caller always makes progress), and -1 once
 * END_OF_FILE is reached.
 */
int getToken(void)
{
    char a[LEX_TEXT_MAX];
    int a_length;
    char first;

    /* Each iteration scans one lexeme; a skipped comment restarts it. */
    for (;;) {
        /* Skip blanks, newlines, tabs and carriage returns. */
        while (*curr == ' ' || *curr == '\n' || *curr == '\t'
                || *curr == '\r') {
            curr++;
        }
        if (*curr == END_OF_FILE) {
            return -1;
        }

        if (isLowerLetter(*curr) || isUpperLetter(*curr) || *curr == '_') {
            return lexWord(a);
        }
        if (isDigit(*curr)) {
            return lexNumber(a);
        }

        a_length = 0;
        first = *curr;
        switch (first) {
        /* Delimiters. */
        case '{': case '}': case ';': case '\'': case '"':
            lexTake(a, &a_length);
            a[a_length] = '\0';
            printf("Delimiter is %s\n", a);
            return 1;

        /* Operators that are always a single character. */
        case '(': case ')': case '[': case ']': case '.':
        case '~': case '?': case ':': case ',':
            lexTake(a, &a_length);
            break;

        /* <  <<  <<=  <=   and   >  >>  >>=  >= */
        case '<': case '>':
            lexTake(a, &a_length);
            if (*curr == first) {
                lexTake(a, &a_length);
                if (*curr == '=') {
                    lexTake(a, &a_length);
                }
            } else if (*curr == '=') {
                lexTake(a, &a_length);
            }
            break;

        /* =  ==   !  !=   *  *=   ^  ^= */
        case '=': case '!': case '*': case '^':
            lexTake(a, &a_length);
            if (*curr == '=') {
                lexTake(a, &a_length);
            }
            break;

        /* +  ++  +=   &  &&  &=   |  ||  |= */
        case '+': case '&': case '|':
            lexTake(a, &a_length);
            if (*curr == first || *curr == '=') {
                lexTake(a, &a_length);
            }
            break;

        /* -  ->  --  -= */
        case '-':
            lexTake(a, &a_length);
            if (*curr == '>' || *curr == '-' || *curr == '=') {
                lexTake(a, &a_length);
            }
            break;

        /* /  /=  and the two comment forms */
        case '/':
            lexTake(a, &a_length);
            if (*curr == '=') {
                lexTake(a, &a_length);
            } else if (*curr == '/') {
                /* Line comment: stop on the newline, which is then
                 * skipped as white space. */
                while (*curr != '\n') {
                    if (*curr == END_OF_FILE) {
                        return -1;
                    }
                    curr++;
                }
                continue;
            } else if (*curr == '*') {
                /* Block comment: skip through the closing star-slash.
                 * An unterminated comment runs to END_OF_FILE, which the
                 * next iteration reports as -1. */
                curr++;
                while (*curr != END_OF_FILE) {
                    if (*curr == '*' && *(curr + 1) == '/') {
                        curr += 2;
                        break;
                    }
                    curr++;
                }
                continue;
            }
            break;

        /*
         * %  %=  and the printf-style conversions %d %c %f %l<x>.
         * The conversions take priority because string contents are
         * scanned as source; as a consequence a '%' written directly in
         * front of d, c, f or l is read as a conversion.
         */
        case '%':
            lexTake(a, &a_length);
            if (*curr == 'd' || *curr == 'c' || *curr == 'f'
                    || *curr == '=') {
                lexTake(a, &a_length);
            } else if (*curr == 'l') {
                lexTake(a, &a_length);
                /* The length modifier is followed by one more character;
                 * never step over the end-of-input marker for it. */
                if (*curr != END_OF_FILE) {
                    lexTake(a, &a_length);
                }
            }
            break;

        /* A backslash, or the escape "\n" inside a string. */
        case '\\':
            lexTake(a, &a_length);
            if (*curr == 'n') {
                lexTake(a, &a_length);
            }
            break;

        default:
            /* Consume the character so that repeated calls cannot get
             * stuck on it. */
            printf("Unknown character is %c\n", first);
            curr++;
            return 1;
        }

        a[a_length] = '\0';
        printf("Operator is %s\n", a);
        return 1;
    }
}
这里实现了将单词分成五类流,并将单词打印出来,在后面的语法分析中将会使用到这里的单词流结果。
忘了说了,我将自己写的编译器命名为:ZCC,头文件都包括在zcc.h中(*^__^*) 嘻嘻……,想写个类似于gcc 一样奇妙的玩意。
最后看测试文档:
struct Student{
int a;
char* name;
} int main()
{
int a=123;
float a2=1.2345677;
int b=1+3;
for(int i=0; i < 100; i++)
a+=i;
printf("%d\n", a);
return 0;
}
测试结果:
keyword is struct
Identify is Student
Delimiter is {
keyword is int
Identify is a
Delimiter is ;
keyword is char
Operator is *
Identify is name
Delimiter is ;
Delimiter is }
keyword is int
Identify is main
Operator is (
Operator is )
Delimiter is {
keyword is int
Identify is a
Operator is =
number is 123
Delimiter is ;
keyword is float
Identify is a2
Operator is =
float number is 1.2345677
Delimiter is ;
keyword is int
Identify is b
Operator is =
number is 1
Operator is +
number is 3
Delimiter is ;
keyword is for
Operator is (
keyword is int
Identify is i
Operator is =
number is 0
Delimiter is ;
Identify is i
Operator is <
number is 100
Delimiter is ;
Identify is i
Operator is ++
Operator is )
Identify is a
Operator is +=
Identify is i
Delimiter is ;
Identify is printf
Operator is (
Delimiter is "
Operator is %d
Operator is \n
Delimiter is "
Operator is ,
Identify is a
Operator is )
Delimiter is ;
keyword is return
number is 0
Delimiter is ;
Delimiter is }
做到这里,能够告一小段落了,接下来做的事情就是语法分析。
编译器DIY——词法分析的更多相关文章
- 编译器DIY——读文件
编译器的前端词法分析:将源文件解析成一个个的单词流.为语法分析做准备. 在词法分析阶段,我们要做的就是将词分出来,而且确定单词的类型,一般的程序设计语言的单词符号能够份为下面5种: 1.keyword ...
- atitit.词法分析的实现token attilax总结
atitit.词法分析的实现token attilax总结 1. 词法分析(英语:lexical analysis)跟token 1 1.1. 扫描器 2 2. 单词流必须识别为保留字,标识符(变量) ...
- Atitit.注解解析(1)---------词法分析 attilax总结 java .net
Atitit.注解解析(1)---------词法分析 attilax总结 java .net 1. 应用场景:::因为要使用ui化的注解 1 2. 流程如下::: 词法分析(生成token流) & ...
- Atitit.注解and属性解析(2)---------语法分析 生成AST attilax总结 java .net
Atitit.注解and属性解析(2)---------语法分析 生成AST attilax总结 java .net 1. 应用场景:::因为要使用ui化的注解 1 2. 使用解释器方式来实现生成 ...
- Atitit. 解释器模式框架选型 and应用场景attilax总结 oao
Atitit. 解释器模式框架选型 and应用场景attilax总结 oao 1. 解释器模式结构描述 1 2. 如何实现(简单的解释器模式,仅仅通过词法分析即可实现,而无需token流进行处理. 2 ...
- 翻译器DIY它———算在英文文本中的单词数,字符和行数
咳咳.这部分应该是序列化编译器DIY的,然而,在这样做DIY第一次使用前flex 为了练练手,对于后者的理解是有帮助. 在word 我经常看到一个字计数功能,因此,它是如何实现,当然,首先想到的是要经 ...
- atitit.词法分析原理 词法分析器 (Lexer)
atitit.词法分析原理 词法分析器 (Lexer) 1. 词法分析(英语:lexical analysis)1 2. :实现词法分析程序的常用途径:自动生成,手工生成.[1] 2 2.1. 词法分 ...
- 15个C++项目列表
实验楼上有很多C++的实战项目,从简单到进阶,学习每个项目都可以掌握相应的知识点. 如果你还是C++新手的话,那么这个C++的项目列表你可以拿去练手实战开发,毕竟学编程动手实践是少不了的! 如果你不知 ...
- 从零开始山寨Caffe·伍:Protocol Buffer简易指南
你为Class外访问private对象而苦恼嘛?你为设计序列化格式而头疼嘛? ——欢迎体验Google Protocol Buffer 面向对象之封装性 历史遗留问题 面向对象中最矛盾的一个特性,就是 ...
随机推荐
- Android API在不同版本系统上的兼容性
随着安卓版本的不断更新,新的API不断涌出,有时候高版本的API会在低版本crash的. 如果minSdkVersion设置过低,在build的时候,就会报错(Call requires API le ...
- 使QQ窗口八字形转圈
//先有思路 后有代码 总是不知不觉中乱敲一通 今天做个标记 感谢老师课堂上的讲解#include <stdio.h> #include <math.h> #include & ...
- Servlet学习第一天--Servlet开发映射URL配置
基础不扎实,从头学,认真记录笔记. 感谢@孤傲苍狼:http://www.cnblogs.com/xdp-gacl/p/3760336.html -为什么要配置? 由于客户端是通过URL访问web服务 ...
- 自绘Tab控件
自绘tab按钮效果图如下: 使用例子: MyTabControl *tabControl = NULL; tabControl = new MyTabControl();tabControl-> ...
- Qt for Windows:使用WinPcap开发高性能UDP服务器
首先介绍一下WinPcap WinPcap是Windows下一个网络库,性能极其强悍而且能够接收各种包. 大名鼎鼎的WireShark就是基于这个库开发的. 那么这个库性能到底有多高呢. 我测试了UD ...
- android 传感器使用 Compass指南针的实现功能
以下是指南针通过方向传感器而旋转实现. CompassDemo.java: package com.example.activity; import android.app.Activity; imp ...
- 创建自己的yum软件源(以Cloudera Hadoop的安装为例)
.下载Cloudera Manager安装文件 Cloudera Manager的可以从如下网址获得: http://archive.cloudera.com/cm4/installer/ 这里选择C ...
- java 笔试
单例设计模式: public class Singliton { //no new private Singliton (){ } static Singliton ins = null; publi ...
- Hibernate工作流程
Hibernate创建步骤 (五大核心接口:Configuration/SessionFactory/Session/Transaction/Query) 1.新建工程,导入需要的jar包. 2.利用 ...
- SQL serve创建与调用存储过程
(1)创建 2编写存储过程(创建传参的存储过程)存储过程语法网络上很多不在累述 语法解析 Use Person 指定在那个数据库下建立存储过程 if (object_id('MyFunction', ...