编译器DIY——词法分析

在上一篇文章中已经介绍了读文件的操作，那么这一篇文章中将会细致解释词法分析。

在源文件里解析出的单词流必须识别为保留字，标识符，常量，操作符和界符五大类

1.显然我们需要列举出全部的保留字，而与保留字相似的是标识符。在C语言中，保留字都以小写字母开头，并且其中的字母只能是小写字母；而标识符的第一个字符必须为字母（大小写皆可），后面可以接大小写字母、数字和字符 ‘_’。在我写的这个编译器中，标识符的长度不能超过100个字符，而C语言中标识符允许的长度远远大于此。

2.对于常量，这里须要注意的是整型和浮点型常量。

3.运算符依照的是以下的表：

C语言运算符表

运算符依照优先级大小由上向下排列，在同一行的运算符具有同样优先级。第二行是全部的一元运算符。

运算符	解释	结合方式
() [] -> .	括号（函数等），数组，两种结构成员访问	由左向右
! ~ ++ -- + - * &	否定，按位否定，增量，减量，正负号，间接，取地址	由右向左
* / %	乘，除，取模	由左向右
+ -	加，减	由左向右
<< >>	左移，右移	由左向右
< <= >= >	小于，小于等于，大于等于，大于	由左向右
== !=	等于，不等于	由左向右
&	按位与	由左向右
^	按位异或	由左向右
\|	按位或	由左向右
&&	逻辑与	由左向右
\|\|	逻辑或	由左向右
? :	条件	由右向左
= += -= *= /= %= &= ^= \|= <<= >>=	各种赋值	由右向左
,	逗号（顺序）	由左向右

4.界符：“;”、“{”、“}”、单引号、双引号

接下来我介绍的是对保留字的归类：为了查找方便，将保留字按首字母 a-z 分组排好，查找时根据首字母对应的数组下标直接定位，减少查找的时间。

/*

 * keyword.h

 *

 *  Created on: Jun 12, 2014

 *

 */

#ifndef KEYWORD_H_

#define KEYWORD_H_

/*
 * One entry of a keyword table: the spelling of a reserved word.
 * Every entry points at a string literal, so the pointer is const-qualified.
 */
struct keyword{
	const char *keyName;
};

/*
 * Keyword tables, one per possible first character of a reserved word.
 *
 * Each table lists its keywords in alphabetical order and is terminated by
 * the sentinel entry {"end"}; getToken() stops scanning a table when it
 * reaches that entry. A table that holds only the sentinel means that no
 * keyword starts with that character.
 */

/* Words that start with an underscore. */
static struct keyword key__[]={
		{"__int64"},
		{"end"}
};

static struct keyword key_A[]={
		{"auto"},
		{"end"}
};

static struct keyword key_B[]={
		{"break"},
		{"end"}
};

static struct keyword key_C[]={
		{"case"},
		{"char"},
		{"const"},
		{"continue"},
		{"end"}
};

static struct keyword key_D[]={
		{"default"},
		{"do"},
		{"double"},
		{"end"}
};

static struct keyword key_E[]={
		{"else"},
		{"enum"},
		{"extern"},
		{"end"}
};

static struct keyword key_F[]={
		{"float"},
		{"for"},
		{"end"}
};

static struct keyword key_G[]={
		{"goto"},
		{"end"}
};

static struct keyword key_H[]={
		{"end"}
};

static struct keyword key_I[]={
		{"if"},
		{"int"},
		{"end"}
};

static struct keyword key_J[]={
		{"end"}
};

static struct keyword key_K[]={
		{"end"}
};

static struct keyword key_L[]={
		{"long"},
		{"end"}
};

static struct keyword key_M[]={
		{"end"}
};

static struct keyword key_N[]={
		{"end"}
};

static struct keyword key_O[]={
		{"end"}
};

static struct keyword key_P[]={
		{"end"}
};

static struct keyword key_Q[]={
		{"end"}
};

static struct keyword key_R[]={
		{"register"},
		{"return"},
		{"end"}
};

static struct keyword key_S[]={
		{"short"},
		{"signed"},
		{"sizeof"},
		{"static"},
		{"struct"},
		{"switch"},
		{"end"}
};

static struct keyword key_T[]={
		{"typedef"},
		{"end"}
};

static struct keyword key_U[]={
		{"union"},
		{"unsigned"},
		{"end"}
};

static struct keyword key_V[]={
		{"void"},
		{"volatile"},
		{"end"}
};

static struct keyword key_W[]={
		{"while"},
		{"end"}
};

static struct keyword key_X[]={
		{"end"}
};

static struct keyword key_Y[]={
		{"end"}
};

static struct keyword key_Z[]={
		{"end"}
};

/*
 * Dispatch array over the tables above; it has 27 entries.
 * Index 0 is the underscore table, and index (c - 'a' + 1) is the table for
 * the lowercase first letter c, so 'a' maps to 1 and 'z' maps to 26.
 */
static struct keyword *keywords[]={
		key__,key_A,key_B,key_C,key_D,key_E,
		key_F,key_G,key_H,key_I,key_J,key_K,
		key_L,key_M,key_N,key_O,key_P,key_Q,
		key_R,key_S,key_T,key_U,key_V,key_W,
		key_X,key_Y,key_Z
};

#endif /* KEYWORD_H_ */

以下是词法分析的源代码;

/*

 * lex.h

 *

 *  Created on: Jun 13, 2014

 *

 */

#include "input.h"

#include "keyword.h"

/*
 * Character-class predicates used by the lexer. They compare against
 * character ranges and therefore assume a character set in which the digits
 * and the letters of each case are contiguous (e.g. ASCII).
 *
 * Each macro evaluates its argument more than once: do not pass an
 * expression with side effects such as isDigit(*p++).
 */
#define isDigit(c)			((c) >= '0' && (c) <= '9')
#define isUpperLetter(c)	((c) >= 'A' && (c) <= 'Z')
#define isLowerLetter(c)	((c) >= 'a' && (c) <= 'z')
/* True for an upper-case or lower-case letter. */
#define isLetter(c)			(isUpperLetter(c) || isLowerLetter(c))

/*

 * lex.c

 *

 *  Created on: Jun 13, 2014

 *

 */

#include "zcc.h"

#include "lex.h"

#define curr source.cursor

/* Capacity of the lexeme buffer, including the terminating NUL. */
enum { LEXEME_MAX = 100 };

/*
 * Append the character under the cursor to lexeme and advance the cursor.
 * When the buffer is already full the character is still consumed but not
 * stored, so an over-long lexeme is truncated instead of overflowing.
 */
static void takeChar(char *lexeme, int *length) {
	if (*length < LEXEME_MAX - 1) {
		lexeme[*length] = *curr;
		(*length)++;
	}
	curr++;
}

/*
 * Terminate lexeme, print it as "<category> is <lexeme>" and return 1,
 * the value getToken() reports for a recognized token.
 */
static int emitToken(const char *category, char *lexeme, int length) {
	lexeme[length] = '\0';
	printf("%s is %s\n", category, lexeme);
	return 1;
}

/*
 * Return 1 when word is listed in the keyword tables, 0 otherwise.
 * Only words starting with '_' (table 0) or a lowercase letter (tables
 * 1..26) can be keywords; each table ends at its "end" sentinel.
 */
static int isKeyword(const char *word) {
	int bucket;
	int i;

	if (*word == '_') {
		bucket = 0;
	} else if (isLowerLetter(*word)) {
		bucket = *word - 'a' + 1;
	} else {
		return 0;
	}
	for (i = 0; strcmp(keywords[bucket][i].keyName, "end") != 0; i++) {
		if (strcmp(keywords[bucket][i].keyName, word) == 0) {
			return 1;
		}
	}
	return 0;
}

/*
 * Scan the next token starting at the cursor (curr), print it together with
 * its category, and leave the cursor just behind it.
 *
 * Whitespace (space, tab, '\n', '\r') and both comment forms are skipped
 * before the token. Categories printed: keyword, Identify (identifier),
 * number, float number, Operator, Delimiter. A character that starts no
 * known token is consumed and reported as "Unknown character".
 *
 * Returns 1 after printing one item, or -1 when END_OF_FILE is reached
 * (including inside an unterminated comment).
 */
int getToken(void) {
	char lexeme[LEXEME_MAX];
	int length = 0;
	char first;

	/* Skip whitespace and comments until a token start or the end of input. */
	for (;;) {
		while (*curr == ' ' || *curr == '\t' || *curr == '\n' || *curr == '\r') {
			curr++;
		}
		if (*curr == END_OF_FILE) {
			return -1;
		}
		if (*curr == '/' && *(curr + 1) == '/') {
			/* Line comment: stop on the newline, the loop above eats it. */
			while (*curr != '\n' && *curr != END_OF_FILE) {
				curr++;
			}
			continue;
		}
		if (*curr == '/' && *(curr + 1) == '*') {
			/* Block comment: skip up to and including the closing marker. */
			curr += 2;
			while (*curr != END_OF_FILE && !(*curr == '*' && *(curr + 1) == '/')) {
				curr++;
			}
			if (*curr != END_OF_FILE) {
				curr += 2;
			}
			continue;
		}
		break;
	}

	/* Identifier or keyword: a letter or '_' followed by letters, digits, '_'. */
	if (isUpperLetter(*curr) || isLowerLetter(*curr) || *curr == '_') {
		do {
			takeChar(lexeme, &length);
		} while (isDigit(*curr) || isUpperLetter(*curr) || isLowerLetter(*curr)
				|| *curr == '_');
		lexeme[length] = '\0';	/* isKeyword() needs a terminated string */
		return emitToken(isKeyword(lexeme) ? "keyword" : "Identify", lexeme, length);
	}

	/* Integer constant, or floating constant when a '.' follows the digits. */
	if (isDigit(*curr)) {
		do {
			takeChar(lexeme, &length);
		} while (isDigit(*curr));
		if (*curr != '.') {
			return emitToken("number", lexeme, length);
		}
		do {	/* first pass takes the '.', later passes the fraction digits */
			takeChar(lexeme, &length);
		} while (isDigit(*curr));
		return emitToken("float number", lexeme, length);
	}

	switch (*curr) {
	/* Delimiters. */
	case '{':
	case '}':
	case ';':
	case '\'':
	case '"':
		takeChar(lexeme, &length);
		return emitToken("Delimiter", lexeme, length);

	/* Operators that are always a single character. */
	case '(':
	case ')':
	case '[':
	case ']':
	case '.':
	case '~':
	case '?':
	case ':':
	case ',':
		takeChar(lexeme, &length);
		break;

	/* <  <<  <=  <<=   and   >  >>  >=  >>= */
	case '<':
	case '>':
		first = *curr;
		takeChar(lexeme, &length);
		if (*curr == first) {
			takeChar(lexeme, &length);
			if (*curr == '=') {
				takeChar(lexeme, &length);
			}
		} else if (*curr == '=') {
			takeChar(lexeme, &length);
		}
		break;

	/* Operators with an optional trailing '=':  = ==  ! !=  * *=  ^ ^=  / /= */
	case '=':
	case '!':
	case '*':
	case '^':
	case '/':	/* comment openers were consumed by the skip loop above */
		takeChar(lexeme, &length);
		if (*curr == '=') {
			takeChar(lexeme, &length);
		}
		break;

	/* Operators that may be doubled or followed by '=':  + ++ +=  & && &=  | || |= */
	case '+':
	case '&':
	case '|':
		first = *curr;
		takeChar(lexeme, &length);
		if (*curr == first || *curr == '=') {
			takeChar(lexeme, &length);
		}
		break;

	/* -  --  -=  -> */
	case '-':
		takeChar(lexeme, &length);
		if (*curr == '-' || *curr == '=' || *curr == '>') {
			takeChar(lexeme, &length);
		}
		break;

	/*
	 * %  %=  and the printf-style conversions %d %c %f %l?, which are kept
	 * so that the contents of format strings are reported as before.
	 * NOTE(review): as a consequence "a%d" (modulo by a variable named d)
	 * is reported as the single operator "%d"; lexing string literals as
	 * their own token class would remove the ambiguity.
	 */
	case '%':
		takeChar(lexeme, &length);
		if (*curr == '=' || *curr == 'd' || *curr == 'c' || *curr == 'f') {
			takeChar(lexeme, &length);
		} else if (*curr == 'l') {
			takeChar(lexeme, &length);
			if (*curr != END_OF_FILE) {
				takeChar(lexeme, &length);
			}
		}
		break;

	/* Backslash, with the escape \n reported as one unit. */
	case '\\':
		takeChar(lexeme, &length);
		if (*curr == 'n') {
			takeChar(lexeme, &length);
		}
		break;

	/* Anything else: consume it so the caller cannot get stuck on it. */
	default:
		printf("Unknown character is %c\n", *curr);
		curr++;
		return 1;
	}

	return emitToken("Operator", lexeme, length);
}

这里实现了将单词流分成五大类，并将每个单词打印出来，后面的语法分析将会使用这里得到的单词流结果。

忘了说了，我将自己写的编译器命名为：ZCC，头文件都包括在zcc.h中(*^__^*) 嘻嘻……，想写个类似于gcc一样奇妙的玩意。

最后看测试文档：

struct  Student{

   int a;

   char* name;

}

int main()

{

    int a=123;

    float a2=1.2345677;

    int b=1+3;

    for(int i=0; i < 100; i++)

    		a+=i;

    printf("%d\n", a);

    return 0;

}

测试结果：

keyword is struct

Identify is Student

Delimiter is {

keyword is int

Identify is a

Delimiter is ;

keyword is char

Operator is *

Identify is name

Delimiter is ;

Delimiter is }

keyword is int

Identify is main

Operator is (

Operator is )

Delimiter is {

keyword is int

Identify is a

Operator is =

number is 123

Delimiter is ;

keyword is float

Identify is a2

Operator is =

float number is 1.2345677

Delimiter is ;

keyword is int

Identify is b

Operator is =

number is 1

Operator is +

number is 3

Delimiter is ;

keyword is for

Operator is (

keyword is int

Identify is i

Operator is =

number is 0

Delimiter is ;

Identify is i

Operator is <

number is 100

Delimiter is ;

Identify is i

Operator is ++

Operator is )

Identify is a

Operator is +=

Identify is i

Delimiter is ;

Identify is printf

Operator is (

Delimiter is "

Operator is %d

Operator is \n

Delimiter is "

Operator is ,

Identify is a

Operator is )

Delimiter is ;

keyword is return

number is 0

Delimiter is ;

Delimiter is }

做到这里，能够告一小段落了，接下来做的事情就是语法分析。

编译器DIY——词法分析的更多相关文章

编译器DIY——读文件
编译器的前端词法分析:将源文件解析成一个个的单词流.为语法分析做准备. 在词法分析阶段,我们要做的就是将词分出来,而且确定单词的类型,一般的程序设计语言的单词符号能够份为下面5种: 1.keyword ...
atitit.词法分析的实现token attilax总结
atitit.词法分析的实现token attilax总结 1. 词法分析(英语:lexical analysis)跟token 1 1.1. 扫描器 2 2. 单词流必须识别为保留字,标识符(变量) ...
Atitit.注解解析(1)---------词法分析 attilax总结 java .net
Atitit.注解解析(1)---------词法分析 attilax总结 java .net 1. 应用场景:::因为要使用ui化的注解 1 2. 流程如下::: 词法分析(生成token流) & ...
Atitit.注解and属性解析(2)---------语法分析生成AST attilax总结 java .net
Atitit.注解and属性解析(2)---------语法分析生成AST attilax总结 java .net 1. 应用场景:::因为要使用ui化的注解 1 2. 使用解释器方式来实现生成 ...
Atitit. 解释器模式框架选型 and应用场景attilax总结 oao
Atitit. 解释器模式框架选型 and应用场景attilax总结 oao 1. 解释器模式结构描述 1 2. 如何实现(简单的解释器模式,仅仅通过词法分析即可实现,而无需token流进行处理. 2 ...
翻译器DIY它———算在英文文本中的单词数，字符和行数
咳咳.这部分应该是序列化编译器DIY的,然而,在这样做DIY第一次使用前flex 为了练练手,对于后者的理解是有帮助. 在word 我经常看到一个字计数功能,因此,它是如何实现,当然,首先想到的是要经 ...
atitit.词法分析原理词法分析器 (Lexer)
atitit.词法分析原理词法分析器 (Lexer) 1. 词法分析(英语:lexical analysis)1 2. :实现词法分析程序的常用途径:自动生成,手工生成.[1] 2 2.1. 词法分 ...
15个C++项目列表
实验楼上有很多C++的实战项目,从简单到进阶,学习每个项目都可以掌握相应的知识点. 如果你还是C++新手的话,那么这个C++的项目列表你可以拿去练手实战开发,毕竟学编程动手实践是少不了的! 如果你不知 ...
从零开始山寨Caffe·伍：Protocol Buffer简易指南
你为Class外访问private对象而苦恼嘛?你为设计序列化格式而头疼嘛? ——欢迎体验Google Protocol Buffer 面向对象之封装性历史遗留问题面向对象中最矛盾的一个特性,就是 ...

随机推荐

Android API在不同版本系统上的兼容性
随着安卓版本的不断更新,新的API不断涌出,有时候高版本的API会在低版本crash的. 如果minSdkVersion设置过低,在build的时候,就会报错(Call requires API le ...
使QQ窗口八字形转圈
//先有思路后有代码总是不知不觉中乱敲一通今天做个标记感谢老师课堂上的讲解#include <stdio.h> #include <math.h> #include & ...
Servlet学习第一天--Servlet开发映射URL配置
基础不扎实,从头学,认真记录笔记. 感谢@孤傲苍狼:http://www.cnblogs.com/xdp-gacl/p/3760336.html -为什么要配置? 由于客户端是通过URL访问web服务 ...
自绘Tab控件
自绘tab按钮效果图如下: 使用例子: MyTabControl *tabControl = NULL; tabControl = new MyTabControl();tabControl-> ...
Qt for Windows：使用WinPcap开发高性能UDP服务器
首先介绍一下WinPcap WinPcap是Windows下一个网络库,性能极其强悍而且能够接收各种包. 大名鼎鼎的WireShark就是基于这个库开发的. 那么这个库性能到底有多高呢. 我测试了UD ...
android 传感器使用 Compass指南针的实现功能
以下是指南针通过方向传感器而旋转实现. CompassDemo.java: package com.example.activity; import android.app.Activity; imp ...
创建自己的yum软件源（以Cloudera Hadoop的安装为例）
.下载Cloudera Manager安装文件 Cloudera Manager的可以从如下网址获得: http://archive.cloudera.com/cm4/installer/ 这里选择C ...
java 笔试
单例设计模式: public class Singliton { //no new private Singliton (){ } static Singliton ins = null; publi ...
Hibernate工作流程
Hibernate创建步骤 (五大核心接口:Configuration/SessionFactory/Session/Transaction/Query) 1.新建工程,导入需要的jar包. 2.利用 ...
SQL serve创建与调用存储过程
(1)创建 2编写存储过程(创建传参的存储过程)存储过程语法网络上很多不在累述语法解析 Use Person 指定在那个数据库下建立存储过程 if (object_id('MyFunction', ...

编译器DIY——词法分析

编译器DIY——词法分析的更多相关文章

随机推荐

热门专题