Python版C语言词法分析器

一个带有图形界面的C语言词法分析器，使用 Python 2.7 编写。

#!/usr/bin/python

# -*- coding: utf-8 -*-

import sys

from Tkinter import *

from tkFont import *

from FileDialog import *

# Reserved words that the scanner reports as keyword tokens.
KEYWORD_LIST = [
    'if', 'else', 'while', 'break', 'continue', 'for',
    'double', 'int', 'float', 'long', 'short', 'bool',
    'switch', 'case', 'return', 'void',
]

# Single-character separators.  The blank is listed so that a space is
# accepted as the terminator of a numeric literal.
SEPARATOR_LIST = [
    '{', '}', '[', ']', '(', ')', '~', ',', ';', '.', '?', ':', ' ',
]

# Operator spellings (one or two characters) known to the scanner.
OPERATOR_LIST = [
    '+', '++', '-', '--', '+=', '-=', '*', '*=', '%', '%=', '->',
    '|', '||', '|=', '/', '/=', '>', '<', '>=', '<=', '=', '==',
    '!=', '!', '&',
]

# Maps every token spelling (or literal class name) to its numeric category
# code.  Each key appears exactly once: a dict literal silently keeps only
# the last value of a repeated key, so the shadowed entries that used to sit
# at codes 260 ("register"), 276 ("char"), 290 and 297 ("sizeof") have been
# dropped.  The effective mapping is unchanged.
CATEGORY_DICT = {
    # KEYWORD
    "far": 257,
    "near": 258,
    "pascal": 259,
    "asm": 261,
    "cdecl": 262,
    "huge": 263,
    "auto": 264,
    "double": 265,
    "int": 266,
    "struct": 267,
    "break": 268,
    "else": 269,
    "long": 270,
    "switch": 271,
    "case": 272,
    "enum": 273,
    "register": 274,
    "typedef": 275,
    "extern": 277,
    "return": 278,
    "union": 279,
    "const": 280,
    "float": 281,
    "short": 282,
    "unsigned": 283,
    "continue": 284,
    "for": 285,
    "signed": 286,
    "void": 287,
    "default": 288,
    "goto": 289,
    "volatile": 291,
    "do": 292,
    "if": 293,
    "while": 294,
    "static": 295,
    "interrupt": 296,
    "NULL": 298,

    # SEPARATOR
    "{": 299,
    "}": 300,
    "[": 301,
    "]": 302,
    "(": 303,
    ")": 304,
    "~": 305,
    ",": 306,
    ";": 307,
    ".": 308,
    "#": 309,
    "?": 310,
    ":": 311,

    # OPERATOR
    "<<": 312,
    ">>": 313,
    "<": 314,
    "<=": 315,
    ">": 316,
    ">=": 317,
    "=": 318,
    "==": 319,
    "|": 320,
    "||": 321,
    "|=": 322,
    "^": 323,
    "^=": 324,
    "&": 325,
    "&&": 326,
    "&=": 327,
    "%": 328,
    "%=": 329,
    "+": 330,
    "++": 331,
    "+=": 332,
    "-": 333,
    "--": 334,
    "-=": 335,
    "->": 336,
    "/": 337,
    "/=": 338,
    "*": 339,
    "*=": 340,
    "!": 341,
    "!=": 342,
    "sizeof": 343,
    "<<=": 344,
    ">>=": 345,

    # Literal / identifier classes (code 349 is unassigned)
    "inum": 346,
    "int16": 347,
    "int8": 348,
    "char": 350,
    "string": 351,
    "bool": 352,
    "fnum": 353,
    "IDN": 354,
}

# Scanner cursor shared by getchar()/ungetchar(): index of the line being
# read and of the character last returned within it.  The column starts at
# -1 because getchar() advances it before reading.
current_line = 0
current_row = -1

# Line number of the output pane at which error() writes its next message.
out_line = 1

def getchar(input_str):
    """Advance the global cursor by one position and return that character.

    input_str is the source split into lines (no newline characters).  The
    last element is never scanned: reaching it means end of input, which
    matches the trailing empty element produced by splitting text that ends
    with a newline.

    The end of every line yields a single blank (' ') so that a line break
    separates tokens just like a space does, and so that empty lines can be
    traversed without indexing into an empty string.

    Returns a one-character string, or the sentinel 'SCANEOF' at end of
    input (also for empty input, and on every further call after the end).
    """
    global current_row
    global current_line

    last_line = len(input_str) - 1
    if current_line >= last_line:
        return 'SCANEOF'

    current_row += 1
    if current_row > len(input_str[current_line]):
        # The virtual end-of-line blank was already delivered: next line.
        current_line += 1
        current_row = 0
        if current_line >= last_line:
            return 'SCANEOF'

    line = input_str[current_line]
    if current_row == len(line):
        # Column one past the last character stands for the line break.
        return ' '
    return line[current_row]

def ungetchar(input_str):
    """Move the global cursor back by one character.

    When the cursor is on the first column of a line it moves to the last
    character of the previous line.  At the very start of the input the
    cursor is parked before the first character instead of wrapping around.

    Returns the character now under the cursor, or '' when there is none
    (start of input, or the previous line is empty).
    """
    global current_row
    global current_line

    current_row -= 1
    if current_row < 0:
        if current_line == 0:
            # Nothing earlier to go back to; the next getchar() call will
            # deliver the first character again.
            current_row = -1
            return ''
        current_line -= 1
        # Use the length of the line just stepped back to, not the column
        # number, to find its last character.
        current_row = len(input_str[current_line]) - 1

    line = input_str[current_line]
    if 0 <= current_row < len(line):
        return line[current_row]
    return ''

def error(msg, line=None, row=None):
    """Write one diagnostic line into the analysis pane.

    msg  -- message text (a string).
    line -- line number to report; defaults to the scanner's current line
            plus one.
    row  -- column number to report; defaults to the scanner's current
            column plus one.

    The message is inserted at output line ``out_line``, followed by a
    newline, and ``out_line`` is then advanced by one.
    """
    global out_line

    reported_line = current_line + 1 if line is None else line
    reported_row = current_row + 1 if row is None else row

    target = str(out_line)
    text = str(reported_line) + ':' + str(reported_row) + 'Error: ' + msg
    analysis.insert(target + '.0', text)
    analysis.insert(target + '.end', "\n")
    out_line += 1

def _scan_number(input_str, first_digit):
    """Scan a numeric literal whose first character is first_digit.

    Returns ('INUM', value, code) for integers and exponent forms,
    ('FNUM', text, code) for floats, or ('', '', '') after a malformed
    literal has been reported through error().
    """
    int_value = 0
    char = first_digit
    while char.isdigit():
        int_value = int_value * 10 + int(char)
        char = getchar(input_str)

    # A number must be followed by an operator, a separator, or an 'e'
    # introducing an exponent.
    if char not in OPERATOR_LIST and char not in SEPARATOR_LIST and char != 'e':
        error('illigal identifier', current_line + 1, current_row + 1)
        return ('', '', '')

    if char == 'e':
        literal = str(int_value) + 'e'
        char = getchar(input_str)
        if char == '+' or char == '-':
            literal += char
            char = getchar(input_str)
        while char.isdigit():
            literal += char
            char = getchar(input_str)
        if char not in OPERATOR_LIST and char not in SEPARATOR_LIST:
            error('illigal const int value in power', current_line + 1, current_row + 1)
            return ('', '', '')
        ungetchar(input_str)
        return ('INUM', literal, CATEGORY_DICT['inum'])

    if char == '.':
        literal = str(int_value) + '.'
        char = getchar(input_str)
        while char.isdigit():
            literal += char
            char = getchar(input_str)
        # A second '.' is rejected even though '.' is a separator.
        if (char not in OPERATOR_LIST and char not in SEPARATOR_LIST) or char == '.':
            error('illigal const float value', current_line + 1, current_row + 1)
            return ('', '', '')
        ungetchar(input_str)
        return ('FNUM', literal, CATEGORY_DICT['fnum'])

    # Plain integer: give the terminating character back.
    ungetchar(input_str)
    return ('INUM', int_value, CATEGORY_DICT['inum'])


def _scan_word(input_str, first_char):
    """Scan an identifier or keyword whose first character is first_char."""
    chars = []
    char = first_char
    while char.isalpha() or char.isdigit() or char == '_':
        chars.append(char)
        char = getchar(input_str)
        # The sentinel is alphabetic text, so it must be tested explicitly.
        if char == 'SCANEOF':
            break
    ungetchar(input_str)

    word = ''.join(chars)
    if word in KEYWORD_LIST:
        return (word, '', CATEGORY_DICT[word])
    return ('IDN', word, CATEGORY_DICT['IDN'])


def _scan_string(input_str):
    """Scan a string literal; the opening quote has already been consumed.

    Returns ('STRING_LITERAL', text, code), or ('SCANEOF', '', '') after
    reporting a literal that is still open at end of input.
    """
    # Position of the opening quote, for the diagnostic.
    line = current_line + 1
    row = current_row + 1

    pieces = []
    char = getchar(input_str)
    while char != '\"':
        # Test for end of input before storing, so the sentinel never
        # becomes part of the literal.
        if char == 'SCANEOF':
            error('missing terminating \"', line, row)
            return ('SCANEOF', '', '')
        pieces.append(char)
        char = getchar(input_str)
    return ('STRING_LITERAL', ''.join(pieces), CATEGORY_DICT['string'])


def _skip_block_comment(input_str, line, row):
    """Consume a /* ... */ comment whose opening has already been read.

    line/row are reported if the comment is still open at end of input.
    Returns None when the comment was closed, else ('SCANEOF', '', '').
    """
    char = getchar(input_str)
    while True:
        if char == 'SCANEOF':
            error('unteminated /* comment', line, row)
            return ('SCANEOF', '', '')
        if char == '*':
            char = getchar(input_str)
            if char == '/':
                return None
            # Not a terminator: examine this character again, since it may
            # itself be the '*' of the real terminator (as in "**/").
            continue
        char = getchar(input_str)


def _scan_operator(input_str, first_char):
    """Return the longest known operator (one or two characters) at the cursor."""
    following = getchar(input_str)
    combined = first_char + following
    # Join two characters only when together they spell a known operator;
    # otherwise input such as "=-" would be looked up as a single token.
    if following in OPERATOR_LIST and combined in CATEGORY_DICT:
        return combined
    ungetchar(input_str)
    return first_char


def scanner(input_str):
    """Read the next token from input_str (a list of source lines).

    Returns:
        None                     -- whitespace, a skipped comment, or an
                                    unknown character (already reported);
        ('SCANEOF', '', '')      -- end of input, or an unterminated
                                    string/comment was reported;
        ('', '', '')             -- a malformed number was reported;
        (kind, value, code)      -- a token, where kind is a keyword
                                    spelling, 'IDN', 'INUM', 'FNUM',
                                    'STRING_LITERAL', 'SEP' or 'OP'.
    """
    current_char = getchar(input_str)

    if current_char == 'SCANEOF':
        return ('SCANEOF', '', '')
    if current_char.strip() == '':
        return None
    if current_char.isdigit():
        return _scan_number(input_str, current_char)
    if current_char.isalpha() or current_char == '_':
        return _scan_word(input_str, current_char)
    if current_char == '\"':
        return _scan_string(input_str)

    if current_char == '/':
        following = getchar(input_str)
        if following == '*':
            return _skip_block_comment(input_str, current_line + 1, current_row + 1)
        # Not a comment: put the character back and treat '/' as an operator.
        ungetchar(input_str)

    if current_char in SEPARATOR_LIST:
        return ('SEP', current_char, CATEGORY_DICT[current_char])
    if current_char in OPERATOR_LIST:
        op = _scan_operator(input_str, current_char)
        return ('OP', op, CATEGORY_DICT[op])

    error('unknown character: ' + current_char)
    return None

def fileloader():
    """Let the user pick a file and load its text into the code editor.

    The editor is only cleared once a file has actually been read, so
    cancelling the dialog (or failing to open the file) leaves the current
    contents untouched.
    """
    filename = LoadFileDialog(root).go()
    if not filename:
        # Dialog dismissed without a selection.
        return

    # The context manager closes the file even if reading fails.
    with open(filename, "r") as fin:
        contents = fin.read()

    code.delete(1.0, END)
    code.insert(1.0, contents)

def lexer_analysis(input_str):
    """Tokenise input_str (a list of source lines) from the beginning.

    Resets the shared scanner cursor, then calls scanner() until it reports
    'SCANEOF'.  Results of None are skipped.  Returns a list with one
    string per token: its three fields joined by two tab characters.
    """
    global current_row
    global current_line

    # Rewind; the column starts at -1 because getchar() advances first.
    current_line = 0
    current_row = -1

    rows = []
    token = scanner(input_str)
    while token is None or token[0] != 'SCANEOF':
        if token is not None:
            rows.append("\t\t".join([str(token[0]), str(token[1]), str(token[2])]))
        token = scanner(input_str)
    return rows

def lexer():
    """Tokenise the editor contents and display the result.

    Clears the analysis pane, scans the text of the code pane once, and
    appends one line per token after any diagnostics that error() wrote
    during the scan.
    """
    global out_line

    analysis.delete(1.0, END)
    # error() writes at the global out_line; restart it for the cleared
    # pane so diagnostics of this run begin at the first line.
    out_line = 1

    source_lines = code.get(1.0, END).split("\n")

    # Scan exactly once: every pass re-emits its diagnostics via error().
    for token_row in lexer_analysis(source_lines):
        # Append at the end so token rows never land inside a diagnostic.
        analysis.insert(END, token_row + "\n")

def pre_interface():
    """Build the main window and run the Tk event loop.

    Creates the module-level widgets ``root`` (window), ``code`` (source
    editor, packed left) and ``analysis`` (output pane, packed right), a
    title label at the top, and two buttons at the bottom wired to lexer()
    and fileloader().  Does not return until the event loop ends.
    """
    global root, code, analysis

    root = Tk()
    code = Text(root, width=60, height=20, font=15)
    analysis = Text(root, width=60, height=20, font=15)

    banner_text = StringVar()
    banner_text.set('Patrick的词法分析器')
    banner = Label(root, textvariable=banner_text, font=15)

    analyse_button = Button(root, text='词法分析', command=lexer, font=15)
    load_button = Button(root, text='   载入代码    ', command=fileloader, font=15)

    root.title("LEXER")

    # Pack order determines the layout, so it is kept exactly as designed.
    banner.pack(side=TOP)
    analyse_button.pack(side=BOTTOM)
    load_button.pack(side=BOTTOM)
    code.pack(side=LEFT)
    analysis.pack(side=RIGHT)

    root.mainloop()

def main():
    """Program entry point: build the GUI and hand control to it."""
    pre_interface()

# lexer()

# Start the GUI only when this file is executed as a script.
if __name__ == '__main__':

    main()

Python版C语言词法分析器的更多相关文章

编码的秘密（python版）
编码(python版) 最近在学习python的过程中,被不同的编码搞得有点晕,于是看了前人的留下的文档,加上自己的理解,准备写下来,分享给正在为编码苦苦了挣扎的你. 编码的概念编码就是将信息从一种 ...
豆瓣top250（go版以及python版）
最近学习go,就找了一个例子练习[go语言爬虫]go语言爬取豆瓣电影top250,思路大概就是获取网页,然后根据页面元素,用正则表达式匹配电影名称.评分.评论人数.原文有个地方需要修改下patte ...
【Python】《大话设计模式》Python版代码实现
<大话设计模式>Python版代码实现上一周把<大话设计模式>看完了,对面向对象技术有了新的理解,对于一个在C下写代码比较多.偶尔会用到一些脚本语言写脚本的人来说,很是开阔眼 ...
python之 python 起源、语言特点
一. 1.1 什么是 PythonPython 是一门优雅而健壮的编程语言,它继承了传统编译语言的强大性和通用性,同时也借鉴了简单脚本和解释语言的易用性.它可以帮你完成工作,而且一段时间以后,你还能 ...
ROS Learning-011 beginner_Tutorials （编程）编写 ROS 话题版的 Hello World 程序（Python版）
ROS Indigo beginner_Tutorials-10 编写 ROS 话题版的 Hello World 程序(Python版) 我使用的虚拟机软件:VMware Workstation 11 ...
《大话设计模式》Python版代码实现
上一周把<大话设计模式>看完了,对面向对象技术有了新的理解,对于一个在C下写代码比较多.偶尔会用到一些脚本语言写脚本的人来说,很是开阔眼界.<大话设计模式>的代码使用C#写成的 ...
自己动手实现智能家居之树莓派GPIO简介（Python版）
[前言] 一个热爱技术的人一定向往有一个科技感十足的环境吧,那何不亲自实践一下属于技术人的座右铭:“技术改变世界”. 就让我们一步步动手搭建一个属于自己的“智能家居平台”吧(不要对这个名词抬杠啦,技术 ...
移动端自动化测试Appium 从入门到项目实战Python版☝☝☝
移动端自动化测试Appium 从入门到项目实战Python版 (一个人学习或许会很枯燥,但是寻找更多志同道合的朋友一起,学习将会变得更加有意义✌✌) 说到APP自动化测试,Appium可是说是非常流 ...
python调用C语言接口
python调用C语言接口注:本文所有示例介绍基于linux平台在底层开发中,一般是使用C或者C++,但是有时候为了开发效率或者在写测试脚本的时候,会经常使用到python,所以这就涉及到一个问题 ...

随机推荐

java 转成字符串 json 数组和迭代
当你需要转成一串一串的json 排列 .当内容和遍历它们. 首页进口 net.sf.json.JSONArray和net.sf.json.JSONObject 两个jar 包 String str = ...
进击的Android注入术《二》
继续在<一>里,我把基本思路描写叙述了一遍,接下为我们先从注入開始入手. 注入分类我们平时所说的代码注入,主要静态和动态两种方式静态注入,针对是可运行文件,比方平时我们改动ELF, ...
readonly和const的区别
readonly与const的区别1.const常量在声明的同时必须赋值,readonly在声明时可以不赋值2.readonly只能在声明时或在构造方法中赋值(readonly的成员变量可以根据调用不 ...
ESB 设计
ESB 设计最近为公司完成了一个 ESB 的设计.下面简要说明一下具体的设计方案. 企业 SOA 整体方案在前一篇<SOA.ESB.NServiceBus.云计算总结>中说到,SOA ...
《剑指Offer》面试题-用两个栈实现队列
题目描述: 用两个栈来实现一个队列,完成队列的Push和Pop操作.队列中的元素为int类型. 输入: 每个输入文件包含一个测试样例.对于每个测试样例,第一行输入一个n(1<=n<=100 ...
Oracle常用操作
比较时间 select * from up_date where update < to_date('2007-09-07 00:00:00','yyyy-mm-dd hh24:mi:ss' ...
sql 数据库的备份还原问题
今天工作中犯了一个严重的错误,就是在sql中写了一个update语句,还没写条件呢,结果误按了F5,唉,太佩服自己啦...这个脑子怎么不管用了呢?? 唉不说了,我在网上翻来覆去的找资料,最终想是不是可 ...
jquery 分页控件1
jquery 分页控件(一) 以前一直都是用别人的分页控件,虽然用得很爽,但总觉的还是自己写个小插件比较好,这个插件效果.代码等都有参照别人完成的控件.即便功能并不是那么完善,扩展性也不好,bug或许 ...
C#程序调用cmd.exe执行命令
代码部分 using System.Diagnostics; public class CmdHelper { private static string CmdPath = @"C:\Wi ...
SAX解析xml浅析
SAX解析XML文件采用事件驱动的方式进行,也就是说,SAX是逐行扫描文件,遇到符合条件的设定条件后就会触发特定的事件,回调你写好的事件处理程序.使用SAX的优势在于其解析速度较快,占用内存较少(相对 ...

Python版C语言词法分析器

Python版C语言词法分析器的更多相关文章

随机推荐

热门专题