前端编译原理 parser.js源码解读
前面已经介绍了一个jison的使用,在正常开发中其实已经够用下,下面主要是看了下parser.js代码解读下,作为一些了解。
下面以最简单的文法产生的parser做一些代码注释
下面是一些注释,标示了编译过程,能够了解jison产生的编译器有哪些处理
var a = (function() {
var o = function(k, v, o, l) {
for (o = o || {}, l = k.length; l--; o[k[l]] = v);
return o;
};
var parser = {
trace: function trace() {},
//编译期共享对象
yy: {},
//标示符,$accept: 0, $end: 1,error: 2是默认的符号,E: 3, NAT: 4是分析文法获得的
symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },
//终结符
terminals_: { 2: 'error', 4: 'NAT' },
//对应文法规则
productions_: [
0, // e` => E $end
[
3, //规约成符号2(E)
1 //堆栈移除一个状态即移除NAT,压入E
] // E => NAT
],
//规约的时候执行的代码
performAction: function anonymous(
yytext,
yyleng,
yylineno,
yy,
yystate /* action[1] */,
$$ /* vstack */,
_$ /* lstack */
) {
var $0 = $$.length - 1;
switch (yystate) {
case 1:
this.$ = $$[$0] * 1;
break;
}
},
//移入规约自动机 lalr(1)分析表,每一值表示一个状态
table: [
/*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入,遇到符号4(NAT),移入进入状态2 */},
/*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序,完成编译 */},
/*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ }
],
defaultActions: {
2: [2, 1] //状态2进行规则规约,不用向前看符号是什么(逻辑优化)
},
//错误提示
parseError: function parseError(str, hash) {
if (hash.recoverable) {
this.trace(str);
} else {
var error = new Error(str);
error.hash = hash;
throw error;
}
},
//编译 input为处理的字符串
parse: function parse(input) {
var self = this,
stack = [0],
tstack = [],
vstack = [null],
lstack = [],
table = this.table,
yytext = '',
yylineno = 0,
yyleng = 0,
recovering = 0,
TERROR = 2,
EOF = 1;
var args = lstack.slice.call(arguments, 1);
var lexer = Object.create(this.lexer);
var sharedState = { yy: {} };
for (var k in this.yy) {
if (Object.prototype.hasOwnProperty.call(this.yy, k)) {
sharedState.yy[k] = this.yy[k];
}
}
lexer.setInput(input, sharedState.yy);
sharedState.yy.lexer = lexer;
sharedState.yy.parser = this;
if (typeof lexer.yylloc === 'undefined') {
lexer.yylloc = {};
}
var yyloc = lexer.yylloc;
lstack.push(yyloc);
//位置信息采用区间的形式显示
var ranges = lexer.options && lexer.options.ranges;
if (typeof sharedState.yy.parseError === 'function') {
this.parseError = sharedState.yy.parseError;
} else {
this.parseError = Object.getPrototypeOf(this).parseError;
}
//规约的时候需要弹出堆栈
function popStack(n) {
//stack [0,标识符,状态,标识符,状态...],所以出栈是2倍
stack.length = stack.length - 2 * n;
vstack.length = vstack.length - n;
lstack.length = lstack.length - n;
}
//词法分析,返回token即标示符
var lex = function() {
var token;
token = lexer.lex() || EOF;
if (typeof token !== 'number') {
token = self.symbols_[token] || token;
}
return token;
};
var symbol,
preErrorSymbol,
state,
action,
a,
r,
yyval = {},
p,
len,
newState,
expected;
while (true) {
state = stack[stack.length - 1];
if (this.defaultActions[state]) {
action = this.defaultActions[state];
} else {
if (symbol === null || typeof symbol === 'undefined') {
//通过正则获取下一个标示符
symbol = lex();
}
//action对应的动作
action = table[state] && table[state][symbol];
}
//动作错误,语法分析错误
if (typeof action === 'undefined' || !action.length || !action[0]) {
var errStr = '';
expected = [];
for (p in table[state]) {
if (this.terminals_[p] && p > TERROR) {
expected.push("'" + this.terminals_[p] + "'");
}
}
if (lexer.showPosition) {
errStr =
'Parse error on line ' +
(yylineno + 1) +
':\n' +
lexer.showPosition() +
'\nExpecting ' +
expected.join(', ') +
", got '" +
(this.terminals_[symbol] || symbol) +
"'";
} else {
errStr =
'Parse error on line ' +
(yylineno + 1) +
': Unexpected ' +
(symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");
}
this.parseError(errStr, {
text: lexer.match,
token: this.terminals_[symbol] || symbol,
line: lexer.yylineno,
loc: yyloc,
expected: expected
});
}
//规约-规约冲突,移入规约冲突
if (action[0] instanceof Array && action.length > 1) {
throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
}
switch (action[0]) {
//移入
case 1:
stack.push(symbol);
vstack.push(lexer.yytext);
lstack.push(lexer.yylloc);
stack.push(action[1]);
symbol = null;
if (!preErrorSymbol) {
yyleng = lexer.yyleng;
yytext = lexer.yytext;
yylineno = lexer.yylineno;
yyloc = lexer.yylloc;
if (recovering > 0) {
recovering--;
}
} else {
symbol = preErrorSymbol;
preErrorSymbol = null;
}
break;
//规约
case 2:
len = this.productions_[action[1]][1];
yyval.$ = vstack[vstack.length - len];
yyval._$ = {
first_line: lstack[lstack.length - (len || 1)].first_line,
last_line: lstack[lstack.length - 1].last_line,
first_column: lstack[lstack.length - (len || 1)].first_column,
last_column: lstack[lstack.length - 1].last_column
};
if (ranges) {
yyval._$.range = [
lstack[lstack.length - (len || 1)].range[0],
lstack[lstack.length - 1].range[1]
];
}
//执行文法定义的代码
r = this.performAction.apply(
yyval,
[yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)
);
if (typeof r !== 'undefined') {
return r;
}
if (len) {
stack = stack.slice(0, -1 * len * 2);
vstack = vstack.slice(0, -1 * len);
lstack = lstack.slice(0, -1 * len);
}
stack.push(this.productions_[action[1]][0]);
vstack.push(yyval.$);
lstack.push(yyval._$);
newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
stack.push(newState);
break;
//接受
case 3:
return true;
}
}
return true;
}
};
/* 词法分析 */
var lexer = (function() {
var lexer = {
EOF: 1, parseError: function parseError(str, hash) {
if (this.yy.parser) {
this.yy.parser.parseError(str, hash);
} else {
throw new Error(str);
}
}, // resets the lexer, sets new input
setInput: function(input, yy) {
this.yy = yy || this.yy || {};
this._input = input;
this._more = this._backtrack = this.done = false;
this.yylineno = this.yyleng = 0;
this.yytext = this.matched = this.match = '';
this.conditionStack = ['INITIAL'];
this.yylloc = {
first_line: 1,
first_column: 0,
last_line: 1,
last_column: 0
};
if (this.options.ranges) {
this.yylloc.range = [0, 0];
}
this.offset = 0;
return this;
},
input: function() {
var ch = this._input[0];
this.yytext += ch;
this.yyleng++;
this.offset++;
this.match += ch;
this.matched += ch;
var lines = ch.match(/(?:\r\n?|\n).*/g);
if (lines) {
this.yylineno++;
this.yylloc.last_line++;
} else {
this.yylloc.last_column++;
}
if (this.options.ranges) {
this.yylloc.range[1]++;
} this._input = this._input.slice(1);
return ch;
},
unput: function(ch) {
var len = ch.length;
var lines = ch.split(/(?:\r\n?|\n)/g); this._input = ch + this._input;
this.yytext = this.yytext.substr(0, this.yytext.length - len);
//this.yyleng -= len;
this.offset -= len;
var oldLines = this.match.split(/(?:\r\n?|\n)/g);
this.match = this.match.substr(0, this.match.length - 1);
this.matched = this.matched.substr(0, this.matched.length - 1); if (lines.length - 1) {
this.yylineno -= lines.length - 1;
}
var r = this.yylloc.range; this.yylloc = {
first_line: this.yylloc.first_line,
last_line: this.yylineno + 1,
first_column: this.yylloc.first_column,
last_column: lines
? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
oldLines[oldLines.length - lines.length].length -
lines[0].length
: this.yylloc.first_column - len
}; if (this.options.ranges) {
this.yylloc.range = [r[0], r[0] + this.yyleng - len];
}
this.yyleng = this.yytext.length;
return this;
},
more: function() {
this._more = true;
return this;
},
reject: function() {
if (this.options.backtrack_lexer) {
this._backtrack = true;
} else {
return this.parseError(
'Lexical error on line ' +
(this.yylineno + 1) +
'. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' +
this.showPosition(),
{
text: '',
token: null,
line: this.yylineno
}
);
}
return this;
},
less: function(n) {
this.unput(this.match.slice(n));
},
pastInput: function() {
var past = this.matched.substr(0, this.matched.length - this.match.length);
return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, '');
},
upcomingInput: function() {
var next = this.match;
if (next.length < 20) {
next += this._input.substr(0, 20 - next.length);
}
return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, '');
},
showPosition: function() {
var pre = this.pastInput();
var c = new Array(pre.length + 1).join('-');
return pre + this.upcomingInput() + '\n' + c + '^';
},
test_match: function(match, indexed_rule) {
var token, lines, backup; if (this.options.backtrack_lexer) {
// save context
backup = {
yylineno: this.yylineno,
yylloc: {
first_line: this.yylloc.first_line,
last_line: this.last_line,
first_column: this.yylloc.first_column,
last_column: this.yylloc.last_column
},
yytext: this.yytext,
match: this.match,
matches: this.matches,
matched: this.matched,
yyleng: this.yyleng,
offset: this.offset,
_more: this._more,
_input: this._input,
yy: this.yy,
conditionStack: this.conditionStack.slice(0),
done: this.done
};
if (this.options.ranges) {
backup.yylloc.range = this.yylloc.range.slice(0);
}
} lines = match[0].match(/(?:\r\n?|\n).*/g);
if (lines) {
this.yylineno += lines.length;
}
this.yylloc = {
first_line: this.yylloc.last_line,
last_line: this.yylineno + 1,
first_column: this.yylloc.last_column,
last_column: lines
? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length
: this.yylloc.last_column + match[0].length
};
this.yytext += match[0];
this.match += match[0];
this.matches = match;
this.yyleng = this.yytext.length;
if (this.options.ranges) {
this.yylloc.range = [this.offset, (this.offset += this.yyleng)];
}
this._more = false;
this._backtrack = false;
this._input = this._input.slice(match[0].length);
this.matched += match[0];
token = this.performAction.call(
this,
this.yy,
this,
indexed_rule,
this.conditionStack[this.conditionStack.length - 1]
);
if (this.done && this._input) {
this.done = false;
}
if (token) {
return token;
} else if (this._backtrack) {
for (var k in backup) {
this[k] = backup[k];
}
return false;
}
return false;
},
next: function() {
if (this.done) {
return this.EOF;
}
if (!this._input) {
this.done = true;
} var token, match, tempMatch, index;
if (!this._more) {
this.yytext = '';
this.match = '';
}
//获取词法规则
//没看出来conditionStack有什么用,可能跟我的文法定义比较简单,没有使用一些内置的begin,pushState, popState之类的有点关系
var rules = this._currentRules();
for (var i = 0; i < rules.length; i++) {
tempMatch = this._input.match(this.rules[rules[i]]);
if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
match = tempMatch;
index = i;
//backtrack_lexer,会先尝试返回token,如果想尝试下面的规则的话,通过reject()方法
if (this.options.backtrack_lexer) {
token = this.test_match(tempMatch, rules[i]);
if (token !== false) {
return token;
} else if (this._backtrack) {
match = false;
continue;
} else {
return false;
}
//flex 设置true 会匹配符合正则多的文本
//flex 设置false 根据文法顺序返回结果
} else if (!this.options.flex) {
break;
}
}
}
if (match) {
token = this.test_match(match, rules[index]);
if (token !== false) {
return token;
}
return false;
}
if (this._input === '') {
return this.EOF;
} else {
//输入字符串不存在词法对应
return this.parseError(
'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(),
{
text: '',
token: null,
line: this.yylineno
}
);
}
},
lex: function lex() {
var r = this.next();
if (r) {
return r;
} else {
return this.lex();
}
},
begin: function begin(condition) {
this.conditionStack.push(condition);
},
popState: function popState() {
var n = this.conditionStack.length - 1;
if (n > 0) {
return this.conditionStack.pop();
} else {
return this.conditionStack[0];
}
},
_currentRules: function _currentRules() {
if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
} else {
return this.conditions['INITIAL'].rules;
}
},
topState: function topState(n) {
n = this.conditionStack.length - 1 - Math.abs(n || 0);
if (n >= 0) {
return this.conditionStack[n];
} else {
return 'INITIAL';
}
},
pushState: function pushState(condition) {
this.begin(condition);
},
stateStackSize: function stateStackSize() {
return this.conditionStack.length;
},
options: {},
performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
var YYSTATE = YY_START;
switch ($avoiding_name_collisions) {
case 0 /* skip whitespace */:
break;
case 1:
return 4;
break;
case 2:
return '+';
break;
}
},
//词法中的规则
rules: [
/^(?:\s+)/,
/^(?:[0-9]+)/,
/^(?:\+)/
],
conditions: {
INITIAL: { rules: [0, 1, 2], inclusive: true }
}
};
return lexer;
})();
parser.lexer = lexer;
function Parser() {
this.yy = {};
}
Parser.prototype = parser;
parser.Parser = Parser;
return new Parser();
})();
前端编译原理 parser.js源码解读的更多相关文章
- js便签笔记(10) - 分享:json2.js源码解读笔记
1. 如何理解“json” 首先应该意识到,json是一种数据转换格式,既然是个“格式”,就是个抽象的东西.它不是js对象,也不是字符串,它只是一种格式,一种规定而已. 这个格式规定了如何将js对象转 ...
- js便签笔记(10) - 分享:json.js源码解读笔记
1. 如何理解“json” 首先应该意识到,json是一种数据转换格式,既然是个“格式”,就是个抽象的东西.它不是js对象,也不是字符串,它只是一种格式,一种规定而已. 这个格式规定了如何将js对象转 ...
- fastclick.js源码解读分析
阅读优秀的js插件和库源码,可以加深我们对web开发的理解和提高js能力,本人能力有限,只能粗略读懂一些小型插件,这里带来对fastclick源码的解读,望各位大神不吝指教~! fastclick诞生 ...
- prototype.js 源码解读(02)
如果你想研究一些比较大型的js框架的源码的话,本人建议你从其最初的版本开始研读,因为最初的版本东西少,易于研究,而后的版本基本都是在其基础上不断扩充罢了,所以,接下来我不准备完全解读prototype ...
- prototype.js 源码解读(01)
prototype.js是一个设计的非常优雅且很有实用价值的js基础类库,其源码非常值得研究.研究它的源码不仅能提升个人水平,而且对你打下坚实的js基础也很有帮助.因本人技术水平有限,该解读仅供参考. ...
- require.js 源码解读——配置默认上下文
首先,我们先来简单说一下,require.js的原理: 1.载入模块 2.通过模块名解析出模块信息,以及计算出URL 3.通过创建SCRIPT的形式把模块加载到页面中. 4.判断被加载的脚本,如 ...
- json2.js源码解读记录
相关内容:json详细用法.js语法.unicode.正则 json特点--最简单.最小巧的经典js库. json作者:道克拉斯.克劳福德(Douglas Crockford)--js大牛 出 ...
- 亚马逊左侧菜单延迟z三角 jquery插件jquery.menu-aim.js源码解读
关于亚马逊的左侧菜单延迟,之前一直不知道它的实现原理.梦神提到了z三角,我也不知道这是什么东西.13号那天很有空,等领导们签字完我就可以走了.下午的时候,找到了一篇博客:http://jayuh.co ...
- doT.js源码解读
doT.js非常的简洁.全部代码也就200行不到.它的基本思路就是通过强大的正则表达式,把模块转变成可执行的函数,动态生成html字符串.核心new Function(c.varname, str); ...
随机推荐
- Python3基础 print 格式化输出 %f %d 保留浮点数的位数 整数的位数不够零来凑
Python : 3.7.3 OS : Ubuntu 18.04.2 LTS IDE : pycharm-community-2019.1.3 ...
- C++ STL中常见容器的时间复杂度和比较和分析
C++ STL中常见容器的时间复杂度 map, set, multimap, and multiset 上述四种容器采用红黑树实现,红黑树是平衡二叉树的一种.不同操作的时间复杂度近似为: 插入: O( ...
- Windows 下使用OpenSSL生成RSA公钥和私钥
Windows 下使用OpenSSL生成RSA公钥和私钥 (1)下载OpenSSL 可到该地址下载OpenSSL: https://www.openssl.org/source/(https://ww ...
- C#DbHelperOleDb,Access数据库帮助类 (转)
/// <summary>/// 编 码 人:苏飞/// 联系方式:361983679 /// 更新网站:[url=http://www.sufeinet.com/thread-655- ...
- Dockerfile-server1
[root@lab2 docker-file]# cd server1/ [root@lab2 server1]# ls a.sh ddbes-server1-0.0.1-SNAPSHOT.jar D ...
- Codeforces Round #556 (Div. 2) D. Three Religions 题解 动态规划
题目链接:http://codeforces.com/contest/1150/problem/D 题目大意: 你有一个参考串 s 和三个装载字符串的容器 vec[0..2] ,然后还有 q 次操作, ...
- 【Leetcode_easy】1042. Flower Planting With No Adjacent
problem 1042. Flower Planting With No Adjacent 参考 1. Leetcode_easy_1042. Flower Planting With No Adj ...
- 学习开始记录一下,java 还是python?
2019.11.24开始正式开始学习JAVA. 在 bilibili站看了三天,大神们的对此问题的分析,介绍,我选择了JAVA开发语言. 在看了高淇老师的JAVA300视频,感觉比较对路,特别是第一章 ...
- MySQL(一)面试集合
1. 什么是索引? 索引是一种数据结构(存储数据),可以帮助我们快速的进行数据的查找. 索引是帮助高效获取数据的数据结构,索引是一个文件 1)索引有哪些类型: hash 二叉树 ...
- IDEA 的操作与使用
idea 设置syso File –> Setting –> Editor –> Live Templates debug 调试: F7 在 Debug 模式下,进入下一步,如果当前 ...