Skip to content

Commit a3e602c

Browse files
committed
feat: add Lua plugin with LR parsing support
1 parent d3fe07e commit a3e602c

File tree

7 files changed

+801
-1
lines changed

7 files changed

+801
-1
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ You can get an introductory overview of the tool in [this article](https://mediu
2424
- [Rust plugin](#rust-plugin)
2525
- [Java plugin](#java-plugin)
2626
- [Julia plugin](#julia-plugin)
27+
- [Lua plugin](#lua-plugin)
2728
- [Grammar format](#grammar-format)
2829
- [JSON-like notation](#json-like-notation)
2930
- [Yacc/Bison notation](#yaccbison-notation)
@@ -321,6 +322,21 @@ For complex Julia parser implementations it is recommended to leverage the JSON-
321322
}
322323
```
323324

325+
#### Lua plugin
326+
Syntax supports Lua as a target language. See its [calculator example](https://github.com/DmitrySoshnikov/syntax/blob/master/examples/calc.lua.g):
327+
328+
```
329+
./bin/syntax -g examples/calc.lua.g -m lalr1 -o calcparser.lua
330+
```
331+
332+
Then callers can use the module as:
333+
334+
```lua
335+
Parser = require("calcparser")
336+
parser = Parser.new()
337+
print(parser:parse("2^2^2^2")) -- 65536
338+
```
339+
324340
### Grammar format
325341

326342
_Syntax_ supports two main notations to define grammars: _JSON-like_ notation, and _Yacc/Bison-style_ notation.

examples/calc.lua.g

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**
2+
* Generated parser in Lua.
3+
*
4+
* ./bin/syntax -g examples/calc.lua.g -m lalr1 -o calcparser.lua
5+
*
6+
* > Parser = require("calcparser")
7+
* > parser = Parser.new()
8+
* > print(parser:parse("2^2^2^2"))
9+
* 65536
10+
*/
11+
12+
{
13+
"lex": {
14+
"rules": [
15+
["%s+", "-- skip whitespace"],
16+
["%d+", "return 'NUMBER'"],
17+
["%*", "return '*'"],
18+
["%+", "return '+'"],
19+
["%(", "return '('"],
20+
["%)", "return ')'"],
21+
["%^", "return '^'"],
22+
]
23+
},
24+
25+
"operators": [
26+
["left", "+"],
27+
["left", "*"],
28+
["right", "^"],
29+
],
30+
31+
"bnf": {
32+
"E": [
33+
["E + E", "$$ = $1 + $3"],
34+
["E * E", "$$ = $1 * $3"],
35+
["E ^ E", "$$ = $1 ^ $3"],
36+
["NUMBER", "$$ = tonumber($1)"],
37+
["( E )", "$$ = $2"],
38+
],
39+
},
40+
}

src/bin/syntax.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,8 @@ const parsers = {
402402
.default,
403403
jl: require(ROOT + 'plugins/julia/lr/lr-parser-generator-julia.js')
404404
.default,
405+
lua: require(ROOT + 'plugins/lua/lr/lr-parser-generator-lua.js')
406+
.default,
405407
};
406408

407409
const LRParserGenerator = GENERATORS[language] || GENERATORS.js;
@@ -587,7 +589,7 @@ function getLexGrammarData(options) {
587589

588590
// If explicit lexical grammar file was passed, use it.
589591
if (options.lex) {
590-
data = Grammar.dataFromGrammarFile(options.lex, { grammarType: 'lex' });
592+
data = Grammar.dataFromGrammarFile(options.lex, {grammarType: 'lex'});
591593
}
592594

593595
if (options['ignore-whitespaces'] && !data) {
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* The MIT License (MIT)
3+
* Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
4+
*/
5+
6+
const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
7+
const LuaParserGeneratorTrait = require('../lua-parser-generator-trait');
8+
9+
import fs from 'fs';
10+
import path from 'path';
11+
12+
// Source text of the Lua LR parser template. It is read synchronously, once,
// when this module is evaluated, from a path relative to this file's directory.
const LUA_LR_PARSER_TEMPLATE = fs.readFileSync(
13+
`${__dirname}/../templates/lr.template.lua`,
14+
'utf-8',
15+
);
16+
17+
/**
 * LR parser generator that targets Lua.
 *
 * Builds on the default LR generator: it installs the Lua parser template
 * and mixes in `LuaParserGeneratorTrait`, whose methods emit the Lua-specific
 * pieces of the generated parser.
 */
export default class LRParserGeneratorLua extends LRParserGeneratorDefault {
  /**
   * Instance constructor.
   *
   * @param {Object} params
   * @param {Object} params.grammar - grammar forwarded to the base generator
   * @param {string} params.outputFile - path of the file to generate; its
   *   base name (without extension) becomes the parser class name
   * @param {Object} [params.options={}] - options forwarded to the base
   *   generator
   */
  constructor({grammar, outputFile, options = {}}) {
    super({grammar, outputFile, options});
    this.setTemplate(LUA_LR_PARSER_TEMPLATE);

    // Handler descriptors for lexical rules (_lexRule1, _lexRule2, ...)
    // and for productions. Both lists are filled in by the trait methods.
    this._lexHandlers = [];
    this._productionHandlers = [];

    // The parser class name is inferred from the output file name,
    // with its extension stripped.
    const extension = path.extname(outputFile);
    this._parserClassName = path.basename(outputFile, extension);

    // Copy the trait members onto this instance, so they take precedence
    // over same-named methods found on the prototype chain.
    Object.assign(this, LuaParserGeneratorTrait);
  }

  /**
   * Generates parser code: runs the base generation first, then emits the
   * lexical rule handlers, the production handlers, and the parser class
   * name.
   */
  generateParserData() {
    super.generateParserData();
    this.generateLexHandlers();
    this.generateProductionHandlers();
    this.generateParserClassName(this._parserClassName);
  }
}
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
/**
2+
* The MIT License (MIT)
3+
* Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
4+
*/
5+
6+
import fs from 'fs';
7+
8+
// Source text of the Lua tokenizer template. It is read synchronously, once,
// when this module is evaluated, from a path relative to this file's directory.
const LUA_TOKENIZER_TEMPLATE = fs.readFileSync(
9+
`${__dirname}/templates/tokenizer.template.lua`,
10+
'utf-8'
11+
);
12+
13+
/**
 * Words reserved by Lua. They match the identifier pattern, but cannot be
 * used as bare `name = value` keys in a table constructor; they have to be
 * written in the bracketed `["name"] = value` form instead.
 */
const LUA_RESERVED_WORDS = new Set([
  'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for',
  'function', 'goto', 'if', 'in', 'local', 'nil', 'not', 'or',
  'repeat', 'return', 'then', 'true', 'until', 'while',
]);

/**
 * Default code for the module include section, used when the grammar does
 * not provide one. It must be valid Lua, since it is injected into the
 * generated parser.
 */
const LUA_DEFAULT_MODULE_INCLUDE = `
-- No module include code was specified in the grammar.
`;

/**
 * Renders a JS string as a double-quoted Lua string literal.
 *
 * Backslashes are escaped first, so that the escapes added for quotes and
 * line breaks are not escaped a second time.
 */
function toLuaStringLiteral(text) {
  const escaped = text
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
  return `"${escaped}"`;
}

/**
 * Renders a table key: a bare name when it is a valid, non-reserved Lua
 * identifier, and the bracketed string form otherwise.
 */
function toLuaTableKey(key) {
  const isBareName =
    /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(key) && !LUA_RESERVED_WORDS.has(key);
  return isBareName ? key : `[${toLuaStringLiteral(key)}]`;
}

/**
 * Wraps raw text into a Lua long-bracket string (`[[...]]`, `[=[...]=]`, ...).
 *
 * The smallest bracket level is chosen for which the closing delimiter first
 * occurs exactly at the end of the literal. This also covers text that ends
 * with `]`, which would terminate a level-0 literal one character too early.
 */
function toLuaLongString(text) {
  let level = 0;
  let closer = ']]';
  while (`${text}${closer}`.indexOf(closer) !== text.length) {
    level++;
    closer = `]${'='.repeat(level)}]`;
  }
  return `[${'='.repeat(level)}[${text}${closer}`;
}

/**
 * Lua-specific generator methods.
 *
 * The object is meant to be mixed into a parser generator instance: its
 * methods rely on `this` providing `writeData`, `_grammar`, `_tokens`,
 * `_lexHandlers`, `_productionHandlers`, and the generic data producers
 * (`generateParseTableData`, `generateRawProductionsData`, etc.).
 */
const LuaParserGeneratorTrait = {
  /**
   * Generates parser class name.
   */
  generateParserClassName(className) {
    this.writeData('PARSER_CLASS_NAME', className);
  },

  /**
   * Generates the parsing table in the Lua table format.
   */
  generateParseTable() {
    this.writeData('TABLE', this._toLuaMap(this.generateParseTableData()));
  },

  /**
   * Generates tokens table in the Lua table format.
   */
  generateTokensTable() {
    this.writeData('TOKENS', this._toLuaMap(this._tokens));
  },

  /**
   * Registers the semantic action of a production as a handler.
   *
   * Returns the handler name (`_handler1`, `_handler2`, ...), or `null`
   * when the production has no semantic action.
   */
  buildSemanticAction(production) {
    const code = this.getSemanticActionCode(production);

    if (!code) {
      return null;
    }

    const action = `${code};`;
    const args = this.getSemanticActionParams(production).join(',');

    this._productionHandlers.push({args, action});
    return `_handler${this._productionHandlers.length}`;
  },

  /**
   * Converts raw productions data into Lua table constructors.
   *
   * The first two items of each entry are emitted as is; any following
   * items (the handler name) are emitted as quoted strings.
   */
  generateProductionsData() {
    return this.generateRawProductionsData().map(data => {
      const items = data.map((item, index) =>
        index >= 2 ? `"${item}"` : item
      );
      return `{ ${items.join(',')} }`;
    });
  },

  /**
   * Injects the built-in tokenizer template.
   */
  generateBuiltInTokenizer() {
    this.writeData('TOKENIZER', LUA_TOKENIZER_TEMPLATE);
  },

  /**
   * Generates the list of lexical rules, and records a handler for each.
   */
  generateLexRules() {
    const lexRules = this._grammar
      .getLexGrammar()
      .getRules()
      .map(lexRule => {
        const action = `${lexRule.getRawHandler()};`;

        this._lexHandlers.push({args: '', action});

        // NOTE(review): the `i` flag is appended to the pattern text itself;
        // Lua patterns have no flags, so confirm that the tokenizer template
        // strips and interprets this suffix.
        const flags = lexRule.isCaseInsensitive() ? 'i' : '';

        // Example: {[[%s+]], "_lexRule1"}
        const matcher = toLuaLongString(`${lexRule.getRawMatcher()}${flags}`);
        return `{${matcher}, "_lexRule${this._lexHandlers.length}"}`;
      });

    this.writeData('LEX_RULES', `{ ${lexRules.join(',\n')} }`);
  },

  /**
   * Generates the map from a start condition to the indices of its rules.
   */
  generateLexRulesByStartConditions() {
    const lexGrammar = this._grammar.getLexGrammar();
    const result = {};

    const rulesByConditions = lexGrammar.getRulesByStartConditions();
    for (const [condition, rules] of Object.entries(rulesByConditions)) {
      result[condition] = rules.map(lexRule =>
        lexGrammar.getRuleIndex(lexRule)
      );
    }

    this.writeData('LEX_RULES_BY_START_CONDITIONS', this._toLuaMap(result));
  },

  /**
   * Converts a JS value to its Lua representation.
   * E.g. converts {foo: 10, bar: 20} into {foo = 10, bar = 20}
   *
   * - `null`, and any value of an unsupported type, becomes `nil`;
   * - numbers and booleans are emitted as is;
   * - strings become escaped, double-quoted Lua string literals;
   * - arrays become sequence tables: {a, b, c};
   * - objects become tables with `name = value` entries, falling back to
   *   `["name"] = value` for keys which are not valid bare Lua names.
   */
  _toLuaMap(value) {
    function convert(item) {
      if (item === null) {
        return 'nil';
      }

      if (typeof item === 'number' || typeof item === 'boolean') {
        return item.toString();
      }

      if (typeof item === 'string') {
        return toLuaStringLiteral(item);
      }

      if (Array.isArray(item)) {
        return `{${item.map(convert).join(', ')}}`;
      }

      if (typeof item === 'object') {
        const entries = Object.entries(item).map(
          ([key, entry]) => `${toLuaTableKey(key)} = ${convert(entry)}`
        );
        return `{${entries.join(', ')}}`;
      }

      return 'nil'; // fallback
    }

    return convert(value);
  },

  /**
   * Lua lex rules handler declarations.
   */
  generateLexHandlers() {
    const handlers = this._generateHandlers(
      this._lexHandlers,
      'Tokenizer:',
      '_lexRule',
      '' /* return type, you can use e.g. 'string' */
    );
    this.writeData('LEX_RULE_HANDLERS', handlers.join('\n\n'));
  },

  /**
   * Lua parser handler declarations.
   */
  generateProductionHandlers() {
    const handlers = this._generateHandlers(
      this._productionHandlers,
      'parser:',
      '_handler',
      '' /* return type */
    );
    this.writeData('PRODUCTION_HANDLERS', handlers.join('\n'));
  },

  /**
   * Productions array in the Lua format.
   *
   * An array of arrays, see `generateProductionsData` for details.
   */
  generateProductions() {
    this.writeData(
      'PRODUCTIONS',
      `{ ${this.generateProductionsData().join(',\n')} }`
    );
  },

  /**
   * Injects the code passed in the module include directive.
   *
   * When the grammar provides none, a Lua comment is injected instead, so
   * that the generated parser stays valid Lua.
   */
  generateModuleInclude() {
    const moduleInclude =
      this._grammar.getModuleInclude() || LUA_DEFAULT_MODULE_INCLUDE;

    this.writeData('MODULE_INCLUDE', moduleInclude);
  },

  /**
   * Builds Lua function declarations for the given handlers.
   *
   * Each function is named `<class_prefix><name><N>`, with N starting at 1.
   * The `returnType` parameter is accepted for interface compatibility, and
   * is not used: Lua functions do not declare return types.
   */
  _generateHandlers(handlers, class_prefix, name, returnType = '') {
    return handlers.map(({args, action}, index) => {
      return (
        `\nfunction ${class_prefix}${name}${index + 1}` +
        `(${args})\n\t\t${action}\nend`
      );
    });
  },
};
201+
202+
module.exports = LuaParserGeneratorTrait;

0 commit comments

Comments
 (0)