Skip to content

Commit b540b2d

Browse files
committed
Implement case-insensitive literal matching
1 parent 88c50a3 commit b540b2d

File tree

7 files changed

+112
-44
lines changed

7 files changed

+112
-44
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ There are several types of parsing expressions, some of them containing subexpre
128128

129129
#### "*literal*"<br>'*literal*'
130130

131-
Match exact literal string and return it. The string syntax is the same as in JavaScript.
131+
Match exact literal string and return it. The string syntax is the same as in JavaScript. Appending `i` right after the literal makes the match case-insensitive.
132132

133133
#### .
134134

src/emitter.js

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -585,12 +585,28 @@ PEG.compiler.emitter = function(ast) {
585585
'#if node.value.length === 0',
586586
' #{resultVar} = "";',
587587
'#else',
588-
' #if node.value.length === 1',
589-
' if (input.charCodeAt(pos) === #{node.value.charCodeAt(0)}) {',
588+
' #if !node.ignoreCase',
589+
' #if node.value.length === 1',
590+
' if (input.charCodeAt(pos) === #{node.value.charCodeAt(0)}) {',
591+
' #else',
592+
' if (input.substr(pos, #{node.value.length}) === #{string(node.value)}) {',
593+
' #end',
590594
' #else',
591-
' if (input.substr(pos, #{node.value.length}) === #{string(node.value)}) {',
595+
/*
596+
* One-char literals are not optimized when case-insensitive
597+
* matching is enabled. This is because there is no simple way to
598+
* lowercase a character code that works for characters outside ASCII
599+
* letters. Moreover, |toLowerCase| can change string length,
600+
* meaning the result of lowercasing a character can be more
601+
* than one character.
602+
*/
603+
' if (input.substr(pos, #{node.value.length}).toLowerCase() === #{string(node.value.toLowerCase())}) {',
592604
' #end',
593-
' #{resultVar} = #{string(node.value)};',
605+
' #if !node.ignoreCase',
606+
' #{resultVar} = #{string(node.value)};',
607+
' #else',
608+
' #{resultVar} = input.substr(pos, #{node.value.length});',
609+
' #end',
594610
' pos += #{node.value.length};',
595611
' } else {',
596612
' #{resultVar} = null;',

src/parser.js

Lines changed: 39 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/parser.pegjs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,11 @@ identifier "identifier"
195195
* vaguely).
196196
*/
197197
literal "literal"
198-
= value:string {
198+
= value:(doubleQuotedString / singleQuotedString) flags:"i"? __ {
199199
return {
200-
type: "literal",
201-
value: value
200+
type: "literal",
201+
value: value,
202+
ignoreCase: flags === "i"
202203
};
203204
}
204205

test/compiler-test.js

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -190,17 +190,33 @@ test("literals", function() {
190190
parses(zeroCharParser, "", "");
191191
doesNotParse(zeroCharParser, "a");
192192

193-
var oneCharParser = PEG.buildParser('start = "a"');
194-
parses(oneCharParser, "a", "a");
195-
doesNotParse(oneCharParser, "");
196-
doesNotParse(oneCharParser, "b");
197-
198-
var multiCharParser = PEG.buildParser('start = "abcd"');
199-
parses(multiCharParser, "abcd", "abcd");
200-
doesNotParse(multiCharParser, "");
201-
doesNotParse(multiCharParser, "abc");
202-
doesNotParse(multiCharParser, "abcde");
203-
doesNotParse(multiCharParser, "efgh");
193+
var oneCharCaseSensitiveParser = PEG.buildParser('start = "a"');
194+
parses(oneCharCaseSensitiveParser, "a", "a");
195+
doesNotParse(oneCharCaseSensitiveParser, "");
196+
doesNotParse(oneCharCaseSensitiveParser, "A");
197+
doesNotParse(oneCharCaseSensitiveParser, "b");
198+
199+
var multiCharCaseSensitiveParser = PEG.buildParser('start = "abcd"');
200+
parses(multiCharCaseSensitiveParser, "abcd", "abcd");
201+
doesNotParse(multiCharCaseSensitiveParser, "");
202+
doesNotParse(multiCharCaseSensitiveParser, "abc");
203+
doesNotParse(multiCharCaseSensitiveParser, "abcde");
204+
doesNotParse(multiCharCaseSensitiveParser, "ABCD");
205+
doesNotParse(multiCharCaseSensitiveParser, "efgh");
206+
207+
var oneCharCaseInsensitiveParser = PEG.buildParser('start = "a"i');
208+
parses(oneCharCaseInsensitiveParser, "a", "a");
209+
parses(oneCharCaseInsensitiveParser, "A", "A");
210+
doesNotParse(oneCharCaseInsensitiveParser, "");
211+
doesNotParse(oneCharCaseInsensitiveParser, "b");
212+
213+
var multiCharCaseInsensitiveParser = PEG.buildParser('start = "abcd"i');
214+
parses(multiCharCaseInsensitiveParser, "abcd", "abcd");
215+
parses(multiCharCaseInsensitiveParser, "ABCD", "ABCD");
216+
doesNotParse(multiCharCaseInsensitiveParser, "");
217+
doesNotParse(multiCharCaseInsensitiveParser, "abc");
218+
doesNotParse(multiCharCaseInsensitiveParser, "abcde");
219+
doesNotParse(multiCharCaseInsensitiveParser, "efgh");
204220

205221
/*
206222
* Test that the parsing position moves forward after successful parsing of

test/parser-test.js

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,11 @@ function ruleRef(name) {
8383
};
8484
}
8585

86-
function literal(value) {
86+
function literal(value, ignoreCase) {
8787
return {
88-
type: "literal",
89-
value: value
88+
type: "literal",
89+
value: value,
90+
ignoreCase: ignoreCase
9091
};
9192
}
9293

@@ -103,9 +104,9 @@ function klass(inverted, parts, rawText) {
103104
};
104105
}
105106

106-
var literalAbcd = literal("abcd");
107-
var literalEfgh = literal("efgh");
108-
var literalIjkl = literal("ijkl");
107+
var literalAbcd = literal("abcd", false);
108+
var literalEfgh = literal("efgh", false);
109+
var literalIjkl = literal("ijkl", false);
109110

110111
var optionalLiteral = optional(literalAbcd);
111112

@@ -128,15 +129,15 @@ function oneRuleGrammar(expression) {
128129
};
129130
}
130131

131-
var simpleGrammar = oneRuleGrammar(literal("abcd"));
132+
var simpleGrammar = oneRuleGrammar(literal("abcd", false));
132133

133134
function identifierGrammar(identifier) {
134135
return oneRuleGrammar(ruleRef(identifier));
135136
}
136137

137138
var literal_ = literal;
138139
function literalGrammar(literal) {
139-
return oneRuleGrammar(literal_(literal));
140+
return oneRuleGrammar(literal_(literal, false));
140141
}
141142

142143
function classGrammar(inverted, parts, rawText) {
@@ -147,7 +148,7 @@ var anyGrammar = oneRuleGrammar(any());
147148

148149
var action_ = action;
149150
function actionGrammar(action) {
150-
return oneRuleGrammar(action_(literal("a"), action));
151+
return oneRuleGrammar(action_(literal("a", false), action));
151152
}
152153

153154
var initializerGrammar = {
@@ -334,6 +335,8 @@ test("parses identifier", function() {
334335
/* Canonical literal is "\"abcd\"". */
335336
test("parses literal", function() {
336337
parserParses('start = "abcd"', literalGrammar("abcd"));
338+
parserParses("start = 'abcd'", literalGrammar("abcd"));
339+
parserParses('start = "abcd"i', oneRuleGrammar(literal("abcd", true)));
337340
});
338341

339342
/* Canonical string is "\"abcd\"". */

test/passes-test.js

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ test("removes proxy rules", function() {
1616
type: "rule",
1717
name: "proxied",
1818
displayName: null,
19-
expression: { type: "literal", value: "a" }
19+
expression: { type: "literal", value: "a", ignoreCase: false }
2020
};
2121

2222
var proxiedRuleRef = {
@@ -50,8 +50,8 @@ test("removes proxy rules", function() {
5050
type: "choice",
5151
alternatives: [
5252
proxiedRuleRef,
53-
{ type: "literal", value: "a" },
54-
{ type: "literal", value: "b" }
53+
{ type: "literal", value: "a", ignoreCase: false },
54+
{ type: "literal", value: "b", ignoreCase: false }
5555
]
5656
})
5757
},
@@ -60,8 +60,8 @@ test("removes proxy rules", function() {
6060
ast: simpleGrammarWithStartAndProxied({
6161
type: "choice",
6262
alternatives: [
63-
{ type: "literal", value: "a" },
64-
{ type: "literal", value: "b" },
63+
{ type: "literal", value: "a", ignoreCase: false },
64+
{ type: "literal", value: "b", ignoreCase: false },
6565
proxiedRuleRef
6666
]
6767
})
@@ -72,8 +72,8 @@ test("removes proxy rules", function() {
7272
type: "sequence",
7373
elements: [
7474
proxiedRuleRef,
75-
{ type: "literal", value: "a" },
76-
{ type: "literal", value: "b" }
75+
{ type: "literal", value: "a", ignoreCase: false },
76+
{ type: "literal", value: "b", ignoreCase: false }
7777
]
7878
})
7979
},
@@ -82,8 +82,8 @@ test("removes proxy rules", function() {
8282
ast: simpleGrammarWithStartAndProxied({
8383
type: "sequence",
8484
elements: [
85-
{ type: "literal", value: "a" },
86-
{ type: "literal", value: "b" },
85+
{ type: "literal", value: "a", ignoreCase: false },
86+
{ type: "literal", value: "b", ignoreCase: false },
8787
proxiedRuleRef
8888
]
8989
})

0 commit comments

Comments
 (0)