Skip to content

Commit 273e860

Browse files
author
lrn@chromium.org
committed
Changed keyword token recognition to be done inline in the identifier scanner.
Review URL: http://codereview.chromium.org/360048 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3221 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
1 parent 877db0f commit 273e860

4 files changed

Lines changed: 384 additions & 15 deletions

File tree

src/scanner.cc

Lines changed: 140 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,139 @@ void TwoByteStringUTF16Buffer::SeekForward(int pos) {
193193
}
194194

195195

196+
// ----------------------------------------------------------------------------
197+
// Keyword Matcher
198+
// Dispatch table for the first character of an identifier. Step() indexes it
// with (input - kFirstCharRangeMin), so there is exactly one row per letter in
// the range 'b'..'w'. Each row is one of:
//   - a keyword string with KEYWORD_PREFIX and its token, when a single
//     keyword starts with that letter;
//   - a letter-specific state (C, D, F, ...) with no keyword string;
//   - UNMATCHABLE with no keyword string.
KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
  /* b */ { "break",  KEYWORD_PREFIX, Token::BREAK   },
  /* c */ { NULL,     C,              Token::ILLEGAL },
  /* d */ { NULL,     D,              Token::ILLEGAL },
  /* e */ { "else",   KEYWORD_PREFIX, Token::ELSE    },
  /* f */ { NULL,     F,              Token::ILLEGAL },
  /* g */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* h */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* i */ { NULL,     I,              Token::ILLEGAL },
  /* j */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* k */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* l */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* m */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* n */ { NULL,     N,              Token::ILLEGAL },
  /* o */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* p */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* q */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* r */ { "return", KEYWORD_PREFIX, Token::RETURN  },
  /* s */ { "switch", KEYWORD_PREFIX, Token::SWITCH  },
  /* t */ { NULL,     T,              Token::ILLEGAL },
  /* u */ { NULL,     UNMATCHABLE,    Token::ILLEGAL },
  /* v */ { NULL,     V,              Token::ILLEGAL },
  /* w */ { NULL,     W,              Token::ILLEGAL }
};
222+
223+
224+
// Advances the matcher by one identifier character.
//
// Every transition that keeps a keyword match alive returns from inside the
// switch. Any path that leaves the switch via `break` means the characters
// seen so far can no longer be a keyword (or its prefix), and the matcher is
// parked in UNMATCHABLE.
void KeywordMatcher::Step(uc32 input) {
  switch (state_) {
    case INITIAL: {
      // The first character is the only state with significant fanout, so it
      // is dispatched through a table covering the lower-case range 'b'..'w'.
      // The unsigned subtraction folds the "below range" and "above range"
      // checks into a single comparison.
      unsigned int index = input - kFirstCharRangeMin;
      if (index >= kFirstCharRangeLength) break;
      const FirstState& entry = first_states_[index];
      state_ = entry.state;
      if (state_ == KEYWORD_PREFIX) {
        // Exactly one keyword starts with this letter; match its remainder.
        keyword_ = entry.keyword;
        counter_ = 1;
        keyword_token_ = entry.token;
      }
      return;
    }

    case KEYWORD_PREFIX:
      // Only one keyword is still possible; compare against its next char.
      if (keyword_[counter_] != input) break;
      ASSERT_NE(input, '\0');
      counter_++;
      if (keyword_[counter_] == '\0') {
        state_ = KEYWORD_MATCHED;
        token_ = keyword_token_;
      }
      return;

    case KEYWORD_MATCHED:
      // A further character after a complete keyword makes it an identifier.
      token_ = Token::IDENTIFIER;
      break;

    case C:
      if (MatchState(input, 'a', CA) || MatchState(input, 'o', CO)) return;
      break;

    case CA:
      if (MatchKeywordStart(input, "case", 2, Token::CASE) ||
          MatchKeywordStart(input, "catch", 2, Token::CATCH)) {
        return;
      }
      break;

    case CO:
      if (MatchState(input, 'n', CON)) return;
      break;

    case CON:
      if (MatchKeywordStart(input, "const", 3, Token::CONST) ||
          MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) {
        return;
      }
      break;

    case D:
      if (MatchState(input, 'e', DE) ||
          MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) {
        return;
      }
      break;

    case DE:
      if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER) ||
          MatchKeywordStart(input, "default", 2, Token::DEFAULT) ||
          MatchKeywordStart(input, "delete", 2, Token::DELETE)) {
        return;
      }
      break;

    case F:
      if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL) ||
          MatchKeywordStart(input, "finally", 1, Token::FINALLY) ||
          MatchKeywordStart(input, "for", 1, Token::FOR) ||
          MatchKeywordStart(input, "function", 1, Token::FUNCTION)) {
        return;
      }
      break;

    case I:
      if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF) ||
          MatchKeyword(input, 'n', IN, Token::IN)) {
        return;
      }
      break;

    case IN:
      // "in" was a complete keyword; one more character revokes that, unless
      // it continues towards "instanceof".
      token_ = Token::IDENTIFIER;
      if (!MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) break;
      return;

    case N:
      if (MatchKeywordStart(input, "native", 1, Token::NATIVE) ||
          MatchKeywordStart(input, "new", 1, Token::NEW) ||
          MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) {
        return;
      }
      break;

    case T:
      if (MatchState(input, 'h', TH) ||
          MatchState(input, 'r', TR) ||
          MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) {
        return;
      }
      break;

    case TH:
      if (MatchKeywordStart(input, "this", 2, Token::THIS) ||
          MatchKeywordStart(input, "throw", 2, Token::THROW)) {
        return;
      }
      break;

    case TR:
      if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL) ||
          MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) {
        return;
      }
      break;

    case V:
      if (MatchKeywordStart(input, "var", 1, Token::VAR) ||
          MatchKeywordStart(input, "void", 1, Token::VOID)) {
        return;
      }
      break;

    case W:
      if (MatchKeywordStart(input, "while", 1, Token::WHILE) ||
          MatchKeywordStart(input, "with", 1, Token::WITH)) {
        return;
      }
      break;

    default:
      UNREACHABLE();
  }

  // Leaving the switch without returning means the match has failed.
  state_ = UNMATCHABLE;
}
327+
328+
196329
// ----------------------------------------------------------------------------
197330
// Scanner
198331

@@ -855,48 +988,40 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
855988

856989
// Scans one identifier, passing each character to AddChar() between
// StartLiteral() and TerminateLiteral(). Returns Token::ILLEGAL when a unicode
// escape does not decode to a legal identifier start/part character.
// In the added lines of this change, every unescaped character is also fed to
// a KeywordMatcher, and the matcher's token() is the function's result.
Token::Value Scanner::ScanIdentifier() {
857990
ASSERT(kIsIdentifierStart.get(c0_));
858-
bool has_escapes = false;
859991

860992
StartLiteral();
993+
KeywordMatcher keyword_match;
994+
861995
// Scan identifier start character.
862996
if (c0_ == '\\') {
863-
has_escapes = true;
864997
uc32 c = ScanIdentifierUnicodeEscape();
865998
// Only allow legal identifier start characters.
866999
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
8671000
AddChar(c);
1001+
// An identifier containing a unicode escape must not resolve to a keyword.
keyword_match.Fail();
8681002
} else {
8691003
AddChar(c0_);
1004+
keyword_match.AddChar(c0_);
8701005
Advance();
8711006
}
8721007

8731008
// Scan the rest of the identifier characters.
8741009
while (kIsIdentifierPart.get(c0_)) {
8751010
if (c0_ == '\\') {
876-
has_escapes = true;
8771011
uc32 c = ScanIdentifierUnicodeEscape();
8781012
// Only allow legal identifier part characters.
8791013
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
8801014
AddChar(c);
1015+
// Same rule as for the start character: an escape rules out a keyword.
keyword_match.Fail();
8811016
} else {
8821017
AddChar(c0_);
1018+
keyword_match.AddChar(c0_);
8831019
Advance();
8841020
}
8851021
}
8861022
TerminateLiteral();
8871023

888-
// We don't have any 1-letter keywords (this is probably a common case).
889-
if ((next_.literal_end - next_.literal_pos) == 1) {
890-
return Token::IDENTIFIER;
891-
}
892-
893-
// If the identifier contains unicode escapes, it must not be
894-
// resolved to a keyword.
895-
if (has_escapes) {
896-
return Token::IDENTIFIER;
897-
}
898-
899-
return Token::Lookup(&literals_.data()[next_.literal_pos]);
1024+
return keyword_match.token();
9001025
}
9011026

9021027

src/scanner.h

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,121 @@ class TwoByteStringUTF16Buffer: public UTF16Buffer {
123123
};
124124

125125

126+
class KeywordMatcher {
127+
// Incrementally recognize keywords.
128+
//
129+
// Recognized keywords:
130+
// break case catch const* continue debugger* default delete do else
131+
// finally false for function if in instanceof native* new null
132+
// return switch this throw true try typeof var void while with
133+
//
134+
// *: Actually "future reserved keywords". These are the only ones we
135+
// recognized, the remaining are allowed as identifiers.
136+
public:
137+
KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {}
138+
139+
Token::Value token() { return token_; }
140+
141+
inline void AddChar(uc32 input) {
142+
if (state_ != UNMATCHABLE) {
143+
Step(input);
144+
}
145+
}
146+
147+
void Fail() {
148+
token_ = Token::IDENTIFIER;
149+
state_ = UNMATCHABLE;
150+
}
151+
152+
private:
153+
enum State {
154+
UNMATCHABLE,
155+
INITIAL,
156+
KEYWORD_PREFIX,
157+
KEYWORD_MATCHED,
158+
C,
159+
CA,
160+
CO,
161+
CON,
162+
D,
163+
DE,
164+
F,
165+
I,
166+
IN,
167+
N,
168+
T,
169+
TH,
170+
TR,
171+
V,
172+
W
173+
};
174+
175+
struct FirstState {
176+
const char* keyword;
177+
State state;
178+
Token::Value token;
179+
};
180+
181+
// Range of possible first characters of a keyword.
182+
static const unsigned int kFirstCharRangeMin = 'b';
183+
static const unsigned int kFirstCharRangeMax = 'w';
184+
static const unsigned int kFirstCharRangeLength =
185+
kFirstCharRangeMax - kFirstCharRangeMin + 1;
186+
// State map for first keyword character range.
187+
static FirstState first_states_[kFirstCharRangeLength];
188+
189+
// Current state.
190+
State state_;
191+
// Token for currently added characters.
192+
Token::Value token_;
193+
194+
// Matching a specific keyword string (there is only one possible valid
195+
// keyword with the current prefix).
196+
const char* keyword_;
197+
int counter_;
198+
Token::Value keyword_token_;
199+
200+
// If input equals keyword's character at position, continue matching keyword
201+
// from that position.
202+
inline bool MatchKeywordStart(uc32 input,
203+
const char* keyword,
204+
int position,
205+
Token::Value token_if_match) {
206+
if (input == keyword[position]) {
207+
state_ = KEYWORD_PREFIX;
208+
this->keyword_ = keyword;
209+
this->counter_ = position + 1;
210+
this->keyword_token_ = token_if_match;
211+
return true;
212+
}
213+
return false;
214+
}
215+
216+
// If input equals match character, transition to new state and return true.
217+
inline bool MatchState(uc32 input, char match, State new_state) {
218+
if (input == match) {
219+
state_ = new_state;
220+
return true;
221+
}
222+
return false;
223+
}
224+
225+
inline bool MatchKeyword(uc32 input,
226+
char match,
227+
State new_state,
228+
Token::Value keyword_token) {
229+
if (input == match) { // Matched "do".
230+
state_ = new_state;
231+
token_ = keyword_token;
232+
return true;
233+
}
234+
return false;
235+
}
236+
237+
void Step(uc32 input);
238+
};
239+
240+
126241
class Scanner {
127242
public:
128243

test/cctest/SConscript

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ SOURCES = {
5252
'test-log.cc',
5353
'test-log-utils.cc',
5454
'test-mark-compact.cc',
55+
'test-parsing.cc',
5556
'test-regexp.cc',
5657
'test-serialize.cc',
5758
'test-sockets.cc',

0 commit comments

Comments
 (0)