Skip to content

Commit 72f5a4e

Browse files
committed
Cleaned up token generation.
1 parent 1381662 commit 72f5a4e

File tree

2 files changed

+79
-52
lines changed

2 files changed

+79
-52
lines changed

main.swift

Lines changed: 77 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ class Lexer : Sequence
136136
return lexOperator();
137137
case "a"..."z", "A"..."Z", "_", "$":
138138
return lexIdentifier()
139+
case "`":
140+
return lexEscapedIdentifier()
139141
default:
140142
if char.isIdentifierHead
141143
{
@@ -146,11 +148,7 @@ class Lexer : Sequence
146148
}
147149
}
148150

149-
return Token(
150-
type: .EOF,
151-
content: "",
152-
range: self.index..<self.index
153-
)
151+
return makeToken(type: .EOF, numberOfChars: 0)
154152
}
155153

156154
func advance()
@@ -241,24 +239,15 @@ class Lexer : Sequence
241239
.withNote("comment started here", range: start..<start)
242240
}
243241

244-
return Token(
245-
type: .Comment(true),
246-
content: String(self.characters[start..<self.index]),
247-
range: start..<self.index
248-
)
242+
return makeToken(type: .Comment(true), range: start..<self.index)
249243
}
250244

251245
func lexAllMatching(as type: TokenType, pred: (UnicodeScalar) -> Bool) -> Token
252246
{
253247
let start = self.index
254-
255248
self.advanceWhile(pred: pred)
256249

257-
return Token(
258-
type: type,
259-
content: String(self.characters[start..<self.index]),
260-
range: start..<self.index
261-
)
250+
return makeToken(type: type, range: start..<self.index)
262251
}
263252

264253
func lexUntilEndOfLine(as type: TokenType) -> Token
@@ -299,11 +288,47 @@ class Lexer : Sequence
299288
let content = String(self.characters[start..<self.index])
300289
let type = TokenType(forIdentifier: content)
301290

302-
return Token(
303-
type: type,
304-
content: content,
305-
range: start..<self.index
306-
)
291+
return Token(type: type, content: content, range: start..<self.index)
292+
}
293+
294+
/// Lexes a backtick-escaped identifier (an identifier wrapped in backticks).
///
/// Must be called with the cursor positioned on the opening backtick.
///
/// - Returns: On success, an `.Identifier(true)` token whose `content` is the
///   identifier text without the surrounding backticks and whose `range`
///   spans both backticks; the cursor is left just past the closing backtick.
///   If the opening backtick is not followed by an identifier head, a run of
///   identifier-body characters and a closing backtick, the cursor is rewound
///   and the opening backtick alone is returned as a `.Punctuator("`")` token.
func lexEscapedIdentifier() -> Token
{
    assert(self.currentChar != nil, "Cannot lex identifier at EOF")
    assert(self.currentChar == "`", "Not a valid starting point for an escaped identifier")

    let start = self.index
    self.advance()
    let contentStart = self.index

    // Bind rather than force-unwrap: the backtick may be the last character
    // of the input, in which case there is no current character here (the
    // assertion above treats a nil `currentChar` as EOF) and we must fall
    // through to the punctuator path instead of trapping.
    if let head = self.currentChar, head.isIdentifierHead
    {
        // Consume the identifier; stops at the first non-body character or
        // when the input runs out.
        while let char = self.currentChar, char.isIdentifierBody
        {
            self.advance()
        }

        if self.currentChar == "`"
        {
            let contentEnd = self.index
            self.advance()

            return Token(
                type: .Identifier(true),
                content: String(self.characters[contentStart..<contentEnd]),
                range: start..<self.index
            )
        }
    }

    // Not a well-formed escaped identifier: rewind to the opening backtick
    // and emit it on its own.
    self.index = start
    return makeTokenAndAdvance(type: .Punctuator("`"))
}
308333

309334
func lexOperator() -> Token
@@ -391,12 +416,7 @@ class Lexer : Sequence
391416
let name = String(self.characters[nameStart..<self.index])
392417
if let type = TokenType(forPoundKeyword: name)
393418
{
394-
let content = String(self.characters[start..<self.index])
395-
return Token(
396-
type: type,
397-
content: content,
398-
range: start..<self.index
399-
)
419+
return Token(type: type, content: name, range: start..<self.index)
400420
}
401421
}
402422

@@ -410,20 +430,32 @@ class Lexer : Sequence
410430
return makeTokenAndAdvance(type: .Newline, numberOfChars: isCRLF ? 2 : 1)
411431
}
412432

413-
func makeTokenAndAdvance(type: TokenType, numberOfChars: Int = 1) -> Token
433+
/// Builds a token of the given type covering `range`.
///
/// The token's content is the text of `self.characters` within `range`.
/// The lexer's cursor is not moved.
///
/// - Parameters:
///   - type: The token type to assign.
///   - range: The span of `self.characters` the token covers.
/// - Returns: The newly constructed token.
func makeToken(type: TokenType, range: Range<Index>) -> Token
{
    let text = String(self.characters[range])
    return Token(type: type, content: text, range: range)
}
441+
442+
func makeToken(type: TokenType, numberOfChars: Int = 1) -> Token
414443
{
415444
let start = self.index
445+
var end = self.index
416446

417-
for _ in 1...numberOfChars
447+
for _ in 0..<numberOfChars
418448
{
419-
self.index = self.characters.index(after: self.index)
449+
end = self.characters.index(after: end)
420450
}
421451

422-
let token = Token(
423-
type: type,
424-
content: String(self.characters[start..<self.index]),
425-
range: start..<self.index
426-
)
452+
return makeToken(type: type, range: start..<end)
453+
}
454+
455+
/// Builds a token of the given type via `makeToken(type:numberOfChars:)`
/// and then moves the cursor to the end of that token.
///
/// - Parameters:
///   - type: The token type to assign.
///   - numberOfChars: How many characters the token should cover (default 1).
/// - Returns: The token that was created.
func makeTokenAndAdvance(type: TokenType, numberOfChars: Int = 1) -> Token
{
    let produced = makeToken(type: type, numberOfChars: numberOfChars)
    let newPosition = produced.range.upperBound
    self.index = newPosition
    return produced
}
@@ -497,8 +529,16 @@ if let source = Source(path: relativePath("tests/test.swift")!)
497529
{
498530
var l = Lexer(source)
499531

500-
for _ in l {
501-
532+
for token in l.filter({
533+
switch $0.type {
534+
case .Whitespace:
535+
return false
536+
default:
537+
return true
538+
}
539+
})
540+
{
541+
print("\(token.type): \(token.content.literalString)")
502542
}
503543

504544
for diag in l.diagnoses
@@ -539,17 +579,4 @@ if let source = Source(path: relativePath("tests/test.swift")!)
539579
}
540580
}
541581
}
542-
/*
543-
for token in l.filter({
544-
switch $0.type {
545-
case .Whitespace:
546-
return false
547-
default:
548-
return true
549-
}
550-
})
551-
{
552-
print("\(token.type): \(token.content.literalString)")
553-
}
554-
*/
555582
}

token.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ public enum TokenType
33
{
44
case Unknown
55
case EOF
6-
case Identifier
6+
case Identifier(Bool)
77
case Operator(String)
88
case IntegerLiteral
99
case FloatLiteral
@@ -36,7 +36,7 @@ public enum TokenType
3636
"super", "self", "Self", "throw", "throws", "true", "try", "_":
3737
self = .Keyword(identifier)
3838
default:
39-
self = .Identifier
39+
self = .Identifier(false)
4040
}
4141
}
4242

0 commit comments

Comments
 (0)