Skip to content

Commit 5c91826

Browse files
committed
Added string literal lexing (without interpolation atm)
1 parent f5ea1b6 commit 5c91826

File tree

4 files changed

+211
-56
lines changed

4 files changed

+211
-56
lines changed

diagnostics.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,20 @@ class Diagnose
4343
return self.source.getContext(index: self.range.lowerBound)
4444
}
4545

46-
/// Attaches a fix-it to this diagnostic.
///
/// - Parameters:
///   - replacement: Text that should replace the characters in `range`.
///   - range: Source range to replace; when omitted, the diagnostic's own range is used.
/// - Returns: `self`, so further `with…` calls can be chained. The result may be ignored
///   when the call is only made for its side effect.
@discardableResult
func withFixIt(replacement: String, range: Range<Index>? = nil) -> Diagnose
{
    let target = range ?? self.range
    self.fixIts.append(FixIt(range: target, replacement: replacement))
    return self
}
5151

52-
/// Attaches a fix-it that inserts text without replacing any existing characters.
///
/// - Parameters:
///   - insert: Text to insert.
///   - at: Index at which the text is inserted (expressed as the empty range `at..<at`).
/// - Returns: `self`, for chaining. Callers that use this as a plain statement may
///   ignore the result.
@discardableResult
func withInsertFix(insert: String, at: Index) -> Diagnose
{
    return self.withFixIt(replacement: insert, range: at..<at)
}
5656

5757
/// Attaches a fix-it that removes source text.
///
/// - Parameter range: Range to remove; when omitted, the diagnostic's own range is removed
///   (the fallback is applied by `withFixIt`).
/// - Returns: `self`, for chaining. The result may be ignored.
@discardableResult
func withRemoveFix(range: Range<Index>? = nil) -> Diagnose
{
    // An empty replacement deletes the characters in the range.
    return self.withFixIt(replacement: "", range: range)
}
6161

6262
func withNote(_ message: String, range: Range<Index>, source: Source? = nil) -> Diagnose

helpers.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
// Reference scalars whose code points are subtracted from a digit's code point
// in `decimalValue` and `hexValue` to obtain the digit's numeric value.
private let zero : UnicodeScalar = "0"
2+
private let lowerA : UnicodeScalar = "a"
3+
private let upperA : UnicodeScalar = "A"
4+
15
extension UnicodeScalar
26
{
37
var isIdentifierHead : Bool
@@ -142,6 +146,32 @@ extension UnicodeScalar
142146
}
143147
}
144148
}
149+
150+
/// The numeric value of this scalar as a decimal digit, or `nil` when `isDigit` is false.
var decimalValue : UInt32?
{
    guard self.isDigit else
    {
        return nil
    }

    // Distance from "0" is the digit's value.
    return self.value - zero.value
}
157+
158+
/// The numeric value (0...15) of this scalar read as a hexadecimal digit,
/// or `nil` when it is not one of `0-9`, `a-f` or `A-F`.
var hexValue : UInt32?
{
    // Each digit class is described by the scalar it starts at and the value of that scalar.
    let base : UnicodeScalar
    let bias : UInt32

    switch self
    {
    case "0"..."9":
        base = zero
        bias = 0
    case "a"..."f":
        base = lowerA
        bias = 10
    case "A"..."F":
        base = upperA
        bias = 10
    default:
        return nil
    }

    return bias + self.value - base.value
}
145175
}
146176

147177
func * (str: String, times: Int) -> String

main.swift

Lines changed: 165 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,15 @@ class Lexer : Sequence
148148
case "#":
149149
return self.lexHash()
150150
case "=", "-", "+", "*", ">", "&", "|", "^", "~", ".":
151-
return lexOperator();
151+
return lexOperator()
152152
case "a"..."z", "A"..."Z", "_":
153153
return lexIdentifier()
154154
case "$":
155155
return lexDollarIdentifier()
156156
case "`":
157157
return lexEscapedIdentifier()
158+
case "\"", "'":
159+
return lexStringLiteral()
158160
default:
159161
if char.isIdentifierHead
160162
{
@@ -269,7 +271,7 @@ class Lexer : Sequence
269271
}
270272

271273
diagnose("unterminated '/*' comment", type: .Error, start: endIndex)
272-
.withInsertFix(at: endIndex, insert: "*/" * depth)
274+
.withInsertFix(insert: "*/" * depth, at: endIndex)
273275
.withNote("comment started here", range: start..<start)
274276
}
275277

@@ -461,13 +463,13 @@ class Lexer : Sequence
461463
{
462464
case "=":
463465
if (leftBound != rightBound) {
464-
let d = diagnose("'=' must have consistent whitespace on both sides", type: .Error, start: start, end: self.index);
466+
let d = diagnose("'=' must have consistent whitespace on both sides", type: .Error, start: start, end: self.index)
465467
if (leftBound) {
466-
d.withInsertFix(at: start, insert: " ")
468+
d.withInsertFix(insert: " ", at: start)
467469
}
468470
else
469471
{
470-
d.withInsertFix(at: self.index, insert: " ")
472+
d.withInsertFix(insert: " ", at: self.index)
471473
}
472474
}
473475
return Token(type: .Punctuator(.EqualSign), content: "=", range: start..<self.index)
@@ -820,6 +822,163 @@ class Lexer : Sequence
820822

821823
return Token(type: .FloatLiteral(.Decimal), content: content, range: start..<self.index)
822824
}
825+
826+
/// Lexes the braced part of a `\u{...}` escape sequence.
///
/// Must be called with the current character on the opening `{`. Consumes the brace,
/// every following hex digit and, when present, the closing `}`.
///
/// - Returns: The scalar denoted by the hex digits, or `nil` after emitting an error
///   diagnostic when the closing brace is missing, the number of digits is not within
///   1...8, or the value is not a valid Unicode scalar.
func lexUnicodeEscape() -> UnicodeScalar?
{
    assert(self.currentChar == "{", "Invalid unicode escape")
    self.advance()

    var hexValue : UInt32 = 0
    var numDigits : UInt = 0

    // Accumulate digits most-significant first. With more than 8 digits the high bits
    // are shifted out, which is harmless because that case is rejected below.
    while let digitValue = self.currentChar?.hexValue
    {
        hexValue = (hexValue << 4) | digitValue
        numDigits += 1
        self.advance()
    }

    if self.currentChar != "}"
    {
        diagnose("expected '}' in \\u{...} escape sequence", type: .Error)
        return nil
    }
    self.advance()

    if numDigits < 1 || numDigits > 8
    {
        diagnose("\\u{...} escape sequence expects between 1 and 8 hex digits", type: .Error)
        return nil
    }

    // Surrogate code points and values above U+10FFFF are not Unicode scalars; report
    // them instead of returning nil silently.
    guard let scalar = UnicodeScalar(hexValue) else
    {
        diagnose("invalid unicode scalar", type: .Error)
        return nil
    }

    return scalar
}
856+
857+
/// Rewrites the body of a single-quoted literal so it can be placed between double quotes.
///
/// - Unescaped `"` becomes `\"`.
/// - `\'` becomes a plain `'`.
/// - Every other escape pair (backslash plus following character) is copied unchanged.
/// - A lone trailing backslash is copied unchanged.
///
/// - Parameter singleQuoted: The characters between the single quotes (quotes excluded).
/// - Returns: The equivalent characters for use inside a double-quoted literal.
func makeDoubleQuotedLiteral(singleQuoted: String) -> String
{
    var result = ""
    var remaining = singleQuoted.makeIterator()

    while let current = remaining.next()
    {
        if current == "\""
        {
            result += "\\\""
            continue
        }

        // Only a backslash that is followed by another character forms an escape pair;
        // anything else (including a trailing backslash) is copied verbatim.
        guard current == "\\", let escaped = remaining.next() else
        {
            result += String(current)
            continue
        }

        // The quote no longer needs its backslash once the literal is double-quoted.
        if escaped != "'"
        {
            result += String(current)
        }
        result += String(escaped)
    }

    return result
}
889+
890+
/// Lexes a string literal starting at the current `"` or `'` character.
///
/// Escape sequences (`\\`, `\"`, `\'`, `\t`, `\n`, `\r`, `\0`, `\u{...}`) are decoded into
/// the token's content. String interpolation is not handled.
///
/// - Returns: A `.StringLiteral` token carrying the decoded content. The token type is
///   `.Unknown` when an escape sequence was invalid, and an `.Unknown` token covering the
///   consumed range is returned when the literal is not terminated before a line break
///   or the end of input. Single-quoted literals additionally produce an error diagnostic
///   with a fix-it that rewrites them with double quotes.
func lexStringLiteral() -> Token
{
    assert(self.currentChar == "\"" || self.currentChar == "\'", "Invalid starting point for a string literal")

    let quoteType = self.currentChar!
    let start = self.index
    var wasErroneous = false
    var content = ""

    self.advance()
    let charactersStartIndex = self.index

    characterLoop: while true
    {
        guard let char = self.currentChar else
        {
            diagnose("unterminated string literal", type: .Error)
            return makeToken(type: .Unknown, range: start..<self.index)
        }

        switch char
        {
        case "\r", "\n":
            diagnose("unterminated string literal", type: .Error)
            return makeToken(type: .Unknown, range: start..<self.index)
        case "\\":
            guard let escaped = self.nextChar else
            {
                diagnose("unterminated string literal", type: .Error)
                return makeToken(type: .Unknown, range: start..<self.index)
            }

            switch escaped
            {
            case "\\", "\"", "'":
                content += String(escaped)
            case "t":
                content += "\t"
            case "n":
                content += "\n"
            case "r":
                content += "\r"
            case "0":
                content += "\0"
            case "u":
                // Skip the backslash and the `u`; the braces are handled by lexUnicodeEscape.
                self.advance()
                self.advance()
                if self.currentChar != "{"
                {
                    diagnose("expected hexadecimal code in braces after unicode escape", type: .Error)
                    wasErroneous = true
                }
                else if let scalar = self.lexUnicodeEscape()
                {
                    content += String(scalar)
                }
                else
                {
                    wasErroneous = true
                }
                // The position is already past the escape, so bypass the shared advances.
                continue characterLoop
            default:
                diagnose("invalid escape sequence in literal", type: .Error)
                wasErroneous = true
            }

            // Skip the backslash; the advance after the switch skips the escaped character.
            self.advance()
        case "\"", "'":
            if char == quoteType
            {
                break characterLoop
            }
            // The other quote kind is ordinary content.
            fallthrough
        default:
            content += String(char)
        }

        self.advance()
    }

    // Consume the closing quote.
    self.advance()

    if quoteType == "'"
    {
        // Raw source text between the quotes, used to build the double-quoted fix-it.
        let charactersEndIndex = self.characters.index(before: self.index)
        let str = String(self.characters[charactersStartIndex..<charactersEndIndex])
        let replacement = "\"\(makeDoubleQuotedLiteral(singleQuoted: str))\""

        diagnose("single-quoted string literal found, use '\"'", type: .Error, start: start, end: self.index)
            .withFixIt(replacement: replacement)
    }

    return Token(type: wasErroneous ? .Unknown : .StringLiteral, content: content, range: start..<self.index)
}
823982
}
824983

825984
func relativePath(_ path: String) -> String?
@@ -831,14 +990,7 @@ if let source = Source(path: relativePath("tests/test.swift")!)
831990
{
832991
var l = Lexer(source)
833992

834-
for token in l.filter({
835-
switch $0.type {
836-
case .Whitespace:
837-
return false
838-
default:
839-
return true
840-
}
841-
})
993+
for token in l.filter({ $0.type != .Whitespace })
842994
{
843995
print("\(token.type): \(token.content.literalString)")
844996
}

tests/test.swift

Lines changed: 11 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,11 @@
1-
var `if` = "foo"
2-
var a ="fubar"
3-
a.
4-
x . y
5-
z.d
6-
b */ c
7-
b */* d */
8-
var $ = 5
9-
$0
10-
$125 + x
11-
$abc - 8
12-
13-
0b01
14-
15-
0b
16-
17-
0b_123
18-
19-
0b0_1__2
20-
0b001_
21-
22-
0xABCp+5
23-
0xABC
24-
0xABCDEFG
25-
26-
a.0.5
27-
0.5
28-
8e9
29-
10e-4.8
30-
10e+566.5
31-
1.1e1
32-
5.6x8
33-
-19
34-
35-
36-
/*
37-
38-
bar
1+
"foobar"
2+
'fubar blubb \' " bar\"\\\t'
3+
"\u"
4+
"\u{"
5+
"\u{1"
6+
"\u{}"
7+
"\u{FX"
8+
"\u{20}"
9+
"\u{20}"
10+
"\u{20X}"
11+
"\u{FFFFFFFFF}"

0 commit comments

Comments
 (0)