@@ -148,13 +148,15 @@ class Lexer : Sequence
148
148
case " # " :
149
149
return self . lexHash ( )
150
150
case " = " , " - " , " + " , " * " , " > " , " & " , " | " , " ^ " , " ~ " , " . " :
151
- return lexOperator ( ) ;
151
+ return lexOperator ( )
152
152
case " a " ... " z " , " A " ... " Z " , " _ " :
153
153
return lexIdentifier ( )
154
154
case " $ " :
155
155
return lexDollarIdentifier ( )
156
156
case " ` " :
157
157
return lexEscapedIdentifier ( )
158
+ case " \" " , " ' " :
159
+ return lexStringLiteral ( )
158
160
default :
159
161
if char. isIdentifierHead
160
162
{
@@ -269,7 +271,7 @@ class Lexer : Sequence
269
271
}
270
272
271
273
diagnose ( " unterminated '/*' comment " , type: . Error, start: endIndex)
272
- . withInsertFix ( at : endIndex , insert: " */ " * depth)
274
+ . withInsertFix ( insert: " */ " * depth, at : endIndex )
273
275
. withNote ( " comment started here " , range: start..< start)
274
276
}
275
277
@@ -461,13 +463,13 @@ class Lexer : Sequence
461
463
{
462
464
case " = " :
463
465
if ( leftBound != rightBound) {
464
- let d = diagnose ( " '=' must have consistent whitespace on both sides " , type: . Error, start: start, end: self . index) ;
466
+ let d = diagnose ( " '=' must have consistent whitespace on both sides " , type: . Error, start: start, end: self . index)
465
467
if ( leftBound) {
466
- d. withInsertFix ( at : start , insert: " " )
468
+ d. withInsertFix ( insert: " " , at : start )
467
469
}
468
470
else
469
471
{
470
- d. withInsertFix ( at : self . index , insert: " " )
472
+ d. withInsertFix ( insert: " " , at : self . index )
471
473
}
472
474
}
473
475
return Token ( type: . Punctuator( . EqualSign) , content: " = " , range: start..< self . index)
@@ -820,6 +822,163 @@ class Lexer : Sequence
820
822
821
823
return Token ( type: . FloatLiteral( . Decimal) , content: content, range: start..< self . index)
822
824
}
825
+
826
/// Lexes the braced part of a `\u{...}` escape sequence.
///
/// Must be called with the current character positioned on the opening `{`.
/// Consumes the `{`, every hexadecimal digit that follows, and the closing `}`.
///
/// - Returns: The scalar denoted by the hex digits, or `nil` after emitting an
///   error diagnostic when the closing `}` is missing, the digit count is not
///   between 1 and 8, or the value is not a valid Unicode scalar.
func lexUnicodeEscape() -> UnicodeScalar?
{
    assert(self.currentChar == "{", "Invalid unicode escape")
    self.advance()

    var hexValue: UInt32 = 0
    var numDigits: UInt = 0

    // Accumulate every hex digit; the count is validated only after the closing
    // brace so that an over-long sequence is still consumed as a whole.
    while let digitValue = self.currentChar?.hexValue
    {
        hexValue = (hexValue << 4) | digitValue
        numDigits += 1
        self.advance()
    }

    if self.currentChar != "}"
    {
        diagnose("expected '}' in \\u{...} escape sequence", type: .Error)
        return nil
    }
    self.advance()

    if numDigits < 1 || numDigits > 8
    {
        diagnose("\\u{...} escape sequence expects between 1 and 8 hex digits", type: .Error)
        return nil
    }

    // The UInt32 initializer fails for surrogates and values above U+10FFFF;
    // report that case instead of returning nil silently.
    guard let scalar = UnicodeScalar(hexValue) else
    {
        diagnose("invalid unicode scalar", type: .Error)
        return nil
    }

    return scalar
}
856
+
857
/// Rewrites the raw contents of a single-quoted literal so they can be placed
/// between double quotes.
///
/// - Unescaped `"` characters gain a backslash.
/// - The escape `\'` loses its backslash.
/// - Every other escape pair, and every other character, is copied unchanged.
///
/// - Parameter singleQuoted: The source text between the single quotes,
///   without the quotes themselves.
/// - Returns: The equivalent text for use inside a double-quoted literal.
func makeDoubleQuotedLiteral(singleQuoted: String) -> String
{
    let end = singleQuoted.endIndex
    var cursor = singleQuoted.startIndex
    var result = ""

    while cursor != end
    {
        let current = singleQuoted[cursor]
        let following = singleQuoted.index(after: cursor)

        switch current
        {
        case "\"":
            // A bare double quote must be escaped in the new literal.
            result += "\\\""
            cursor = following
        case "\\" where following != end:
            // An escape pair: keep it as is, except that \' no longer needs
            // its backslash. Both characters are consumed.
            let escaped = singleQuoted[following]
            if escaped != "'"
            {
                result.append(current)
            }
            result.append(escaped)
            cursor = singleQuoted.index(after: following)
        default:
            // Ordinary character, or a lone trailing backslash.
            result.append(current)
            cursor = following
        }
    }

    return result
}
889
+
890
/// Lexes a string literal that starts at the current character, which must be
/// an opening `"` or `'`.
///
/// The literal ends at the next unescaped quote of the same kind. The escapes
/// `\\`, `\"`, `\'`, `\t`, `\n`, `\r`, `\0` and `\u{...}` are decoded into the
/// token content. Any other escape is diagnosed and marks the token erroneous.
///
/// A single-quoted literal is diagnosed as an error, with a fix-it that
/// replaces it by the equivalent double-quoted literal.
///
/// - Returns: A `.StringLiteral` token holding the decoded content, or an
///   `.Unknown` token when the literal is unterminated (end of input or a line
///   break before the closing quote) or contains an invalid escape.
func lexStringLiteral() -> Token
{
    assert(self.currentChar == "\"" || self.currentChar == "'", "Invalid starting point for a string literal")

    // Guaranteed non-nil by the precondition asserted above.
    let quoteType = self.currentChar!
    let start = self.index
    var wasErroneous = false
    var content = ""

    self.advance()
    let charactersStartIndex = self.index

    characterLoop: while true
    {
        guard let current = self.currentChar else
        {
            diagnose("unterminated string literal", type: .Error)
            return makeToken(type: .Unknown, range: start..<self.index)
        }

        switch current
        {
        case "\r", "\n":
            // String literals may not span lines.
            diagnose("unterminated string literal", type: .Error)
            return makeToken(type: .Unknown, range: start..<self.index)
        case "\\":
            guard let escaped = self.nextChar else
            {
                diagnose("unterminated string literal", type: .Error)
                return makeToken(type: .Unknown, range: start..<self.index)
            }
            switch escaped
            {
            case "\\", "\"", "'":
                content += String(escaped)
            case "t":
                content += "\t"
            case "n":
                content += "\n"
            case "r":
                content += "\r"
            case "0":
                content += "\0"
            case "u":
                // Skip the backslash and the 'u' so that the current character
                // is the expected opening brace.
                self.advance()
                self.advance()
                if self.currentChar != "{"
                {
                    diagnose("expected hexadecimal code in braces after unicode escape", type: .Error)
                    wasErroneous = true
                }
                else if let char = self.lexUnicodeEscape()
                {
                    content += String(char)
                }
                else
                {
                    wasErroneous = true
                }
                // The escape has already been consumed; do not advance again.
                continue characterLoop
            default:
                diagnose("invalid escape sequence in literal", type: .Error)
                wasErroneous = true
            }
            // Skip the backslash; the shared advance below skips the escaped character.
            self.advance()
        case quoteType:
            // Closing quote of the same kind that opened the literal.
            break characterLoop
        default:
            // Ordinary character, including a quote of the other kind.
            content += String(current)
        }

        self.advance()
    }

    // Consume the closing quote.
    self.advance()

    if quoteType == "'"
    {
        // Source text between the quotes, excluding the closing quote just consumed.
        let charactersEndIndex = self.characters.index(before: self.index)
        let str = String(self.characters[charactersStartIndex..<charactersEndIndex])
        let replacement = "\"\(makeDoubleQuotedLiteral(singleQuoted: str))\""

        diagnose("single-quoted string literal found, use '\"'", type: .Error, start: start, end: self.index)
            .withFixIt(replacement: replacement)
    }

    return Token(type: wasErroneous ? .Unknown : .StringLiteral, content: content, range: start..<self.index)
}
823
982
}
824
983
825
984
func relativePath( _ path: String ) -> String ?
@@ -831,14 +990,7 @@ if let source = Source(path: relativePath("tests/test.swift")!)
831
990
{
832
991
var l = Lexer ( source)
833
992
834
- for token in l. filter ( {
835
- switch $0. type {
836
- case . Whitespace:
837
- return false
838
- default :
839
- return true
840
- }
841
- } )
993
+ for token in l. filter ( { $0. type != . Whitespace } )
842
994
{
843
995
print ( " \( token. type) : \( token. content. literalString) " )
844
996
}
0 commit comments