Skip to content

Commit

Permalink
Sebastian review
Browse files Browse the repository at this point in the history
  • Loading branch information
kmill committed Dec 5, 2023
1 parent 9613374 commit a4bc0e1
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 17 deletions.
6 changes: 4 additions & 2 deletions src/Init/Meta.lean
Original file line number Diff line number Diff line change
Expand Up @@ -773,10 +773,12 @@ def decodeQuotedChar (s : String) (i : String.Pos) : Option (Char × String.Pos)
else
none

/-- Decode a valid string gap after the `\`.
/--
Decodes a valid string gap after the `\`.
Note that this function matches `"\" whitespace+` rather than
the more restrictive `"\" newline whitespace*` since this simplifies the implementation.
Justification: this does not overlap with any other sequences beginning with `\`. -/
Justification: this does not overlap with any other sequences beginning with `\`.
-/
def decodeStringGap (s : String) (i : String.Pos) : Option String.Pos := do
guard <| (s.get i).isWhitespace
s.nextWhile Char.isWhitespace (s.next i)
Expand Down
27 changes: 17 additions & 10 deletions src/Lean/Parser/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -564,10 +564,12 @@ def hexDigitFn : ParserFn := fun c s =>
if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i
else s.mkUnexpectedError "invalid hexadecimal numeral"

/-- Parses the whitespace after the `\` when there is a string gap.
/--
Parses the whitespace after the `\` when there is a string gap.
Raises an error if the whitespace does not contain exactly one newline character.
Processes `\r\n` as a newline. -/
partial def stringGap (seenNewline afterCR : Bool) : ParserFn := fun c s =>
Processes `\r\n` as a newline.
-/
partial def stringGapFn (seenNewline afterCR : Bool) : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline
else
Expand All @@ -577,22 +579,24 @@ partial def stringGap (seenNewline afterCR : Bool) : ParserFn := fun c s =>
-- Having more than one newline in a string gap is visually confusing
s.mkUnexpectedError "unexpected additional newline in string gap"
else
stringGap true false c (s.next' c.input i h)
stringGapFn true false c (s.next' c.input i h)
else if curr == '\r' then
stringGap seenNewline true c (s.next' c.input i h)
stringGapFn seenNewline true c (s.next' c.input i h)
else if afterCR then
s.mkUnexpectedError "expecting newline after carriage return"
else if curr.isWhitespace then
stringGap seenNewline false c (s.next' c.input i h)
stringGapFn seenNewline false c (s.next' c.input i h)
else if seenNewline then
s
else
s.mkUnexpectedError "expecting newline in string gap"

/-- Parse a string quotation after a `\`.
/--
Parses a string quotation after a `\`.
- `isQuotable` determines which characters are valid escapes
- `inString` enables features that are only valid within strings,
in particular `"\" newline whitespace*` gaps. -/
in particular `"\" newline whitespace*` gaps.
-/
def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
Expand All @@ -606,7 +610,7 @@ def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn :
else if curr == 'u' then
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h)
else if inString && (curr == '\n' || curr == '\r') then
stringGap false false c s
stringGapFn false false c s
else
s.mkUnexpectedError "invalid escape sequence"

Expand All @@ -616,7 +620,10 @@ def isQuotableCharDefault (c : Char) : Bool :=
def quotedCharFn : ParserFn :=
quotedCharCoreFn isQuotableCharDefault false

/-- Like `quotedCharFn` but enables escapes that are only valid inside strings. In particular, gaps ("\" newline whitespace*). -/
/--
Like `quotedCharFn` but enables escapes that are only valid inside strings.
In particular, string gaps (`"\" newline whitespace*`).
-/
def quotedStringFn : ParserFn :=
quotedCharCoreFn isQuotableCharDefault true

Expand Down
68 changes: 66 additions & 2 deletions tests/lean/string_gaps.lean
Original file line number Diff line number Diff line change
@@ -1,59 +1,123 @@
import Lean
import Lean.Parser.Extension
import Lean.Elab.Term

/-!
# Testing string gaps in string literals
String gaps are described in RFC #2838
-/

/-!
A string gap with no trailing space.
-/
#eval "a\
b"

/-!
A string gap with trailing space before the `b`, which is consumed.
-/
#eval "a\
b"

/-!
A string gap with space before the gap, which is not consumed.
-/
#eval "a \
b"

/-!
Multiple string gaps in a row.
-/
#eval "a \
\
\
b"

/-!
Two tests from the RFC.
-/
#eval "this is \
a string"
#eval "this is \
a string"

/-!
Two examples of how spaces are accounted for in string gaps. `\x20` is a way to force a leading space.
-/
#eval "there are three spaces between the brackets < \
>"
#eval "there are three spaces between the brackets <\
\x20 >"

/-!
Using `\n` to terminate a string gap, which is a technique suggested by Mario for using string gaps to write
multiline literals in an indented context.
-/
#eval "this is\
\n a string with two space indent"

/-!
Similar tests but for interpolated strings.
-/
#eval s!"a\
b"
#eval s!"a\
b"
#eval s!"a\
b"

/-!
The `{` terminates the string gap.
-/
#eval s!"a\
{"b"}\
"

open Lean

/-!
## Testing whitespace handling with specific line terminators
-/

/-!
Standard string gap, with LF
-/
#eval show MetaM String from do
let stx ← ofExcept <| Parser.runParserCategory (← getEnv) `term "\"a\\\n b\""
let some s := stx.isStrLit? | failure
return s

/-!
Standard string gap, with CRLF
-/
#eval show MetaM String from do
let stx ← ofExcept <| Parser.runParserCategory (← getEnv) `term "\"a\\\r\n b\""
let some s := stx.isStrLit? | failure
return s

/-!
Isolated CR, which is an error
-/
#eval show MetaM String from do
let stx ← ofExcept <| Parser.runParserCategory (← getEnv) `term "\"a\\\r b\""
let some s := stx.isStrLit? | failure
return s

/-!
Not a string gap since there's no end-of-line.
-/
#eval show MetaM String from do
let stx ← ofExcept <| Parser.runParserCategory (← getEnv) `term "\"a\\ b\""
let some s := stx.isStrLit? | failure
return s

-- Scala-style stripMargin
/-!
## Scala-style stripMargin
This is a test that string gaps could be paired with a new string elaboration syntax
for indented multiline string literals.
-/

def String.dedent (s : String) : Option String :=
let parts := s.split (· == '\n') |>.map String.trimLeft
match parts with
Expand Down
4 changes: 2 additions & 2 deletions tests/lean/string_gaps.lean.expected.out
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"ab"
"ab"

string_gaps.lean:46:0-49:10: error: <input>:1:4: expecting newline after carriage return
string_gaps.lean:101:0-104:10: error: <input>:1:4: expecting newline after carriage return

string_gaps.lean:51:0-54:10: error: <input>:1:3: invalid escape sequence
string_gaps.lean:109:0-112:10: error: <input>:1:3: invalid escape sequence
"this is line 1\n line 2, indented\nline 3"
6 changes: 6 additions & 0 deletions tests/lean/string_gaps_err_newline.lean
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/-!
# String gaps error test
A string gap shouldn't have more than one trailing newline according to RFC #2838
-/

#eval "a\
b"
2 changes: 1 addition & 1 deletion tests/lean/string_gaps_err_newline.lean.expected.out
Original file line number Diff line number Diff line change
@@ -1 +1 @@
string_gaps_err_newline.lean:2:0: error: unexpected additional newline in string gap
string_gaps_err_newline.lean:8:0: error: unexpected additional newline in string gap

0 comments on commit a4bc0e1

Please sign in to comment.