Skip to content

Tighten up some syntax rules #393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 13, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Reserve <{...}> for interpolation syntax
Ban a balanced set of `<{...}>` delimiters for a
potential future interpolation syntax.
  • Loading branch information
hamishknight committed May 10, 2022
commit db58c1bb26c050509ef29fd6d05ac1eadbc83000
16 changes: 15 additions & 1 deletion Sources/_RegexParser/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ extension AST {
/// Comments, non-semantic whitespace, etc
case trivia(Trivia)

/// Interpolation `<{...}>`, currently reserved for future use.
case interpolation(Interpolation)

case atom(Atom)

case customCharacterClass(CustomCharacterClass)
Expand All @@ -78,6 +81,7 @@ extension AST.Node {
case let .quantification(v): return v
case let .quote(v): return v
case let .trivia(v): return v
case let .interpolation(v): return v
case let .atom(v): return v
case let .customCharacterClass(v): return v
case let .empty(v): return v
Expand Down Expand Up @@ -128,7 +132,7 @@ extension AST.Node {
case .group, .conditional, .customCharacterClass, .absentFunction:
return true
case .alternation, .concatenation, .quantification, .quote, .trivia,
.empty:
.empty, .interpolation:
return false
}
}
Expand Down Expand Up @@ -192,6 +196,16 @@ extension AST {
}
}

/// An AST node for an interpolation sequence, spelled `<{...}>` in a pattern.
///
/// The node records the text associated with the interpolation together with
/// the source location it was parsed from.
public struct Interpolation: Hashable, _ASTNode {
  /// The text carried by this interpolation node.
  public let contents: String

  /// The location of this node in the source.
  public let location: SourceLocation

  /// Creates an interpolation node.
  ///
  /// - Parameters:
  ///   - contents: The text carried by the interpolation.
  ///   - location: The location of the node in the source.
  public init(_ contents: String, _ location: SourceLocation) {
    self.contents = contents
    self.location = location
  }
}

public struct Empty: Hashable, _ASTNode {
public let location: SourceLocation

Expand Down
2 changes: 1 addition & 1 deletion Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ extension AST.Node {
case .alternation, .concatenation, .group,
.conditional, .quantification, .quote,
.trivia, .customCharacterClass, .empty,
.absentFunction:
.absentFunction, .interpolation:
return nil
}
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/_RegexParser/Regex/Parse/CaptureList.swift
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ extension AST.Node {
break
}

case .quote, .trivia, .atom, .customCharacterClass, .empty:
case .quote, .trivia, .atom, .customCharacterClass, .empty, .interpolation:
break
}
}
Expand Down
20 changes: 20 additions & 0 deletions Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,26 @@ extension Source {
return AST.Quote(str.value, str.location)
}

/// Attempt to lex an interpolation sequence at the current position.
///
///     Interpolation -> '<{' String '}>'
///
/// - Returns: `nil` when the input does not start with `<{`, or when no
///   closing `}>` follows it.
/// - Throws: `ParseError.unsupported("interpolation")` as soon as a balanced
///   `<{...}>` pair has been consumed, since the syntax is only reserved for
///   now.
mutating func lexInterpolation() throws -> AST.Interpolation? {
  let located = try recordLoc { src -> String? in
    try src.tryEating { inner in
      // Opening delimiter.
      guard inner.tryEat(sequence: "<{") else { return nil }

      // Skip ahead to the closing delimiter (or the end of input). The
      // skipped text is intentionally discarded for the time being.
      _ = inner.lexUntil { rest in rest.isEmpty || rest.starts(with: "}>") }

      // Closing delimiter; without it this is not an interpolation.
      guard inner.tryEat(sequence: "}>") else { return nil }

      // Interpolation is reserved but unsupported. Diagnosing here rather
      // than in Sema yields a better error for inputs such as `(<{)}>`.
      throw ParseError.unsupported("interpolation")
    }
  }
  // NOTE(review): every exit from the closure above is `return nil` or a
  // throw, so this mapping looks unreachable with a value until
  // interpolation is supported — confirm against `recordLoc`.
  return located.map { AST.Interpolation($0.value, $0.location) }
}

/// Try to consume a comment
///
/// Comment -> '(?#' [^')']* ')'
Expand Down
7 changes: 7 additions & 0 deletions Sources/_RegexParser/Regex/Parse/Parse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,13 @@ extension Parser {
result.append(.quote(quote))
continue
}

// Interpolation -> `lexInterpolation`
if let interpolation = try source.lexInterpolation() {
result.append(.interpolation(interpolation))
continue
}

// Quantification -> QuantOperand Quantifier?
if let operand = try parseQuantifierOperand() {
if let (amt, kind, trivia) =
Expand Down
5 changes: 5 additions & 0 deletions Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,11 @@ extension RegexValidator {
// These are Oniguruma specific.
throw error(.unsupported("absent function"), at: a.location)

case .interpolation(let i):
// This is currently rejected in the parser for better diagnostics, but
// reject here too until we get runtime support.
throw error(.unsupported("interpolation"), at: i.location)

case .quote, .trivia, .empty:
break
}
Expand Down
4 changes: 4 additions & 0 deletions Sources/_RegexParser/Regex/Printing/DumpAST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ extension AST.Trivia {
}
}

extension AST.Interpolation {
  /// The dump description for this node: the word `interpolation` followed by
  /// the node's contents wrapped in angle brackets.
  public var _dumpBase: String {
    return "interpolation <\(contents)>"
  }
}

extension AST.Empty {
public var _dumpBase: String { "" }
}
Expand Down
9 changes: 9 additions & 0 deletions Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ extension PrettyPrinter {
case let .trivia(t):
output(t._canonicalBase)

case let .interpolation(i):
output(i._canonicalBase)

case let .atom(a):
output(a._canonicalBase)

Expand Down Expand Up @@ -178,6 +181,12 @@ extension AST.Quote {
}
}

extension AST.Interpolation {
  /// The canonical spelling of this node: its contents wrapped in the
  /// `<{` and `}>` delimiters.
  var _canonicalBase: String {
    return "<{\(contents)}>"
  }
}

extension AST.Group.Kind {
var _canonicalBase: String {
switch self {
Expand Down
3 changes: 3 additions & 0 deletions Sources/_StringProcessing/Regex/ASTConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ extension AST.Node {
case let .trivia(v):
return .trivia(v.contents)

case .interpolation:
throw Unsupported("TODO: interpolation")

case let .atom(v):
switch v.kind {
case .scalarSequence(let seq):
Expand Down
23 changes: 23 additions & 0 deletions Tests/RegexTests/ParseTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,20 @@ extension RegexTests {
#"a(?#. comment)b"#,
concat("a", "b"))

// MARK: Interpolation

// These are literal as there's no closing '}>'
parseTest("<{", concat("<", "{"))
parseTest("<{a", concat("<", "{", "a"))
parseTest("<{a}", concat("<", "{", "a", "}"))
parseTest("<{<{}", concat("<", "{", "<", "{", "}"))

// Literal as escaped
parseTest(#"\<{}>"#, concat("<", "{", "}", ">"))

// A quantification
parseTest(#"<{2}"#, exactly(2, of: "<"))

// MARK: Quantification

parseTest("a*", zeroOrMore(of: "a"))
Expand Down Expand Up @@ -2574,6 +2588,15 @@ extension RegexTests {
diagnosticTest("|\u{360}", .confusableCharacter("|\u{360}"))
diagnosticTest(" \u{361}", .confusableCharacter(" \u{361}"))

// MARK: Interpolation (currently unsupported)

diagnosticTest("<{}>", .unsupported("interpolation"))
diagnosticTest("<{...}>", .unsupported("interpolation"))
diagnosticTest("<{)}>", .unsupported("interpolation"))
diagnosticTest("<{}}>", .unsupported("interpolation"))
diagnosticTest("<{<{}>", .unsupported("interpolation"))
diagnosticTest("(<{)}>", .unsupported("interpolation"))

// MARK: Character properties

diagnosticTest(#"\p{Lx}"#, .unknownProperty(key: nil, value: "Lx"))
Expand Down