Skip to content

Fix a few DSL APIs #580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 15, 2022
Prev Previous commit
Next Next commit
Move AssertionKind onto the DSL
This enum will start including cases that only the
DSL can use, so move it off the AST.
  • Loading branch information
hamishknight committed Jul 14, 2022
commit 21ca2fb7f994ee3dd7c5e79f738f85ad9390b8c6
32 changes: 23 additions & 9 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,30 @@ public struct Anchor {

@available(SwiftStdlib 5.7, *)
extension Anchor: RegexComponent {
var baseAssertion: DSLTree._AST.AssertionKind {
var baseAssertion: DSLTree.Atom.Assertion {
switch kind {
case .startOfSubject: return .startOfSubject(isInverted)
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
case .endOfSubject: return .endOfSubject(isInverted)
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
case .startOfLine: return .startOfLine(isInverted)
case .endOfLine: return .endOfLine(isInverted)
case .wordBoundary: return .wordBoundary(isInverted)
case .startOfSubject:
// FIXME: Inverted?
return .startOfSubject
case .endOfSubjectBeforeNewline:
// FIXME: Inverted?
return .endOfSubjectBeforeNewline
case .endOfSubject:
// FIXME: Inverted?
return .endOfSubject
case .firstMatchingPositionInSubject:
// FIXME: Inverted?
return .firstMatchingPositionInSubject
case .textSegmentBoundary:
return isInverted ? .notTextSegment : .textSegment
case .startOfLine:
// FIXME: Inverted?
return .caretAnchor
case .endOfLine:
// FIXME: Inverted?
return .dollarAnchor
case .wordBoundary:
return isInverted ? .notWordBoundary : .wordBoundary
}
}

Expand Down
61 changes: 0 additions & 61 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
}
}

extension AST.Atom {
/// Anchors and other built-in zero-width assertions.
public enum AssertionKind: String, Hashable {
/// \A
case startOfSubject = #"\A"#

/// \Z
case endOfSubjectBeforeNewline = #"\Z"#

/// \z
case endOfSubject = #"\z"#

/// \K
case resetStartOfMatch = #"\K"#

/// \G
case firstMatchingPositionInSubject = #"\G"#

/// \y
case textSegment = #"\y"#

/// \Y
case notTextSegment = #"\Y"#

/// ^
case caretAnchor = #"^"#

/// $
case dollarAnchor = #"$"#

/// \b (from outside a custom character class)
case wordBoundary = #"\b"#

/// \B
case notWordBoundary = #"\B"#

}

public var assertionKind: AssertionKind? {
switch kind {
case .caretAnchor: return .caretAnchor
case .dollarAnchor: return .dollarAnchor

case .escaped(.wordBoundary): return .wordBoundary
case .escaped(.notWordBoundary): return .notWordBoundary
case .escaped(.startOfSubject): return .startOfSubject
case .escaped(.endOfSubject): return .endOfSubject
case .escaped(.textSegment): return .textSegment
case .escaped(.notTextSegment): return .notTextSegment
case .escaped(.endOfSubjectBeforeNewline):
return .endOfSubjectBeforeNewline
case .escaped(.firstMatchingPositionInSubject):
return .firstMatchingPositionInSubject

case .escaped(.resetStartOfMatch): return .resetStartOfMatch

default: return nil
}
}
}

extension AST.Atom {
public enum Callout: Hashable {
/// A PCRE callout written `(?C...)`
Expand Down
7 changes: 4 additions & 3 deletions Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ extension AST.Atom.Number {

extension AST.Atom {
var _canonicalBase: String {
if let anchor = self.assertionKind {
return anchor.rawValue
}
if let lit = self.literalStringValue {
// FIXME: We may have to re-introduce escapes
// For example, `\.` will come back as "." instead
Expand All @@ -248,6 +245,10 @@ extension AST.Atom {
return lit
}
switch self.kind {
case .caretAnchor:
return "^"
case .dollarAnchor:
return "$"
case .escaped(let e):
return "\\\(e.character)"
case .backreference(let br):
Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ fileprivate extension Compiler.ByteCodeGen {
}

case let .assertion(kind):
try emitAssertion(kind.ast)
try emitAssertion(kind)

case let .backreference(ref):
try emitBackreference(ref.ast)
Expand Down Expand Up @@ -146,7 +146,7 @@ fileprivate extension Compiler.ByteCodeGen {
}

mutating func emitAssertion(
_ kind: AST.Atom.AssertionKind
_ kind: DSLTree.Atom.Assertion
) throws {
// FIXME: Depends on API model we have... We may want to
// think through some of these with API interactions in mind
Expand Down
6 changes: 3 additions & 3 deletions Sources/_StringProcessing/PrintAsPattern.swift
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ extension StringLiteralBuilder: CustomStringConvertible {
var description: String { result }
}

extension AST.Atom.AssertionKind {
extension DSLTree.Atom.Assertion {
// TODO: Some way to integrate this with conversion...
var _patternBase: String {
switch self {
Expand Down Expand Up @@ -835,7 +835,7 @@ extension AST.Atom {
///
/// TODO: Some way to integrate this with conversion...
var _patternBase: (String, canBeWrapped: Bool) {
if let anchor = self.assertionKind {
if let anchor = self.dslAssertionKind {
return (anchor._patternBase, false)
}

Expand Down Expand Up @@ -1148,7 +1148,7 @@ extension DSLTree.Atom {
}

case .assertion(let a):
return (a.ast._patternBase, false)
return (a._patternBase, false)

case .backreference(_):
return ("/* TOOD: backreferences */", false)
Expand Down
32 changes: 30 additions & 2 deletions Sources/_StringProcessing/Regex/ASTConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,38 @@ extension AST.CustomCharacterClass {
}
}

extension AST.Atom.EscapedBuiltin {
  /// The DSL assertion this escaped builtin corresponds to, or `nil` for
  /// every builtin that is not listed below.
  var dslAssertionKind: DSLTree.Atom.Assertion? {
    let assertion: DSLTree.Atom.Assertion
    switch self {
    // Subject anchors.
    case .startOfSubject:
      assertion = .startOfSubject
    case .endOfSubjectBeforeNewline:
      assertion = .endOfSubjectBeforeNewline
    case .endOfSubject:
      assertion = .endOfSubject
    case .firstMatchingPositionInSubject:
      assertion = .firstMatchingPositionInSubject
    case .resetStartOfMatch:
      assertion = .resetStartOfMatch

    // Boundary assertions and their negated forms.
    case .textSegment:
      assertion = .textSegment
    case .notTextSegment:
      assertion = .notTextSegment
    case .wordBoundary:
      assertion = .wordBoundary
    case .notWordBoundary:
      assertion = .notWordBoundary

    // Anything else has no assertion counterpart here.
    default:
      return nil
    }
    return assertion
  }
}

extension AST.Atom {
  /// The DSL assertion represented by this atom, if any.
  ///
  /// `^` and `$` map directly to their anchor cases; an escaped builtin
  /// defers to `AST.Atom.EscapedBuiltin.dslAssertionKind`. Every other
  /// atom kind yields `nil`.
  var dslAssertionKind: DSLTree.Atom.Assertion? {
    // Escaped builtins decide for themselves whether they are assertions.
    if case .escaped(let builtin) = kind {
      return builtin.dslAssertionKind
    }

    switch kind {
    case .caretAnchor:
      return .caretAnchor
    case .dollarAnchor:
      return .dollarAnchor
    default:
      return nil
    }
  }
}

extension AST.Atom {
var dslTreeAtom: DSLTree.Atom {
if let kind = assertionKind {
return .assertion(.init(ast: kind))
if let kind = dslAssertionKind {
return .assertion(kind)
}

switch self.kind {
Expand Down
74 changes: 39 additions & 35 deletions Sources/_StringProcessing/Regex/DSLTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ extension DSLTree {
/// newlines unless single line mode is enabled.
case dot

case assertion(_AST.AssertionKind)
case assertion(Assertion)
case backreference(_AST.Reference)
case symbolicReference(ReferenceID)

Expand All @@ -183,6 +183,44 @@ extension DSLTree {
}
}

extension DSLTree.Atom {
/// Anchors and other built-in zero-width assertions.
///
/// This is the payload of `DSLTree.Atom.assertion`. Each case is annotated
/// with the regex syntax it corresponds to.
@_spi(RegexBuilder)
public enum Assertion: Hashable {
/// \A
case startOfSubject

/// \Z
case endOfSubjectBeforeNewline

/// \z
case endOfSubject

/// \K
case resetStartOfMatch

/// \G
case firstMatchingPositionInSubject

/// \y
case textSegment

/// \Y
case notTextSegment

/// ^
case caretAnchor

/// $
case dollarAnchor

/// \b (from outside a custom character class)
case wordBoundary

/// \B
case notWordBoundary
}
}

extension Unicode.GeneralCategory {
var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? {
switch self {
Expand Down Expand Up @@ -699,40 +737,6 @@ extension DSLTree {
internal var ast: AST.AbsentFunction
}

@_spi(RegexBuilder)
public struct AssertionKind {
internal var ast: AST.Atom.AssertionKind

public static func startOfSubject(_ inverted: Bool = false) -> Self {
.init(ast: .startOfSubject)
}
public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self {
.init(ast: .endOfSubjectBeforeNewline)
}
public static func endOfSubject(_ inverted: Bool = false) -> Self {
.init(ast: .endOfSubject)
}
public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self {
.init(ast: .firstMatchingPositionInSubject)
}
public static func textSegmentBoundary(_ inverted: Bool = false) -> Self {
inverted
? .init(ast: .notTextSegment)
: .init(ast: .textSegment)
}
public static func startOfLine(_ inverted: Bool = false) -> Self {
.init(ast: .caretAnchor)
}
public static func endOfLine(_ inverted: Bool = false) -> Self {
.init(ast: .dollarAnchor)
}
public static func wordBoundary(_ inverted: Bool = false) -> Self {
inverted
? .init(ast: .notWordBoundary)
: .init(ast: .wordBoundary)
}
}

@_spi(RegexBuilder)
public struct Reference {
internal var ast: AST.Reference
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Utility/RegexFactory.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public struct _RegexFactory {
@_spi(RegexBuilder)
@available(SwiftStdlib 5.7, *)
public func assertion<Output>(
_ kind: DSLTree._AST.AssertionKind
_ kind: DSLTree.Atom.Assertion
) -> Regex<Output> {
.init(node: .atom(.assertion(kind)))
}
Expand Down