Skip to content

[Integration] main (f779459) -> swift/main #400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e748aea
Add NegativeLookahead and Anchor comments (#372)
natecook1000 May 2, 2022
13342eb
Add matching support for `\p{Lc}`
hamishknight May 3, 2022
925f51b
Add parser support for `\p{L&}`
hamishknight May 3, 2022
ade8f01
Merge pull request #373 from hamishknight/case-in-prop
hamishknight May 3, 2022
c44efeb
Update ProposalOverview.md
milseman May 3, 2022
9801855
Add tests for AnyRegexOutput (#371)
milseman May 3, 2022
0e5cfa8
Rename noAutoCapture -> namedCapturesOnly
hamishknight May 4, 2022
2a4b3a6
Implement the `(?n)` option
hamishknight May 4, 2022
f22cb4f
Merge pull request #377 from hamishknight/named-captures-only
hamishknight May 4, 2022
6d833aa
Improve Unicode/UTS18 and semantic level support (#268)
natecook1000 May 5, 2022
09a385b
Support Unicode scalar names in `\p{name=...}` (#382)
natecook1000 May 6, 2022
39c0ed5
Modify DSL test to test for uncaptured backreference (#355)
natecook1000 May 6, 2022
9740416
Introduce ASTStage parameter to `parse`
hamishknight May 9, 2022
4b31736
Implement semantic diagnostics
hamishknight May 9, 2022
466b375
Validate capture lists
hamishknight May 9, 2022
c95e862
Address review feedback
hamishknight May 9, 2022
7f068dc
Merge pull request #379 from hamishknight/sema
hamishknight May 9, 2022
c16e389
Implement \R, \v, \h for character/scalar modes (#384)
natecook1000 May 9, 2022
c13980f
De-deprecate MatchingOptions.matchLevel (#390)
natecook1000 May 9, 2022
61965c3
Restrict character property fuzzy matching to "pattern whitespace"
hamishknight May 10, 2022
05e610a
Improve the wording of a diagnostic
hamishknight May 10, 2022
7752015
Introduce AST.Atom.Scalar
hamishknight May 10, 2022
f436cca
Introduce scalar sequences `\u{AA BB CC}`
hamishknight May 10, 2022
0597164
Fix invalid indexing
hamishknight May 10, 2022
0872d16
Fix source location tracking in `lexUntil`
hamishknight May 10, 2022
5b30c5b
Merge pull request #386 from hamishknight/multiscalar
hamishknight May 10, 2022
b209e4f
Tidy up build flags and fix implicit import circular dependency (#392)
rxwei May 10, 2022
f779459
Catch more unquantifiable elements (#391)
natecook1000 May 10, 2022
f37fc9b
Merge branch 'main' into main-merge
hamishknight May 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address review feedback
- Make `\h` and `\H` supported for now
- Check character class ranges
- Diagnose unquantifiable escape sequences
  • Loading branch information
hamishknight committed May 9, 2022
commit c95e8621dc9bfd3aadde0867ed7646b9335ec9a1
19 changes: 19 additions & 0 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,23 @@ extension AST.Atom.EscapedBuiltin {
return nil
}
}

/// Whether a quantifier may be applied to this escaped builtin.
///
/// The switch deliberately has no `default` arm: every case is listed
/// explicitly, so adding a new `EscapedBuiltin` case is a compile error here
/// until its quantifiability is decided.
public var isQuantifiable: Bool {
  switch self {
  // Escapes that reject a quantifier: word boundaries, text-segment
  // boundaries, subject/match-position anchors, and the match-start reset.
  case .wordBoundary, .notWordBoundary,
       .textSegment, .notTextSegment,
       .startOfSubject, .endOfSubject, .endOfSubjectBeforeNewline,
       .firstMatchingPositionInSubject, .resetStartOfMatch:
    return false

  // Every remaining escape accepts a quantifier: single-character escapes
  // and the builtin character-class style escapes.
  case .alarm, .backspace, .escape, .formfeed, .newline, .carriageReturn,
       .tab, .singleDataUnit,
       .decimalDigit, .notDecimalDigit,
       .horizontalWhitespace, .notHorizontalWhitespace,
       .verticalTab, .notVerticalTab,
       .whitespace, .notWhitespace,
       .wordCharacter, .notWordCharacter,
       .notNewline, .newlineSequence,
       .graphemeCluster, .trueAnychar:
    return true
  }
}
}

extension AST.Atom {
Expand Down Expand Up @@ -749,6 +766,8 @@ extension AST.Atom {
case .changeMatchingOptions:
return false
// TODO: Are callouts quantifiable?
case .escaped(let esc):
return esc.isQuantifiable
default:
return true
}
Expand Down
9 changes: 6 additions & 3 deletions Sources/_RegexParser/Regex/Parse/Diagnostics.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ enum ParseError: Error, Hashable {

case cannotReferToWholePattern

case notQuantifiable
case quantifierRequiresOperand(String)

case backtrackingDirectiveMustHaveName(String)
Expand Down Expand Up @@ -83,6 +82,8 @@ enum ParseError: Error, Hashable {
case duplicateNamedCapture(String)
case invalidCharacterClassRangeOperand
case invalidQuantifierRange(Int, Int)
case invalidCharacterRange(from: Character, to: Character)
case notQuantifiable
}

extension IdentifierKind {
Expand Down Expand Up @@ -125,8 +126,6 @@ extension ParseError: CustomStringConvertible {
return "invalid escape sequence '\\\(c)'"
case .cannotReferToWholePattern:
return "cannot refer to whole pattern here"
case .notQuantifiable:
return "expression is not quantifiable"
case .quantifierRequiresOperand(let q):
return "quantifier '\(q)' must appear after expression"
case .backtrackingDirectiveMustHaveName(let b):
Expand Down Expand Up @@ -191,6 +190,10 @@ extension ParseError: CustomStringConvertible {
return "group named '\(str)' already exists"
case let .invalidQuantifierRange(lhs, rhs):
return "range lower bound '\(lhs)' must be less than or equal to upper bound '\(rhs)'"
case let .invalidCharacterRange(from: lhs, to: rhs):
return "character '\(lhs)' must compare less than or equal to '\(rhs)'"
case .notQuantifiable:
return "expression is not quantifiable"
}
}
}
Expand Down
3 changes: 0 additions & 3 deletions Sources/_RegexParser/Regex/Parse/Parse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,6 @@ extension Parser {
if let (amt, kind, trivia) =
try source.lexQuantifier(context: context) {
let location = loc(_start)
guard operand.isQuantifiable else {
throw Source.LocatedError(ParseError.notQuantifiable, location)
}
result.append(.quantification(
.init(amt, kind, operand, location, trivia: trivia)))
} else {
Expand Down
21 changes: 12 additions & 9 deletions Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,15 @@ extension RegexValidator {
_ esc: AST.Atom.EscapedBuiltin, at loc: SourceLocation
) throws {
switch esc {
case .resetStartOfMatch, .singleDataUnit, .horizontalWhitespace,
.notHorizontalWhitespace, .verticalTab, .notVerticalTab,
case .resetStartOfMatch, .singleDataUnit, .verticalTab, .notVerticalTab,
// '\N' needs to be emitted using 'emitAny'.
.notNewline:
throw error(.unsupported("'\\\(esc.character)'"), at: loc)

// Character classes.
case .decimalDigit, .notDecimalDigit, .whitespace, .notWhitespace,
.wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar:
// TODO: What about scalar matching mode for .graphemeCluster? We
// currently crash at runtime.
.wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar,
.horizontalWhitespace, .notHorizontalWhitespace:
break

case .newlineSequence:
Expand Down Expand Up @@ -271,18 +269,20 @@ extension RegexValidator {
throw error(.invalidCharacterClassRangeOperand, at: rhs.location)
}

guard lhs.literalCharacterValue != nil else {
guard let lhsChar = lhs.literalCharacterValue else {
throw error(
.unsupported("character class range operand"), at: lhs.location)
}

guard rhs.literalCharacterValue != nil else {
guard let rhsChar = rhs.literalCharacterValue else {
throw error(
.unsupported("character class range operand"), at: rhs.location)
}

// TODO: Validate lhs <= rhs? That may require knowledge of case
// insensitivity though.
guard lhsChar <= rhsChar else {
throw error(
.invalidCharacterRange(from: lhsChar, to: rhsChar), at: range.dashLoc)
}
}

func validateCharacterClassMember(
Expand Down Expand Up @@ -341,6 +341,9 @@ extension RegexValidator {

func validateQuantification(_ quant: AST.Quantification) throws {
try validateNode(quant.child)
guard quant.child.isQuantifiable else {
throw error(.notQuantifiable, at: quant.child.location)
}
switch quant.amount.value {
case .range(let lhs, let rhs):
guard lhs.value <= rhs.value else {
Expand Down
32 changes: 25 additions & 7 deletions Tests/RegexTests/ParseTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ extension RegexTests {
parseTest("[-a-]", charClass("-", "a", "-"))

parseTest("[a-z]", charClass(range_m("a", "z")))
parseTest("[a-a]", charClass(range_m("a", "a")))
parseTest("[B-a]", charClass(range_m("B", "a")))

// FIXME: AST builder helpers for custom char class types
parseTest("[a-d--a-c]", charClass(
Expand Down Expand Up @@ -2442,6 +2444,11 @@ extension RegexTests {

diagnosticTest(#"|([\d-c])?"#, .invalidCharacterClassRangeOperand)

diagnosticTest(#"[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
diagnosticTest(#"(?i)[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b"))
diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}"))

// MARK: Bad escapes

diagnosticTest("\\", .expectedEscape)
Expand Down Expand Up @@ -2555,6 +2562,17 @@ extension RegexTests {
diagnosticTest("{1,3}", .quantifierRequiresOperand("{1,3}"))
diagnosticTest("a{3,2}", .invalidQuantifierRange(3, 2))

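// These are not quantifiable. (`\K` is diagnosed as unsupported first, since
// the operand is validated before the quantifiability check.)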
diagnosticTest(#"\b?"#, .notQuantifiable)
diagnosticTest(#"\B*"#, .notQuantifiable)
diagnosticTest(#"\A+"#, .notQuantifiable)
diagnosticTest(#"\Z??"#, .notQuantifiable)
diagnosticTest(#"\G*?"#, .notQuantifiable)
diagnosticTest(#"\z+?"#, .notQuantifiable)
diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#))
diagnosticTest(#"\y{2,5}"#, .notQuantifiable)
diagnosticTest(#"\Y{3,}"#, .notQuantifiable)

// MARK: Unicode scalars

diagnosticTest(#"\u{G}"#, .expectedNumber("G", kind: .hex))
Expand Down Expand Up @@ -2641,13 +2659,13 @@ extension RegexTests {

diagnosticTest("(*MARK)", .backtrackingDirectiveMustHaveName("MARK"))
diagnosticTest("(*:)", .expectedNonEmptyContents)
diagnosticTest("(*MARK:a)?", .notQuantifiable)
diagnosticTest("(*FAIL)+", .notQuantifiable)
diagnosticTest("(*COMMIT:b)*", .notQuantifiable)
diagnosticTest("(*PRUNE:a)??", .notQuantifiable)
diagnosticTest("(*SKIP:a)*?", .notQuantifiable)
diagnosticTest("(*F)+?", .notQuantifiable)
diagnosticTest("(*:a){2}", .notQuantifiable)
diagnosticTest("(*MARK:a)?", .unsupported("backtracking directive"))
diagnosticTest("(*FAIL)+", .unsupported("backtracking directive"))
diagnosticTest("(*COMMIT:b)*", .unsupported("backtracking directive"))
diagnosticTest("(*PRUNE:a)??", .unsupported("backtracking directive"))
diagnosticTest("(*SKIP:a)*?", .unsupported("backtracking directive"))
diagnosticTest("(*F)+?", .unsupported("backtracking directive"))
diagnosticTest("(*:a){2}", .unsupported("backtracking directive"))

// MARK: Oniguruma absent functions

Expand Down