Skip to content

[Integration] main (96fb215) -> swift/main #589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jul 20, 2022
Merged
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9212b43
Validate optimizations when a match fails
hamishknight Jul 7, 2022
33856e7
Merge pull request #559 from hamishknight/validate-test
hamishknight Jul 7, 2022
33acdeb
Break out of quantification loop if there is no forward progress (#560)
rctcwyvrn Jul 11, 2022
7752047
Optimize matching to match on scalar values when possible (#525)
rctcwyvrn Jul 12, 2022
8f93498
Rip out unused _CharacterClassModel API
hamishknight Jul 14, 2022
297a69d
Remove _CharacterClassModel conformance to RegexComponent
hamishknight Jul 14, 2022
7d5e86d
Internalize `_CharacterClassModel`
hamishknight Jul 14, 2022
99e5e51
Merge pull request #578 from hamishknight/internalize-character-model
hamishknight Jul 14, 2022
d5010fb
Fix `CharacterClass.newlineSequence`
hamishknight Jul 14, 2022
446bfd4
Rename `any` -> `dot`
hamishknight Jul 14, 2022
efe90d1
Re-introduce `DSLTree.Atom.any`
hamishknight Jul 14, 2022
8f8c7d0
Fix `CharacterClass.any`
hamishknight Jul 14, 2022
657351e
Rename `startOfLine`/`endOfLine` -> `caretAnchor`/`dollarAnchor`
hamishknight Jul 14, 2022
21ca2fb
Move AssertionKind onto the DSL
hamishknight Jul 14, 2022
210bfa3
Fix `Anchor.startOfLine` and `Anchor.endOfLine`
hamishknight Jul 14, 2022
f111a57
Add some tests for `CharacterClass.anyGraphemeCluster`
hamishknight Jul 14, 2022
9a545a0
Add some tests for `CharacterClass.horizontalWhitespace`
hamishknight Jul 14, 2022
9456c54
Implement `CharacterClass.anyNonNewline`
hamishknight Jul 14, 2022
9bcb72f
Rename various APIs
Azoy Jul 13, 2022
33566dc
Merge pull request #575 from Azoy/various-tidbits
Azoy Jul 14, 2022
9f1f309
Move options from RegexComponent to Regex
Azoy Jul 13, 2022
991d90c
Merge pull request #576 from Azoy/options-regex
Azoy Jul 14, 2022
1f2ae04
Merge pull request #580 from hamishknight/character-work
hamishknight Jul 15, 2022
96fb215
Benchmarker improvements and more benchmarks (#581)
rctcwyvrn Jul 15, 2022
3a2a785
Merge branch 'main' into main-merge
hamishknight Jul 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions Tests/RegexTests/MatchTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,33 @@ func _firstMatch(
input: String,
validateOptimizations: Bool,
syntax: SyntaxOptions = .traditional
) throws -> (String, [String?]) {
) throws -> (String, [String?])? {
var regex = try Regex(regexStr, syntax: syntax)
guard let result = try regex.firstMatch(in: input) else {
throw MatchError("match not found for \(regexStr) in \(input)")
}
let caps = result.output.slices(from: input)

let result = try regex.firstMatch(in: input)

if validateOptimizations {
regex._setCompilerOptionsForTesting(.disableOptimizations)
guard let unoptResult = try regex.firstMatch(in: input) else {
let unoptResult = try regex.firstMatch(in: input)
if result != nil && unoptResult == nil {
throw MatchError("match not found for unoptimized \(regexStr) in \(input)")
}
XCTAssertEqual(
String(input[result.range]),
String(input[unoptResult.range]),
"Unoptimized regex returned a different result")
if result == nil && unoptResult != nil {
throw MatchError("match not found in optimized \(regexStr) in \(input)")
}
if let result = result, let unoptResult = unoptResult {
let optMatch = String(input[result.range])
let unoptMatch = String(input[unoptResult.range])
if optMatch != unoptMatch {
throw MatchError("""

Unoptimized regex returned: '\(unoptMatch)'
Optimized regex returned: '\(optMatch)'
""")
}
}
}
guard let result = result else { return nil }
let caps = result.output.slices(from: input)
return (String(input[result.range]), caps.map { $0.map(String.init) })
}

Expand Down Expand Up @@ -147,21 +157,19 @@ func firstMatchTest(
line: UInt = #line
) {
do {
let (found, _) = try _firstMatch(
let found = try _firstMatch(
regex,
input: input,
validateOptimizations: validateOptimizations,
syntax: syntax)
syntax: syntax)?.0

if xfail {
XCTAssertNotEqual(found, match, file: file, line: line)
} else {
XCTAssertEqual(found, match, file: file, line: line)
}
} catch {
// FIXME: This allows non-matches to succeed even when xfail'd
// When xfail == true, this should report failure for match == nil
if !xfail && match != nil {
if !xfail {
XCTFail("\(error)", file: file, line: line)
}
return
Expand Down Expand Up @@ -421,8 +429,7 @@ extension RegexTests {
"a++a",
("babc", nil),
("baaabc", nil),
("bb", nil),
xfail: true)
("bb", nil))
firstMatchTests(
"a+?a",
("babc", nil),
Expand Down Expand Up @@ -498,23 +505,19 @@ extension RegexTests {
("baabc", nil),
("bb", nil))

// XFAIL'd versions of the above
firstMatchTests(
"a{2,4}+a",
("baaabc", nil),
xfail: true)
("baaabc", nil))
firstMatchTests(
"a{,4}+a",
("babc", nil),
("baabc", nil),
("baaabc", nil),
xfail: true)
("baaabc", nil))
firstMatchTests(
"a{2,}+a",
("baaabc", nil),
("baaaaabc", nil),
("baaaaaaaabc", nil),
xfail: true)
("baaaaaaaabc", nil))

// XFAIL'd possessive tests
firstMatchTests(
Expand Down Expand Up @@ -709,6 +712,11 @@ extension RegexTests {
}
firstMatchTest(#"[\t-\t]"#, input: "\u{8}\u{A}\u{9}", match: "\u{9}")

// FIXME: This produces a different result with and without optimizations.
firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil, xfail: true)
firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil,
validateOptimizations: false)

// Currently not supported in the matching engine.
for c: UnicodeScalar in ["a", "b", "c"] {
firstMatchTest(#"[\c!-\C-#]"#, input: "def\(c)", match: "\(c)",
Expand Down Expand Up @@ -1054,8 +1062,8 @@ extension RegexTests {
// TODO: Oniguruma \y and \Y
firstMatchTests(
#"\u{65}"#, // Scalar 'e' is present in both
("Cafe\u{301}", nil), // but scalar mode requires boundary at end of match
xfail: true)
("Cafe\u{301}", nil)) // but scalar mode requires boundary at end of match

firstMatchTests(
#"\u{65}"#, // Scalar 'e' is present in both
("Sol Cafe", "e")) // standalone is okay
Expand Down Expand Up @@ -1647,19 +1655,15 @@ extension RegexTests {
firstMatchTest(#"\u{65 301}$"#, input: eComposed, match: eComposed)

// FIXME: Implicit \y at end of match
firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil,
xfail: true)
firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil)
firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil)
// FIXME: \y is unsupported
firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil,
xfail: true)
firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil)

// FIXME: Unicode scalars are only matched at the start of a grapheme cluster
firstMatchTest(#"\u{301}"#, input: eDecomposed, match: "\u{301}",
xfail: true)
// FIXME: \y is unsupported
firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil,
xfail: true)

firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil)
}

func testCanonicalEquivalence() throws {
Expand Down Expand Up @@ -1717,13 +1721,11 @@ extension RegexTests {
// \s
firstMatchTest(#"\s"#, input: " ", match: " ")
// FIXME: \s shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
xfail: true)
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
// \p{Whitespace}
firstMatchTest(#"\s"#, input: " ", match: " ")
// FIXME: \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
xfail: true)
// \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
}

func testCanonicalEquivalenceCustomCharacterClass() throws {
Expand Down