Skip to content

Commit 4706e09

Browse files
committed
Search by Unicode scalar when in that mode
Previously, searching via firstMatch or matches(of:) would only _start_ each match attempt at a character index, even when the regex had Unicode scalar semantics. The search position now advances by Unicode scalar in that mode.
1 parent a8bea8d commit 4706e09

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,11 @@ extension Regex {
158158
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
159159
return m
160160
}
161-
input.formIndex(after: &low)
161+
if regex.program.loweredProgram.initialOptions.semanticLevel == .graphemeCluster {
162+
input.formIndex(after: &low)
163+
} else {
164+
input.unicodeScalars.formIndex(after: &low)
165+
}
162166
}
163167
return nil
164168
}

Tests/RegexTests/MatchTests.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1612,5 +1612,15 @@ extension RegexTests {
16121612

16131613
// TODO: Add test for grapheme boundaries at start/end of match
16141614

1615+
/// Checks that `matches(of:)` returns the expected matches for the pattern
/// `.\N{SPARKLING HEART}.`, first with the regex as constructed and then with
/// `.unicodeScalar` matching semantics applied.
func testCase() {
1616+
// `try!` traps if the pattern string fails to compile into a `Regex`.
let regex = try! Regex(#".\N{SPARKLING HEART}."#)
1617+
let input = "🧟‍♀️💖🧠 or 🧠💖☕️"
1618+
// NOTE(review): the variable name suggests the unmodified regex matches at
// the character (grapheme cluster) level — confirm against the default
// semantic level.
let characterMatches = input.matches(of: regex)
1619+
// Both expected matches consist of whole emoji on either side of the heart.
XCTAssertEqual(characterMatches.map { $0.0 }, ["🧟‍♀️💖🧠", "🧠💖☕️"])
1620+
1621+
let scalarMatches = input.matches(of: regex.matchingSemantics(.unicodeScalar))
1622+
// Under scalar semantics the expected matches begin or end inside a
// multi-scalar emoji: the first starts at a U+FE0F scalar, and the second
// ends with "☕" with no trailing scalar after it.
let scalarExpected: [Substring] = ["\u{FE0F}💖🧠", "🧠💖☕"]
1623+
XCTAssertEqual(scalarMatches.map { $0.0 }, scalarExpected)
1624+
}
16151625
}
16161626

0 commit comments

Comments
 (0)