Skip to content

Overhaul quantification optimizations #716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Feb 7, 2024
Merged
Prev Previous commit
Next Next commit
wip
  • Loading branch information
milseman committed Dec 14, 2023
commit 2d7a3914acd744e64ed65929a6b6fba985b815a4
339 changes: 302 additions & 37 deletions Sources/_StringProcessing/Engine/MEQuantify.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,120 @@ extension Processor {
let produceSavePointRange = payload.quantKind == .eager
let isScalarSemantics = payload.isScalarSemantics

let isZeroOrMore = payload.minTrips == 0 && payload.maxExtraTrips == nil
let isOneOrMore = payload.minTrips == 1 && payload.maxExtraTrips == nil

let matchResult: (next: String.Index, savePointRange: Range<Position>?)?
switch payload.type {
case .asciiBitset:
matchResult = input.matchQuantifiedASCIIBitset(
registers[payload.bitset],
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
if isZeroOrMore {
matchResult = input.matchZeroOrMoreASCIIBitset(
registers[payload.bitset],
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
} else if isOneOrMore {
matchResult = input.matchOneOrMoreASCIIBitset(
registers[payload.bitset],
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
} else {
matchResult = input.matchQuantifiedASCIIBitset(
registers[payload.bitset],
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
}

case .asciiChar:
matchResult = input.matchQuantifiedScalar(
Unicode.Scalar(payload.asciiChar),
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
if isZeroOrMore {
matchResult = input.matchZeroOrMoreScalar(
Unicode.Scalar(payload.asciiChar),
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
} else if isOneOrMore {
matchResult = input.matchOneOrMoreScalar(
Unicode.Scalar(payload.asciiChar),
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
} else {
matchResult = input.matchQuantifiedScalar(
Unicode.Scalar(payload.asciiChar),
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics)
}

case .any:
matchResult = input.matchQuantifiedRegexDot(
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isScalarSemantics: isScalarSemantics,
anyMatchesNewline: payload.anyMatchesNewline)
if isZeroOrMore {
matchResult = input.matchZeroOrMoreRegexDot(
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
anyMatchesNewline: payload.anyMatchesNewline,
isScalarSemantics: isScalarSemantics)
} else if isOneOrMore {
matchResult = input.matchOneOrMoreRegexDot(
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
anyMatchesNewline: payload.anyMatchesNewline,
isScalarSemantics: isScalarSemantics)
} else {
matchResult = input.matchQuantifiedRegexDot(
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
anyMatchesNewline: payload.anyMatchesNewline,
isScalarSemantics: isScalarSemantics)
}

case .builtin:
matchResult = input.matchQuantifiedBuiltinCC(
payload.builtin,
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isInverted: payload.builtinIsInverted,
isStrictASCII: payload.builtinIsStrict,
isScalarSemantics: isScalarSemantics)
if isZeroOrMore {
matchResult = input.matchZeroOrMoreBuiltinCC(
payload.builtin,
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isInverted: payload.builtinIsInverted,
isStrictASCII: payload.builtinIsStrict,
isScalarSemantics: isScalarSemantics)
} else if isOneOrMore {
matchResult = input.matchOneOrMoreBuiltinCC(
payload.builtin,
at: currentPosition,
limitedBy: end,
produceSavePointRange: produceSavePointRange,
isInverted: payload.builtinIsInverted,
isStrictASCII: payload.builtinIsStrict,
isScalarSemantics: isScalarSemantics)
} else {
matchResult = input.matchQuantifiedBuiltinCC(
payload.builtin,
at: currentPosition,
limitedBy: end,
minMatches: minMatches,
maxMatches: maxMatches,
produceSavePointRange: produceSavePointRange,
isInverted: payload.builtinIsInverted,
isStrictASCII: payload.builtinIsStrict,
isScalarSemantics: isScalarSemantics)
}
}

guard let (next, savePointRange) = matchResult else {
Expand Down Expand Up @@ -121,7 +192,55 @@ extension String {
// position, because newline-sequence in scalar semantic mode still
// matches two scalars

return (currentPosition, rangeStart..<rangeEnd)
return (currentPosition, rangeStart..<rangeEnd)
}

/// NOTE: The [Zero|One]OrMore overloads exist to specialize the inlined run
/// loop, which has a substantive perf impact (especially for zero-or-more).

/// Zero-or-more form of the quantified ASCII-bitset match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 0` and
/// `maxMatches: UInt64.max`, passing `matchASCIIBitset` as the matching
/// closure. `produceSavePointRange` and `isScalarSemantics` are handed to the
/// loop unchanged. Kept as a dedicated overload so the inlined run loop is
/// specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchZeroOrMoreASCIIBitset(
  _ asciiBitset: ASCIIBitset,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // Use the values the loop passes in, not the outer arguments.
    return matchASCIIBitset(
      asciiBitset,
      at: position,
      limitedBy: limit,
      isScalarSemantics: scalarSemantics)
  }
}
/// One-or-more form of the quantified ASCII-bitset match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 1` and
/// `maxMatches: UInt64.max`, passing `matchASCIIBitset` as the matching
/// closure. `produceSavePointRange` and `isScalarSemantics` are handed to the
/// loop unchanged. Kept as a dedicated overload so the inlined run loop is
/// specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchOneOrMoreASCIIBitset(
  _ asciiBitset: ASCIIBitset,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // Use the values the loop passes in, not the outer arguments.
    return matchASCIIBitset(
      asciiBitset,
      at: position,
      limitedBy: limit,
      isScalarSemantics: scalarSemantics)
  }
}

fileprivate func matchQuantifiedASCIIBitset(
Expand Down Expand Up @@ -149,6 +268,54 @@ extension String {
}
}

/// Zero-or-more form of the quantified single-scalar match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 0` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchScalar` with
/// case-insensitivity disabled and with `boundaryCheck` set to the negation
/// of the scalar-semantics flag the loop passes in. Kept as a dedicated
/// overload so the inlined run loop is specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchZeroOrMoreScalar(
  _ scalar: Unicode.Scalar,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // The boundary check is requested only when not in scalar semantics.
    return matchScalar(
      scalar,
      at: position,
      limitedBy: limit,
      boundaryCheck: !scalarSemantics,
      isCaseInsensitive: false)
  }
}
/// One-or-more form of the quantified single-scalar match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 1` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchScalar` with
/// case-insensitivity disabled and with `boundaryCheck` set to the negation
/// of the scalar-semantics flag the loop passes in. Kept as a dedicated
/// overload so the inlined run loop is specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchOneOrMoreScalar(
  _ scalar: Unicode.Scalar,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // The boundary check is requested only when not in scalar semantics.
    return matchScalar(
      scalar,
      at: position,
      limitedBy: limit,
      boundaryCheck: !scalarSemantics,
      isCaseInsensitive: false)
  }
}

fileprivate func matchQuantifiedScalar(
_ scalar: Unicode.Scalar,
at currentPosition: Index,
Expand Down Expand Up @@ -176,6 +343,59 @@ extension String {
}
}

/// Zero-or-more form of the quantified built-in character class match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 0` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchBuiltinCC`
/// with the captured `builtinCC`, `isInverted` and `isStrictASCII` values
/// plus the position, limit and scalar-semantics flag the loop passes in.
/// Kept as a dedicated overload so the inlined run loop is specialized for
/// these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchZeroOrMoreBuiltinCC(
  _ builtinCC: _CharacterClassModel.Representation,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // `isInverted` and `isStrictASCII` are captured from the enclosing call.
    return matchBuiltinCC(
      builtinCC,
      at: position,
      limitedBy: limit,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: scalarSemantics)
  }
}
/// One-or-more form of the quantified built-in character class match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 1` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchBuiltinCC`
/// with the captured `builtinCC`, `isInverted` and `isStrictASCII` values
/// plus the position, limit and scalar-semantics flag the loop passes in.
/// Kept as a dedicated overload so the inlined run loop is specialized for
/// these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchOneOrMoreBuiltinCC(
  _ builtinCC: _CharacterClassModel.Representation,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // `isInverted` and `isStrictASCII` are captured from the enclosing call.
    return matchBuiltinCC(
      builtinCC,
      at: position,
      limitedBy: limit,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: scalarSemantics)
  }
}

fileprivate func matchQuantifiedBuiltinCC(
_ builtinCC: _CharacterClassModel.Representation,
at currentPosition: Index,
Expand Down Expand Up @@ -205,14 +425,59 @@ extension String {
}
}

/// Zero-or-more form of the quantified regex-dot match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 0` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchRegexDot` with
/// the captured `anyMatchesNewline` value plus the position, limit and
/// scalar-semantics flag the loop passes in. Kept as a dedicated overload so
/// the inlined run loop is specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchZeroOrMoreRegexDot(
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  anyMatchesNewline: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // `anyMatchesNewline` is captured from the enclosing call.
    return matchRegexDot(
      at: position,
      limitedBy: limit,
      anyMatchesNewline: anyMatchesNewline,
      isScalarSemantics: scalarSemantics)
  }
}
/// One-or-more form of the quantified regex-dot match.
///
/// Forwards to `_runQuantLoop` with the fixed bounds `minMatches: 1` and
/// `maxMatches: UInt64.max`. The matching closure calls `matchRegexDot` with
/// the captured `anyMatchesNewline` value plus the position, limit and
/// scalar-semantics flag the loop passes in. Kept as a dedicated overload so
/// the inlined run loop is specialized for these constant bounds.
///
/// - Returns: Whatever `_runQuantLoop` returns for these arguments.
fileprivate func matchOneOrMoreRegexDot(
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  anyMatchesNewline: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // `anyMatchesNewline` is captured from the enclosing call.
    return matchRegexDot(
      at: position,
      limitedBy: limit,
      anyMatchesNewline: anyMatchesNewline,
      isScalarSemantics: scalarSemantics)
  }
}

fileprivate func matchQuantifiedRegexDot(
at currentPosition: Index,
limitedBy end: Index,
minMatches: UInt64,
maxMatches: UInt64,
produceSavePointRange: Bool,
isScalarSemantics: Bool,
anyMatchesNewline: Bool
anyMatchesNewline: Bool,
isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
_runQuantLoop(
at: currentPosition,
Expand Down