Skip to content

Commit 3920d3b

Browse files
committed
Suggestions from code review
1 parent 1b93768 commit 3920d3b

File tree

2 files changed

+37
-62
lines changed

2 files changed

+37
-62
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -609,21 +609,6 @@ public bool TryCreateNewTransition(
609609
}
610610
}
611611

612-
/// <summary>Gets or creates a new DFA transition. This version takes and returns state IDs.</summary>
613-
public bool TryCreateNewTransition(
614-
int sourceStateId, int mintermId, int offset, bool checkThreshold, out int nextStateId)
615-
{
616-
Debug.Assert(sourceStateId > 0);
617-
Debug.Assert(_stateArray is not null);
618-
if (TryCreateNewTransition(_stateArray[sourceStateId], mintermId, offset, checkThreshold, out DfaMatchingState<TSet>? nextState))
619-
{
620-
nextStateId = nextState.Id;
621-
return true;
622-
}
623-
nextStateId = -1;
624-
return false;
625-
}
626-
627612
/// <summary>Gets or creates a new NFA transition.</summary>
628613
public int[] CreateNewNfaTransition(int nfaStateId, int mintermId, int nfaOffset)
629614
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs

Lines changed: 37 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,13 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
315315
// the position of the last b: aacaaaabbbc. It additionally records the position of the first a after
316316
// the c as the low boundary for the starting position.
317317
int matchStartLowBoundary, matchStartLengthMarker;
318-
int matchEnd = SpecializedFindEndPosition(input, startat, timeoutOccursAt, mode, out matchStartLowBoundary, out matchStartLengthMarker, perThreadData);
318+
int matchEnd = (_findOpts is null, _pattern._info.ContainsSomeAnchor) switch
319+
{
320+
(false, false) => FindEndPosition<NoOptimizationsInitialStateHandler, NoAnchorsNullabilityHandler>(input, startat, timeoutOccursAt, mode, out matchStartLowBoundary, out matchStartLengthMarker, perThreadData),
321+
(true, false) => FindEndPosition<InitialStateFindOptimizationsHandler, NoAnchorsNullabilityHandler>(input, startat, timeoutOccursAt, mode, out matchStartLowBoundary, out matchStartLengthMarker, perThreadData),
322+
(false, true) => FindEndPosition<NoOptimizationsInitialStateHandler, FullNullabilityHandler>(input, startat, timeoutOccursAt, mode, out matchStartLowBoundary, out matchStartLengthMarker, perThreadData),
323+
(true, true) => FindEndPosition<InitialStateFindOptimizationsHandler, FullNullabilityHandler>(input, startat, timeoutOccursAt, mode, out matchStartLowBoundary, out matchStartLengthMarker, perThreadData),
324+
};
319325

320326
// If there wasn't a match, we're done.
321327
if (matchEnd == NoMatchExists)
@@ -349,8 +355,9 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
349355
{
350356
Debug.Assert(matchEnd >= startat - 1);
351357
matchStart = matchEnd < startat ?
352-
startat :
353-
SpecializedFindStartPosition(input, matchEnd, matchStartLowBoundary, perThreadData);
358+
startat : _pattern._info.ContainsSomeAnchor ?
359+
FindStartPosition<FullNullabilityHandler>(input, matchEnd, matchStartLowBoundary, perThreadData) :
360+
FindStartPosition<NoAnchorsNullabilityHandler>(input, matchEnd, matchStartLowBoundary, perThreadData);
354361
}
355362

356363
// Phase 3:
@@ -367,25 +374,6 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
367374
Registers endRegisters = FindSubcaptures(input, matchStart, matchEnd, perThreadData);
368375
return new SymbolicMatch(matchStart, matchEnd - matchStart, endRegisters.CaptureStarts, endRegisters.CaptureEnds);
369376
}
370-
371-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
372-
int SpecializedFindEndPosition(ReadOnlySpan<char> input, int pos, long timeoutOccursAt, RegexRunnerMode mode, out int initialStatePos, out int matchLength, PerThreadData perThreadData) =>
373-
_findOpts is null ?
374-
SpecializedFindEndPosition2<NoOptimizationsInitialStateHandler>(input, pos, timeoutOccursAt, mode, out initialStatePos, out matchLength, perThreadData) :
375-
SpecializedFindEndPosition2<InitialStateFindOptimizationsHandler>(input, pos, timeoutOccursAt, mode, out initialStatePos, out matchLength, perThreadData);
376-
377-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
378-
int SpecializedFindEndPosition2<TFindOptimizationsHandler>(ReadOnlySpan<char> input, int pos, long timeoutOccursAt, RegexRunnerMode mode, out int initialStatePos, out int matchLength, PerThreadData perThreadData)
379-
where TFindOptimizationsHandler : struct, IInitialStateHandler =>
380-
_pattern._info.ContainsSomeAnchor ?
381-
FindEndPosition<TFindOptimizationsHandler, FullNullabilityHandler>(input, pos, timeoutOccursAt, mode, out initialStatePos, out matchLength, perThreadData) :
382-
FindEndPosition<TFindOptimizationsHandler, NoAnchorsNullabilityHandler>(input, pos, timeoutOccursAt, mode, out initialStatePos, out matchLength, perThreadData);
383-
384-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
385-
int SpecializedFindStartPosition(ReadOnlySpan<char> input, int i, int matchStartBoundary, PerThreadData perThreadData) =>
386-
_pattern._info.ContainsSomeAnchor ?
387-
FindStartPosition<FullNullabilityHandler>(input, i, matchStartBoundary, perThreadData) :
388-
FindStartPosition<NoAnchorsNullabilityHandler>(input, i, matchStartBoundary, perThreadData);
389377
}
390378

391379
/// <summary>Performs the initial Phase 1 match to find the end position of the match, or the first final state if this is an isMatch call.</summary>
@@ -429,31 +417,26 @@ private int FindEndPosition<TFindOptimizationsHandler, TNullabilityHandler>(Read
429417
FindEndPositionDeltas<NfaStateHandler, TFindOptimizationsHandler, TNullabilityHandler>(builder, input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
430418
FindEndPositionDeltas<DfaStateHandler, TFindOptimizationsHandler, TNullabilityHandler>(builder, input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
431419

432-
if (done)
420+
// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
421+
// there is no more input available, then the whole search is done.
422+
if (done || pos >= input.Length)
433423
{
434-
// If we reached the end of input or a deadend state, we're done.
435424
break;
436425
}
437-
else
426+
427+
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
428+
// need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
429+
// input available.
430+
if (pos < inputForInnerLoop.Length)
438431
{
439-
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
440-
// need to switch over to NFA mode) or ran out of input in the inner loop. For the latter case check if there is more
441-
// input available.
442-
if (pos >= input.Length)
443-
{
444-
// We ran out of input.
445-
break;
446-
}
447-
else if (pos < inputForInnerLoop.Length)
448-
{
449-
// We failed to transition. Upgrade to NFA mode.
450-
Debug.Assert(pos < inputForInnerLoop.Length);
451-
DfaMatchingState<TSet>? dfaState = currentState.DfaState(_builder);
452-
Debug.Assert(dfaState is not null);
453-
NfaMatchingState nfaState = perThreadData.NfaState;
454-
nfaState.InitializeFrom(dfaState);
455-
currentState = new CurrentState(nfaState);
456-
}
432+
// Because there was still more input available, a failure to transition in DFA mode must be the cause
433+
// of the early exit. Upgrade to NFA mode.
434+
Debug.Assert(pos < inputForInnerLoop.Length);
435+
DfaMatchingState<TSet>? dfaState = currentState.DfaState(_builder);
436+
Debug.Assert(dfaState is not null);
437+
NfaMatchingState nfaState = perThreadData.NfaState;
438+
nfaState.InitializeFrom(dfaState);
439+
currentState = new CurrentState(nfaState);
457440
}
458441

459442
// Check for a timeout before continuing.
@@ -504,7 +487,7 @@ private bool FindEndPositionDeltas<TStateHandler, TFindOptimizationsHandler, TNu
504487
// Loop through each character in the input, transitioning from state to state for each.
505488
while (true)
506489
{
507-
var (isInitial, isDeadend, isNullable, canBeNullable) = TStateHandler.GetStateInfo(builder, ref state);
490+
(bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(builder, ref state);
508491
// Check if currentState represents an initial state. If it does, call into any possible find optimizations
509492
// to hopefully more quickly find the next possible starting location.
510493
if (isInitial)
@@ -632,7 +615,7 @@ private bool FindStartPositionDeltas<TStateHandler, TNullabilityHandler>(Symboli
632615
// Loop backwards through each character in the input, transitioning from state to state for each.
633616
while (true)
634617
{
635-
var (isInitial, isDeadend, isNullable, canBeNullable) = TStateHandler.GetStateInfo(builder, ref state);
618+
(bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(builder, ref state);
636619

637620
// If the state accepts the empty string, we found a valid starting position. Record it and keep going,
638621
// since we're looking for the earliest one to occur within bounds.
@@ -1012,13 +995,20 @@ public static bool TakeTransition(SymbolicRegexBuilder<TSet> builder, ref Curren
1012995
// hasn't been materialized, try to create it; if we can, move to it, and we're done.
1013996
int dfaOffset = (state.DfaStateId << builder._mintermsLog) | mintermId;
1014997
int nextStateId = builder._delta[dfaOffset];
1015-
if (nextStateId > 0 || builder.TryCreateNewTransition(state.DfaStateId, mintermId, dfaOffset, checkThreshold: true, out nextStateId))
998+
if (nextStateId > 0)
1016999
{
1017-
// There was an existing state for this transition or we were able to create one. Move to it and
1000+
// There was an existing DFA transition to some state. Move to it and
10181001
// return that we're still operating as a DFA and can keep going.
10191002
state.DfaStateId = nextStateId;
10201003
return true;
10211004
}
1005+
if (builder.TryCreateNewTransition(state.DfaState(builder)!, mintermId, dfaOffset, checkThreshold: true, out DfaMatchingState<TSet>? nextState))
1006+
{
1007+
// We were able to create a new DFA transition to some state. Move to it and
1008+
// return that we're still operating as a DFA and can keep going.
1009+
state.DfaStateId = nextState.Id;
1010+
return true;
1011+
}
10221012

10231013
return false;
10241014
}

0 commit comments

Comments
 (0)