@@ -315,7 +315,13 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
315315 // the position of the last b: aacaaaabbbc. It additionally records the position of the first a after
316316 // the c as the low boundary for the starting position.
317317 int matchStartLowBoundary , matchStartLengthMarker ;
318- int matchEnd = SpecializedFindEndPosition ( input , startat , timeoutOccursAt , mode , out matchStartLowBoundary , out matchStartLengthMarker , perThreadData ) ;
318+ int matchEnd = ( _findOpts is null , _pattern . _info . ContainsSomeAnchor ) switch
319+ {
320+ ( false , false ) => FindEndPosition < NoOptimizationsInitialStateHandler , NoAnchorsNullabilityHandler > ( input , startat , timeoutOccursAt , mode , out matchStartLowBoundary , out matchStartLengthMarker , perThreadData ) ,
321+ ( true , false ) => FindEndPosition < InitialStateFindOptimizationsHandler , NoAnchorsNullabilityHandler > ( input , startat , timeoutOccursAt , mode , out matchStartLowBoundary , out matchStartLengthMarker , perThreadData ) ,
322+ ( false , true ) => FindEndPosition < NoOptimizationsInitialStateHandler , FullNullabilityHandler > ( input , startat , timeoutOccursAt , mode , out matchStartLowBoundary , out matchStartLengthMarker , perThreadData ) ,
323+ ( true , true ) => FindEndPosition < InitialStateFindOptimizationsHandler , FullNullabilityHandler > ( input , startat , timeoutOccursAt , mode , out matchStartLowBoundary , out matchStartLengthMarker , perThreadData ) ,
324+ } ;
319325
320326 // If there wasn't a match, we're done.
321327 if ( matchEnd == NoMatchExists )
@@ -349,8 +355,9 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
349355 {
350356 Debug . Assert ( matchEnd >= startat - 1 ) ;
351357 matchStart = matchEnd < startat ?
352- startat :
353- SpecializedFindStartPosition ( input , matchEnd , matchStartLowBoundary , perThreadData ) ;
358+ startat : _pattern . _info . ContainsSomeAnchor ?
359+ FindStartPosition < FullNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) :
360+ FindStartPosition < NoAnchorsNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) ;
354361 }
355362
356363 // Phase 3:
@@ -367,25 +374,6 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
367374 Registers endRegisters = FindSubcaptures ( input , matchStart , matchEnd , perThreadData ) ;
368375 return new SymbolicMatch ( matchStart , matchEnd - matchStart , endRegisters . CaptureStarts , endRegisters . CaptureEnds ) ;
369376 }
370-
371- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
372- int SpecializedFindEndPosition ( ReadOnlySpan < char > input , int pos , long timeoutOccursAt , RegexRunnerMode mode , out int initialStatePos , out int matchLength , PerThreadData perThreadData ) =>
373- _findOpts is null ?
374- SpecializedFindEndPosition2 < NoOptimizationsInitialStateHandler > ( input , pos , timeoutOccursAt , mode , out initialStatePos , out matchLength , perThreadData ) :
375- SpecializedFindEndPosition2 < InitialStateFindOptimizationsHandler > ( input , pos , timeoutOccursAt , mode , out initialStatePos , out matchLength , perThreadData ) ;
376-
377- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
378- int SpecializedFindEndPosition2 < TFindOptimizationsHandler > ( ReadOnlySpan < char > input , int pos , long timeoutOccursAt , RegexRunnerMode mode , out int initialStatePos , out int matchLength , PerThreadData perThreadData )
379- where TFindOptimizationsHandler : struct , IInitialStateHandler =>
380- _pattern . _info . ContainsSomeAnchor ?
381- FindEndPosition < TFindOptimizationsHandler , FullNullabilityHandler > ( input , pos , timeoutOccursAt , mode , out initialStatePos , out matchLength , perThreadData ) :
382- FindEndPosition < TFindOptimizationsHandler , NoAnchorsNullabilityHandler > ( input , pos , timeoutOccursAt , mode , out initialStatePos , out matchLength , perThreadData ) ;
383-
384- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
385- int SpecializedFindStartPosition ( ReadOnlySpan < char > input , int i , int matchStartBoundary , PerThreadData perThreadData ) =>
386- _pattern . _info . ContainsSomeAnchor ?
387- FindStartPosition < FullNullabilityHandler > ( input , i , matchStartBoundary , perThreadData ) :
388- FindStartPosition < NoAnchorsNullabilityHandler > ( input , i , matchStartBoundary , perThreadData ) ;
389377 }
390378
391379 /// <summary>Performs the initial Phase 1 match to find the end position of the match, or first final state if this is an isMatch call.</summary>
@@ -429,31 +417,26 @@ private int FindEndPosition<TFindOptimizationsHandler, TNullabilityHandler>(Read
429417 FindEndPositionDeltas < NfaStateHandler , TFindOptimizationsHandler , TNullabilityHandler > ( builder , input , mode , ref pos , ref currentState , ref endPos , ref endStateId , ref initialStatePos , ref initialStatePosCandidate ) :
430418 FindEndPositionDeltas < DfaStateHandler , TFindOptimizationsHandler , TNullabilityHandler > ( builder , input , mode , ref pos , ref currentState , ref endPos , ref endStateId , ref initialStatePos , ref initialStatePosCandidate ) ;
431419
432- if ( done )
420+ // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
421+ // there is no more input available, then the whole search is done.
422+ if ( done || pos >= input . Length )
433423 {
434- // If we reached the end of input or a deadend state, we're done.
435424 break ;
436425 }
437- else
426+
427+ // The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
428+ // need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
429+ // input available.
430+ if ( pos < inputForInnerLoop . Length )
438431 {
439- // The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
440- // need to switch over to NFA mode) or ran out of input in the inner loop. For the latter case check if there is more
441- // input available.
442- if ( pos >= input . Length )
443- {
444- // We ran out of input.
445- break ;
446- }
447- else if ( pos < inputForInnerLoop . Length )
448- {
449- // We failed to transition. Upgrade to DFA mode.
450- Debug . Assert ( pos < inputForInnerLoop . Length ) ;
451- DfaMatchingState < TSet > ? dfaState = currentState . DfaState ( _builder ) ;
452- Debug . Assert ( dfaState is not null ) ;
453- NfaMatchingState nfaState = perThreadData . NfaState ;
454- nfaState . InitializeFrom ( dfaState ) ;
455- currentState = new CurrentState ( nfaState ) ;
456- }
432+ // Because there was still more input available, a failure to transition in DFA mode must be the cause
433+ // of the early exit. Upgrade to NFA mode.
434+ Debug . Assert ( pos < inputForInnerLoop . Length ) ;
435+ DfaMatchingState < TSet > ? dfaState = currentState . DfaState ( _builder ) ;
436+ Debug . Assert ( dfaState is not null ) ;
437+ NfaMatchingState nfaState = perThreadData . NfaState ;
438+ nfaState . InitializeFrom ( dfaState ) ;
439+ currentState = new CurrentState ( nfaState ) ;
457440 }
458441
459442 // Check for a timeout before continuing.
@@ -504,7 +487,7 @@ private bool FindEndPositionDeltas<TStateHandler, TFindOptimizationsHandler, TNu
504487 // Loop through each character in the input, transitioning from state to state for each.
505488 while ( true )
506489 {
507- var ( isInitial , isDeadend , isNullable , canBeNullable ) = TStateHandler . GetStateInfo ( builder , ref state ) ;
490+ ( bool isInitial , bool isDeadend , bool isNullable , bool canBeNullable ) = TStateHandler . GetStateInfo ( builder , ref state ) ;
508491 // Check if currentState represents an initial state. If it does, call into any possible find optimizations
509492 // to hopefully more quickly find the next possible starting location.
510493 if ( isInitial )
@@ -632,7 +615,7 @@ private bool FindStartPositionDeltas<TStateHandler, TNullabilityHandler>(Symboli
632615 // Loop backwards through each character in the input, transitioning from state to state for each.
633616 while ( true )
634617 {
635- var ( isInitial , isDeadend , isNullable , canBeNullable ) = TStateHandler . GetStateInfo ( builder , ref state ) ;
618+ ( bool isInitial , bool isDeadend , bool isNullable , bool canBeNullable ) = TStateHandler . GetStateInfo ( builder , ref state ) ;
636619
637620 // If the state accepts the empty string, we found a valid starting position. Record it and keep going,
638621 // since we're looking for the earliest one to occur within bounds.
@@ -1012,13 +995,20 @@ public static bool TakeTransition(SymbolicRegexBuilder<TSet> builder, ref Curren
1012995 // hasn't been materialized, try to create it; if we can, move to it, and we're done.
1013996 int dfaOffset = ( state . DfaStateId << builder . _mintermsLog ) | mintermId ;
1014997 int nextStateId = builder . _delta [ dfaOffset ] ;
1015- if ( nextStateId > 0 || builder . TryCreateNewTransition ( state . DfaStateId , mintermId , dfaOffset , checkThreshold : true , out nextStateId ) )
998+ if ( nextStateId > 0 )
1016999 {
1017- // There was an existing state for this transition or we were able to create one. Move to it and
1000+ // There was an existing DFA transition to some state. Move to it and
10181001 // return that we're still operating as a DFA and can keep going.
10191002 state . DfaStateId = nextStateId ;
10201003 return true ;
10211004 }
1005+ if ( builder . TryCreateNewTransition ( state . DfaState ( builder ) ! , mintermId , dfaOffset , checkThreshold : true , out DfaMatchingState < TSet > ? nextState ) )
1006+ {
1007+ // We were able to create a new DFA transition to some state. Move to it and
1008+ // return that we're still operating as a DFA and can keep going.
1009+ state . DfaStateId = nextState . Id ;
1010+ return true ;
1011+ }
10221012
10231013 return false ;
10241014 }
0 commit comments