Skip to content

Commit e2cefa2

Browse files
authored
Revert "[release/6.0-preview7] Eliminate backtracking in the interpreter for patterns with .* (#55960)"
This reverts commit d885650.
1 parent d885650 commit e2cefa2

File tree

3 files changed

+18
-210
lines changed

3 files changed

+18
-210
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

Lines changed: 17 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,6 @@ internal abstract class RegexCompiler
6464
private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan<char>).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!;
6565
private static readonly MethodInfo s_spanStartsWith = typeof(MemoryExtensions).GetMethod("StartsWith", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
6666
private static readonly MethodInfo s_stringAsSpanMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string) })!;
67-
private static readonly MethodInfo s_spanLastIndexOfMethod = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
6867
private static readonly MethodInfo s_stringAsSpanIntIntMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string), typeof(int), typeof(int) })!;
6968
private static readonly MethodInfo s_stringGetCharsMethod = typeof(string).GetMethod("get_Chars", new Type[] { typeof(int) })!;
7069
private static readonly MethodInfo s_stringIndexOfCharInt = typeof(string).GetMethod("IndexOf", new Type[] { typeof(char), typeof(int) })!;
@@ -91,7 +90,6 @@ internal abstract class RegexCompiler
9190
private LocalBuilder? _runstackLocal;
9291
private LocalBuilder? _textInfoLocal; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo
9392
private LocalBuilder? _loopTimeoutCounterLocal; // timeout counter for setrep and setloop
94-
private LocalBuilder? _maxBacktrackPositionLocal;
9593

9694
protected RegexOptions _options; // options
9795
protected RegexCode? _code; // the RegexCode object
@@ -893,8 +891,6 @@ private void GenerateForwardSection()
893891
Mvfldloc(s_runtrackposField, _runtrackposLocal!);
894892
Mvfldloc(s_runstackField, _runstackLocal!);
895893
Mvfldloc(s_runstackposField, _runstackposLocal!);
896-
Ldc(-1);
897-
Stloc(_maxBacktrackPositionLocal!);
898894

899895
_backpos = -1;
900896

@@ -1709,7 +1705,7 @@ protected void GenerateFindFirstChar()
17091705
// if (!CharInClass(textSpan[i + 2], prefix[2], "...")) goto returnFalse;
17101706
// ...
17111707
Debug.Assert(charClassIndex == 0 || charClassIndex == 1);
1712-
for (; charClassIndex < _leadingCharClasses.Length; charClassIndex++)
1708+
for ( ; charClassIndex < _leadingCharClasses.Length; charClassIndex++)
17131709
{
17141710
Debug.Assert(needLoop);
17151711
Ldloca(textSpanLocal);
@@ -3314,7 +3310,6 @@ protected void GenerateGo()
33143310
}
33153311
_runtextbegLocal = DeclareInt32();
33163312
_runtextendLocal = DeclareInt32();
3317-
_maxBacktrackPositionLocal = DeclareInt32();
33183313

33193314
InitializeCultureForGoIfNecessary();
33203315

@@ -4263,61 +4258,7 @@ private void GenerateOneCode()
42634258
//: break Backward;
42644259
{
42654260
string str = _strings![Operand(0)];
4266-
Label multiCode = DefineLabel();
4267-
if (!IsRightToLeft())
4268-
{
4269-
// if (runtextend - runtextpos < c)
4270-
Ldloc(_runtextendLocal!);
4271-
Ldloc(_runtextposLocal!);
4272-
Sub();
4273-
Ldc(str.Length);
4274-
BgeFar(multiCode);
4275-
// if (!caseInsensitive && _maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition)
4276-
if (!IsCaseInsensitive())
4277-
{
4278-
Ldloc(_maxBacktrackPositionLocal!);
4279-
Ldc(-1);
4280-
BeqFar(_backtrack);
4281-
Ldloc(_runtextposLocal!);
4282-
Ldloc(_maxBacktrackPositionLocal!);
4283-
BleFar(_backtrack);
4284-
// runtextpos = _maxBacktrackPosition;
4285-
Ldloc(_maxBacktrackPositionLocal!);
4286-
Stloc(_runtextposLocal!);
4287-
// ReadOnlySpan<char> runtextSpan = runtext.AsSpan(_maxBacktrackPosition, runtextend - _maxBacktrackPosition);
4288-
Ldloc(_runtextLocal!);
4289-
Ldloc(_maxBacktrackPositionLocal!);
4290-
Ldloc(_runtextendLocal!);
4291-
Ldloc(_maxBacktrackPositionLocal!);
4292-
Sub();
4293-
using (RentedLocalBuilder runtextSpanLocal = RentReadOnlySpanCharLocal())
4294-
{
4295-
Call(s_stringAsSpanIntIntMethod);
4296-
Stloc(runtextSpanLocal);
4297-
using (RentedLocalBuilder lastIndexOfLocal = RentInt32Local())
4298-
{
4299-
// int lastIndexOf = runtextSpan.LastIndexOf(str.AsSpan());
4300-
Ldloc(runtextSpanLocal);
4301-
Ldstr(str);
4302-
Call(s_stringAsSpanMethod);
4303-
Call(s_spanLastIndexOfMethod);
4304-
Stloc(lastIndexOfLocal);
4305-
// if (lastIndexOf > -1)
4306-
Ldloc(lastIndexOfLocal);
4307-
Ldc(-1);
4308-
BleFar(_backtrack);
4309-
// runtextpos = lastIndexOf + _maxBacktrackPosition;
4310-
Ldloc(lastIndexOfLocal);
4311-
Ldloc(_maxBacktrackPositionLocal!);
4312-
Add();
4313-
Stloc(_runtextposLocal!);
4314-
BrFar(_backtrack);
4315-
}
4316-
}
4317-
}
4318-
}
43194261

4320-
MarkLabel(multiCode);
43214262
Ldc(str.Length);
43224263
Ldloc(_runtextendLocal!);
43234264
Ldloc(_runtextposLocal!);
@@ -4657,9 +4598,6 @@ private void GenerateOneCode()
46574598

46584599
using RentedLocalBuilder lenLocal = RentInt32Local();
46594600
using RentedLocalBuilder iLocal = RentInt32Local();
4660-
using RentedLocalBuilder tempMaxBacktrackPositionLocal = RentInt32Local();
4661-
Ldloc(_runtextposLocal!);
4662-
Stloc(tempMaxBacktrackPositionLocal);
46634601

46644602
if (!IsRightToLeft())
46654603
{
@@ -4909,12 +4847,6 @@ private void GenerateOneCode()
49094847
DoPush();
49104848

49114849
Track();
4912-
// if (_operator == RegexCode.Notoneloop) maxBacktrackPosition = tempMaxBacktrackPosition
4913-
if (_regexopcode == RegexCode.Notoneloop)
4914-
{
4915-
Ldloc(tempMaxBacktrackPositionLocal);
4916-
Stloc(_maxBacktrackPositionLocal!);
4917-
}
49184850
}
49194851
break;
49204852
}
@@ -4938,66 +4870,28 @@ private void GenerateOneCode()
49384870
//: if (i > 0)
49394871
//: Track(i - 1, pos - 1);
49404872
//: Advance(2);
4941-
Label noBacktrackPositionBranch = DefineLabel();
4873+
PopTrack();
4874+
Stloc(_runtextposLocal!);
49424875
PopTrack();
49434876
using (RentedLocalBuilder posLocal = RentInt32Local())
49444877
{
49454878
Stloc(posLocal);
4946-
PopTrack();
4947-
using (RentedLocalBuilder iBacktrackLocal = RentInt32Local())
4948-
{
4949-
Stloc(iBacktrackLocal);
4950-
// if (!caseInsensitive && maxBacktrackPosition != -1 && pos > maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft)
4951-
if (!IsCaseInsensitive() && _regexopcode == (RegexCode.Notoneloop | RegexCode.Back) && !IsRightToLeft())
4952-
{
4953-
Ldloc(_maxBacktrackPositionLocal!);
4954-
Ldc(-1);
4955-
Beq(noBacktrackPositionBranch);
4956-
Ldloc(posLocal);
4957-
Ldloc(_maxBacktrackPositionLocal!);
4958-
Ble(noBacktrackPositionBranch);
4959-
Ldloc(_runtextposLocal!);
4960-
Ldloc(posLocal);
4961-
Bge(noBacktrackPositionBranch);
4962-
/*
4963-
int difference = pos - maxBacktrackPosition;
4964-
pos = runtextpos;
4965-
i -= difference;
4966-
maxBacktrackPosition = -1;
4967-
*/
4968-
// int difference = pos - maxBacktrackPosition;
4969-
Ldloc(iBacktrackLocal);
4970-
Ldloc(posLocal);
4971-
Ldloc(_maxBacktrackPositionLocal!);
4972-
Sub();
4973-
Sub();
4974-
Stloc(iBacktrackLocal);
4975-
Ldloc(_runtextposLocal!);
4976-
Stloc(posLocal);
4977-
Ldc(-1);
4978-
Stloc(_maxBacktrackPositionLocal!);
4979-
}
4980-
4981-
MarkLabel(noBacktrackPositionBranch);
4982-
Ldloc(posLocal);
4983-
Stloc(_runtextposLocal!);
4984-
Ldloc(iBacktrackLocal);
4985-
Ldc(0);
4986-
BleFar(AdvanceLabel());
4987-
ReadyPushTrack();
4988-
Ldloc(iBacktrackLocal);
4989-
}
4990-
Ldc(1);
4991-
Sub();
4992-
DoPush();
4879+
Ldloc(posLocal);
4880+
Ldc(0);
4881+
BleFar(AdvanceLabel());
49934882
ReadyPushTrack();
4994-
Ldloc(_runtextposLocal!);
4995-
Ldc(1);
4996-
Sub(IsRightToLeft());
4997-
DoPush();
4998-
Trackagain();
4999-
Advance();
4883+
Ldloc(posLocal);
50004884
}
4885+
Ldc(1);
4886+
Sub();
4887+
DoPush();
4888+
ReadyPushTrack();
4889+
Ldloc(_runtextposLocal!);
4890+
Ldc(1);
4891+
Sub(IsRightToLeft());
4892+
DoPush();
4893+
Trackagain();
4894+
Advance();
50014895
break;
50024896

50034897
case RegexCode.Onelazy:

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ internal sealed class RegexInterpreter : RegexRunner
2020
private int _codepos;
2121
private bool _rightToLeft;
2222
private bool _caseInsensitive;
23-
private int _maxBacktrackPosition = -1;
2423

2524
public RegexInterpreter(RegexCode code, CultureInfo culture)
2625
{
@@ -224,20 +223,6 @@ private bool MatchString(string str)
224223
{
225224
if (runtextend - runtextpos < c)
226225
{
227-
// If MatchString was called after a greedy op such as a .*, we would have zipped runtextpos to the end without really examining any characters. Reset to _maxBacktrackPosition here as an optimization.
228-
if (!_caseInsensitive && _maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition)
229-
{
230-
// If lastIndexOf is -1, we backtrack to the max extent possible.
231-
runtextpos = _maxBacktrackPosition;
232-
ReadOnlySpan<char> runtextSpan = runtext.AsSpan(_maxBacktrackPosition, runtextend - _maxBacktrackPosition);
233-
int lastIndexOf = runtextSpan.LastIndexOf(str);
234-
if (lastIndexOf > -1)
235-
{
236-
// Found the next position to match. Move runtextpos here
237-
runtextpos = _maxBacktrackPosition + lastIndexOf;
238-
}
239-
}
240-
241226
return false;
242227
}
243228

@@ -1200,7 +1185,6 @@ protected override void Go()
12001185
int len = Math.Min(Operand(1), Forwardchars());
12011186
char ch = (char)Operand(0);
12021187
int i;
1203-
int tempMaxBacktrackPosition = runtextpos;
12041188

12051189
if (!_rightToLeft && !_caseInsensitive)
12061190
{
@@ -1233,7 +1217,6 @@ protected override void Go()
12331217
if (len > i && _operator == RegexCode.Notoneloop)
12341218
{
12351219
TrackPush(len - i - 1, runtextpos - Bump());
1236-
_maxBacktrackPosition = tempMaxBacktrackPosition;
12371220
}
12381221
}
12391222
advance = 2;
@@ -1278,16 +1261,6 @@ protected override void Go()
12781261
{
12791262
int i = TrackPeek();
12801263
int pos = TrackPeek(1);
1281-
if (!_caseInsensitive && _maxBacktrackPosition != -1 && pos > _maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft)
1282-
{
1283-
// The Multi node has bumped us along already
1284-
int difference = pos - _maxBacktrackPosition;
1285-
Debug.Assert(difference > 0);
1286-
pos = runtextpos;
1287-
i -= difference;
1288-
// We shouldn't be backtracking anymore.
1289-
_maxBacktrackPosition = -1;
1290-
}
12911264
runtextpos = pos;
12921265
if (i > 0)
12931266
{

0 commit comments

Comments
 (0)