Skip to content

Commit 0746bc9

Browse files
committed
Address PR feedback
1 parent dd22b7f commit 0746bc9

15 files changed

+143
-95
lines changed

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -180,31 +180,31 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
180180
writer.WriteLine($" base.roptions = {optionsExpression};");
181181
writer.WriteLine($" base.internalMatchTimeout = {timeoutExpression};");
182182
writer.WriteLine($" base.factory = new RunnerFactory();");
183-
if (rm.Tree.Caps is not null)
183+
if (rm.Tree.CaptureNumberSparseMapping is not null)
184184
{
185185
writer.Write(" base.Caps = new global::System.Collections.Hashtable {");
186-
AppendHashtableContents(writer, rm.Tree.Caps);
186+
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
187187
writer.WriteLine(" };");
188188
}
189-
if (rm.Tree.CapNames is not null)
189+
if (rm.Tree.CaptureNameToNumberMapping is not null)
190190
{
191191
writer.Write(" base.CapNames = new global::System.Collections.Hashtable {");
192-
AppendHashtableContents(writer, rm.Tree.CapNames);
192+
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
193193
writer.WriteLine(" };");
194194
}
195-
if (rm.Tree.CapsList is not null)
195+
if (rm.Tree.CaptureNames is not null)
196196
{
197197
writer.Write(" base.capslist = new string[] {");
198198
string separator = "";
199-
foreach (string s in rm.Tree.CapsList)
199+
foreach (string s in rm.Tree.CaptureNames)
200200
{
201201
writer.Write(separator);
202202
writer.Write(Literal(s));
203203
separator = ", ";
204204
}
205205
writer.WriteLine(" };");
206206
}
207-
writer.WriteLine($" base.capsize = {rm.Tree.CapSize};");
207+
writer.WriteLine($" base.capsize = {rm.Tree.CaptureCount};");
208208
writer.WriteLine($" }}");
209209
writer.WriteLine(" ");
210210
writer.WriteLine($" private sealed class RunnerFactory : global::System.Text.RegularExpressions.RegexRunnerFactory");
@@ -1365,7 +1365,7 @@ void EmitBackreference(RegexNode node)
13651365
{
13661366
Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}");
13671367

1368-
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.Caps);
1368+
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
13691369

13701370
if (sliceStaticPos > 0)
13711371
{
@@ -1447,7 +1447,7 @@ void EmitBackreferenceConditional(RegexNode node)
14471447
TransferSliceStaticPosToPos();
14481448

14491449
// Get the capture number to test.
1450-
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.Caps);
1450+
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
14511451

14521452
// Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus
14531453
// somewhat likely to be Empty.
@@ -1758,8 +1758,8 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
17581758
Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}");
17591759
Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
17601760

1761-
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.Caps);
1762-
int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Tree.Caps);
1761+
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
1762+
int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Tree.CaptureNumberSparseMapping);
17631763
bool isAtomic = analysis.IsAtomicByAncestor(node);
17641764

17651765
TransferSliceStaticPosToPos();
@@ -3723,7 +3723,7 @@ private static string DescribeNode(RegexNode node, AnalysisResults analysis) =>
37233723
private static string DescribeCapture(int capNum, AnalysisResults analysis)
37243724
{
37253725
// If we can get a capture name from the captures collection and it's not just a numerical representation of the group, use it.
3726-
string name = RegexParser.GroupNameFromNumber(analysis.RegexTree.Caps, analysis.RegexTree.CapsList, analysis.RegexTree.CapSize, capNum);
3726+
string name = RegexParser.GroupNameFromNumber(analysis.RegexTree.CaptureNumberSparseMapping, analysis.RegexTree.CaptureNames, analysis.RegexTree.CaptureCount, capNum);
37273727
if (!string.IsNullOrEmpty(name) &&
37283728
(!int.TryParse(name, out int id) || id != capNum))
37293729
{

src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
<Compile Include="..\src\System\Threading\StackHelper.cs" Link="Production\StackHelper.cs" />
3434
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.cs" Link="Production\RegexCharClass.cs" />
3535
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" Link="Production\RegexCharClass.MappingTable.cs" />
36-
<Compile Include="..\src\System\Text\RegularExpressions\RegexCode.cs" Link="Production\RegexCode.cs" />
3736
<Compile Include="..\src\System\Text\RegularExpressions\RegexFindOptimizations.cs" Link="Production\RegexFindOptimizations.cs" />
3837
<Compile Include="..\src\System\Text\RegularExpressions\RegexNode.cs" Link="Production\RegexNode.cs" />
3938
<Compile Include="..\src\System\Text\RegularExpressions\RegexNodeKind.cs" Link="Production\RegexNodeKind.cs" />
@@ -45,7 +44,6 @@
4544
<Compile Include="..\src\System\Text\RegularExpressions\RegexPrefixAnalyzer.cs" Link="Production\RegexPrefixAnalyzer.cs" />
4645
<Compile Include="..\src\System\Text\RegularExpressions\RegexTree.cs" Link="Production\RegexTree.cs" />
4746
<Compile Include="..\src\System\Text\RegularExpressions\RegexTreeAnalyzer.cs" Link="Production\RegexTreeAnalyzer.cs" />
48-
<Compile Include="..\src\System\Text\RegularExpressions\RegexWriter.cs" Link="Production\RegexWriter.cs" />
4947
<Compile Include="..\src\System\Collections\HashtableExtensions.cs" Link="Production\HashtableExtensions.cs" />
5048
</ItemGroup>
5149

src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
2727
<Compile Include="System\Text\RegularExpressions\RegexCharClass.cs" />
2828
<Compile Include="System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" />
29-
<Compile Include="System\Text\RegularExpressions\RegexCode.cs" />
3029
<Compile Include="System\Text\RegularExpressions\RegexCompilationInfo.cs" />
3130
<Compile Include="System\Text\RegularExpressions\RegexFindOptimizations.cs" />
3231
<Compile Include="System\Text\RegularExpressions\RegexGeneratorAttribute.cs" />
3332
<Compile Include="System\Text\RegularExpressions\RegexInterpreter.cs" />
33+
<Compile Include="System\Text\RegularExpressions\RegexInterpreterCode.cs" />
3434
<Compile Include="System\Text\RegularExpressions\RegexMatchTimeoutException.cs" />
3535
<Compile Include="System\Text\RegularExpressions\RegexNode.cs" />
3636
<Compile Include="System\Text\RegularExpressions\RegexNodeKind.cs" />

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -60,20 +60,13 @@ public Regex([StringSyntax(StringSyntaxAttribute.Regex, "options")] string patte
6060

6161
internal Regex(string pattern, CultureInfo? culture)
6262
{
63-
// Validate and store the arguments.
63+
// Validate arguments.
6464
ValidatePattern(pattern);
65-
this.pattern = pattern;
66-
culture ??= CultureInfo.CurrentCulture;
67-
internalMatchTimeout = s_defaultMatchTimeout;
6865

69-
// Parse the pattern.
70-
RegexTree tree = RegexParser.Parse(pattern, roptions, culture);
66+
// Parse and store the argument information.
67+
RegexTree tree = Init(pattern, RegexOptions.None, s_defaultMatchTimeout, ref culture);
7168

72-
// Store the relevant information, including a factory for using the interpreter.
73-
capnames = tree.CapNames;
74-
capslist = tree.CapsList;
75-
caps = tree.Caps;
76-
capsize = tree.CapSize;
69+
// Create the interpreter factory.
7770
factory = new RegexInterpreterFactory(tree, culture);
7871

7972
// NOTE: This overload _does not_ delegate to the one that takes options, in order
@@ -83,23 +76,15 @@ internal Regex(string pattern, CultureInfo? culture)
8376

8477
internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
8578
{
86-
// Validate and store the arguments.
79+
// Validate arguments.
8780
ValidatePattern(pattern);
8881
ValidateOptions(options);
8982
ValidateMatchTimeout(matchTimeout);
90-
this.pattern = pattern;
91-
roptions = options;
92-
internalMatchTimeout = matchTimeout;
93-
culture ??= RegexParser.GetTargetCulture(options);
9483

95-
// Parse the pattern.
96-
RegexTree tree = RegexParser.Parse(pattern, roptions, culture);
84+
// Parse and store the argument information.
85+
RegexTree tree = Init(pattern, options, matchTimeout, ref culture);
9786

98-
// Store the relevant information, constructing the appropriate factory.
99-
capnames = tree.CapNames;
100-
capslist = tree.CapsList;
101-
caps = tree.Caps;
102-
capsize = tree.CapSize;
87+
// Create the appropriate factory.
10388
if ((options & RegexOptions.NonBacktracking) != 0)
10489
{
10590
// If we're in non-backtracking mode, create the appropriate factory.
@@ -120,6 +105,26 @@ internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, Cult
120105
}
121106
}
122107

108+
/// <summary>Stores the supplied arguments and capture information, returning the parsed expression.</summary>
109+
private RegexTree Init(string pattern, RegexOptions options, TimeSpan matchTimeout, [NotNull] ref CultureInfo? culture)
110+
{
111+
this.pattern = pattern;
112+
roptions = options;
113+
internalMatchTimeout = matchTimeout;
114+
culture ??= RegexParser.GetTargetCulture(options);
115+
116+
// Parse the pattern.
117+
RegexTree tree = RegexParser.Parse(pattern, options, culture);
118+
119+
// Store the relevant information, constructing the appropriate factory.
120+
capnames = tree.CaptureNameToNumberMapping;
121+
capslist = tree.CaptureNames;
122+
caps = tree.CaptureNumberSparseMapping;
123+
capsize = tree.CaptureCount;
124+
125+
return tree;
126+
}
127+
123128
internal static void ValidatePattern(string pattern)
124129
{
125130
if (pattern is null)

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,7 +1470,7 @@ void EmitBackreference(RegexNode node)
14701470
{
14711471
Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}");
14721472

1473-
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.Caps);
1473+
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
14741474

14751475
TransferSliceStaticPosToPos();
14761476

@@ -1569,7 +1569,7 @@ void EmitBackreferenceConditional(RegexNode node)
15691569
TransferSliceStaticPosToPos();
15701570

15711571
// Get the capture number to test.
1572-
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.Caps);
1572+
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
15731573

15741574
// Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus
15751575
// somewhat likely to be Empty.
@@ -1889,8 +1889,8 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
18891889
Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}");
18901890
Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
18911891

1892-
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.Caps);
1893-
int uncapnum = RegexParser.MapCaptureNumber(node.N, _regexTree.Caps);
1892+
int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
1893+
int uncapnum = RegexParser.MapCaptureNumber(node.N, _regexTree.CaptureNumberSparseMapping);
18941894
bool isAtomic = analysis.IsAtomicByAncestor(node);
18951895

18961896
// pos += sliceStaticPos;

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public RegexInterpreterFactory(RegexTree tree, CultureInfo culture) =>
1919

2020
protected internal override RegexRunner CreateInstance() =>
2121
// Create a new interpreter instance.
22-
new RegexInterpreter(_code, RegexParser.GetTargetCulture(_code.Tree.Options));
22+
new RegexInterpreter(_code, RegexParser.GetTargetCulture(_code.Options));
2323
}
2424

2525
/// <summary>Executes a block of regular expression codes while consuming input.</summary>
@@ -356,7 +356,7 @@ protected internal override void Scan(ReadOnlySpan<char> text)
356356
stoppos = 0;
357357
}
358358

359-
while (_code.Tree.FindOptimizations.TryFindNextStartingPosition(text, ref runtextpos, runtextbeg, runtextstart, runtextend))
359+
while (_code.FindOptimizations.TryFindNextStartingPosition(text, ref runtextpos, runtextbeg, runtextstart, runtextend))
360360
{
361361
CheckTimeout();
362362

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs renamed to src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreterCode.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ namespace System.Text.RegularExpressions
99
/// <summary>Contains the code, written by <see cref="RegexWriter"/>, for <see cref="RegexInterpreter"/> to evaluate a regular expression.</summary>
1010
internal sealed class RegexInterpreterCode
1111
{
12-
/// <summary>The optimized parse tree and associated information.</summary>
13-
public readonly RegexTree Tree;
12+
/// <summary>Find logic to use to find the next possible location for a match.</summary>
13+
public readonly RegexFindOptimizations FindOptimizations;
14+
/// <summary>The options associated with the regex.</summary>
15+
public readonly RegexOptions Options;
1416
/// <summary>RegexOpcodes and arguments written by <see cref="RegexWriter"/>.</summary>
1517
public readonly int[] Codes;
1618
/// <summary>The string / set table. <see cref="Codes"/> includes offsets into this table, for string and set arguments.</summary>
@@ -19,17 +21,15 @@ internal sealed class RegexInterpreterCode
1921
public readonly uint[]?[] StringsAsciiLookup;
2022
/// <summary>How many instructions in <see cref="Codes"/> use backtracking.</summary>
2123
public readonly int TrackCount;
22-
/// <summary>True if right to left.</summary>
23-
public readonly bool RightToLeft;
2424

25-
public RegexInterpreterCode(RegexTree tree, int[] codes, string[] strings, int trackcount)
25+
public RegexInterpreterCode(RegexFindOptimizations findOptimizations, RegexOptions options, int[] codes, string[] strings, int trackcount)
2626
{
27-
Tree = tree;
27+
FindOptimizations = findOptimizations;
28+
Options = options;
2829
Codes = codes;
2930
Strings = strings;
3031
StringsAsciiLookup = new uint[strings.Length][];
3132
TrackCount = trackcount;
32-
RightToLeft = (tree.Options & RegexOptions.RightToLeft) != 0;
3333
}
3434

3535
/// <summary>Gets whether the specified opcode may incur backtracking.</summary>
@@ -140,7 +140,7 @@ public override string ToString()
140140
{
141141
var sb = new StringBuilder();
142142

143-
sb.AppendLine($"Direction: {(RightToLeft ? "right-to-left" : "left-to-right")}");
143+
sb.AppendLine($"Direction: {((Options & RegexOptions.RightToLeft) != 0 ? "right-to-left" : "left-to-right")}");
144144
sb.AppendLine();
145145
for (int i = 0; i < Codes.Length; i += OpcodeSize((RegexOpcode)Codes[i]))
146146
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,26 +88,29 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo
8888
parser.Reset(options);
8989
RegexNode root = parser.ScanRegex();
9090

91-
// Construct sparse capnum mapping if some numbers are unused.
92-
int[]? capnumlist = parser._capnumlist;
93-
Hashtable? caps = parser._caps;
91+
int[]? captureNumberList = parser._capnumlist;
92+
Hashtable? sparseMapping = parser._caps;
9493
int captop = parser._captop;
95-
int capsize;
96-
if (capnumlist == null || captop == capnumlist.Length)
94+
95+
int captureCount;
96+
if (captureNumberList == null || captop == captureNumberList.Length)
9797
{
98-
capsize = captop;
99-
caps = null;
98+
// The capture list isn't sparse. Null out the capture mapping as it's not necessary,
99+
// and store the number of captures.
100+
captureCount = captop;
101+
sparseMapping = null;
100102
}
101103
else
102104
{
103-
capsize = capnumlist.Length;
104-
for (int i = 0; i < capnumlist.Length; i++)
105+
// The capture list is sparse. Store the number of captures, and populate the number-to-names-list.
106+
captureCount = captureNumberList.Length;
107+
for (int i = 0; i < captureNumberList.Length; i++)
105108
{
106-
caps[capnumlist[i]] = i;
109+
sparseMapping[captureNumberList[i]] = i;
107110
}
108111
}
109112

110-
return new RegexTree(root, parser._capnames!, parser._capnamelist?.ToArray(), caps, capsize, options, culture);
113+
return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, culture);
111114
}
112115

113116
/// <summary>
@@ -116,11 +119,10 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo
116119
public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames)
117120
{
118121
CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
119-
var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, stackalloc int[OptionStackDefaultSize]);
122+
using var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, stackalloc int[OptionStackDefaultSize]);
120123

121124
RegexNode root = parser.ScanReplacement();
122125
var regexReplacement = new RegexReplacement(pattern, root, caps);
123-
parser.Dispose();
124126

125127
return regexReplacement;
126128
}
@@ -208,7 +210,7 @@ public static string Unescape(string input)
208210

209211
private static string UnescapeImpl(string input, int i)
210212
{
211-
var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
213+
using var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
212214

213215
// In the worst case the escaped string has the same length.
214216
// For small inputs we use stack allocation.
@@ -236,8 +238,6 @@ private static string UnescapeImpl(string input, int i)
236238
vsb.Append(input.AsSpan(lastpos, i - lastpos));
237239
} while (i < input.Length);
238240

239-
parser.Dispose();
240-
241241
return vsb.ToString();
242242
}
243243

0 commit comments

Comments
 (0)