Skip to content

Commit 80ba698

Browse files
authored
Delete old code generation approach from RegexCompiler / source generator (#62318)
* Delete old code generation approach from RegexCompiler / source generator. In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via RegexWriter; the interpreter would then sit in a loop processing those opcodes, and RegexCompiler would iterate through the opcodes, generating code for each one equivalent to what the interpreter would do but with some decisions made at compile-time rather than at run-time. This approach, however, leads to complicated code that's not pay-for-play (e.g. a big backtracking jump table that all compilations go through even if there's no backtracking), that doesn't factor in the shape of the tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that doesn't read well when emitted as C# instead of IL as part of the source generator. In .NET 5, we started adding an alternative implementation that processed the RegexNode tree directly, addressing all of those cited issues; however, it only worked for a subset of expressions, namely those with little-to-no backtracking (e.g. non-atomic loops and alternations weren't supported). Since then, we've improved it to the point where everything other than RegexOptions.RightToLeft (which implicitly means lookbehinds as well) is supported, and we've agreed it's ok to drop compilation for those constructs; if they ever become an issue, we can add support for them via the new compilation scheme.
As such, this PR:
- Deletes all of the code associated with the older code generation scheme
- Updates the Regex ctor to fall back to selecting the interpreter if the expression can't be compiled
- Updates the source generator to fall back to just emitting a cached use of Regex if the expression can't be compiled (and issuing a diagnostic in that case)
- Adds several tests that now pass with the new scheme that didn't with the old (and that still don't with the interpreter)
* Make the addition of more declarations a bit more robust
* Reduce backtracking code gen when nodes are atomic. Also added some comments and renamed a few methods for consistency between RegexCompiler and RegexGenerator.Emitter
* Fix tests on mono interpreter
1 parent f2c8f7c commit 80ba698

25 files changed

+1061
-4480
lines changed

src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ namespace System.Text.RegularExpressions.Generator
88
{
99
internal static class DiagnosticDescriptors
1010
{
11+
private const string Category = "RegexGenerator";
12+
1113
public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor(
1214
id: "SYSLIB1040",
1315
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
1416
messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
15-
category: "RegexGenerator",
17+
category: Category,
1618
DiagnosticSeverity.Error,
1719
isEnabledByDefault: true,
1820
customTags: WellKnownDiagnosticTags.NotConfigurable);
@@ -21,7 +23,7 @@ internal static class DiagnosticDescriptors
2123
id: "SYSLIB1041",
2224
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
2325
messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
24-
category: "RegexGenerator",
26+
category: Category,
2527
DiagnosticSeverity.Error,
2628
isEnabledByDefault: true,
2729
customTags: WellKnownDiagnosticTags.NotConfigurable);
@@ -30,7 +32,7 @@ internal static class DiagnosticDescriptors
3032
id: "SYSLIB1042",
3133
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
3234
messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
33-
category: "RegexGenerator",
35+
category: Category,
3436
DiagnosticSeverity.Error,
3537
isEnabledByDefault: true,
3638
customTags: WellKnownDiagnosticTags.NotConfigurable);
@@ -39,7 +41,7 @@ internal static class DiagnosticDescriptors
3941
id: "SYSLIB1043",
4042
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
4143
messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
42-
category: "RegexGenerator",
44+
category: Category,
4345
DiagnosticSeverity.Error,
4446
isEnabledByDefault: true,
4547
customTags: WellKnownDiagnosticTags.NotConfigurable);
@@ -48,9 +50,17 @@ internal static class DiagnosticDescriptors
4850
id: "SYSLIB1044",
4951
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
5052
messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
51-
category: "RegexGenerator",
53+
category: Category,
5254
DiagnosticSeverity.Error,
5355
isEnabledByDefault: true,
5456
customTags: WellKnownDiagnosticTags.NotConfigurable);
57+
58+
public static DiagnosticDescriptor LimitedSourceGeneration { get; } = new DiagnosticDescriptor(
59+
id: "SYSLIB1045",
60+
title: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
61+
messageFormat: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
62+
category: Category,
63+
DiagnosticSeverity.Info,
64+
isEnabledByDefault: true);
5565
}
5666
}

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 293 additions & 1505 deletions
Large diffs are not rendered by default.

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) =>
190190
SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));
191191

192192
var regexMethod = new RegexMethod(
193+
methodSyntax,
193194
regexMethodSymbol.Name,
194195
methodSyntax.Modifiers.ToString(),
195196
pattern,
@@ -231,7 +232,7 @@ static bool IsAllowedKind(SyntaxKind kind) =>
231232
}
232233

233234
/// <summary>A regex method.</summary>
234-
internal sealed record RegexMethod(string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
235+
internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
235236

236237
/// <summary>A type holding a regex method.</summary>
237238
internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name, string Constraints)

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,12 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
7070
context.ReportDiagnostic(d);
7171
break;
7272

73-
case string s:
74-
code.Add(s);
73+
case ValueTuple<string, ImmutableArray<Diagnostic>> t:
74+
code.Add(t.Item1);
75+
foreach (Diagnostic d in t.Item2)
76+
{
77+
context.ReportDiagnostic(d);
78+
}
7579
break;
7680
}
7781
}

src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@
137137
<data name="InvalidLangVersionMessage" xml:space="preserve">
138138
<value>C# LangVersion of 10 or greater is required</value>
139139
</data>
140+
<data name="LimitedSourceGenerationTitle" xml:space="preserve">
141+
<value>RegexGenerator limitation reached.</value>
142+
</data>
143+
<data name="LimitedSourceGenerationMessage" xml:space="preserve">
144+
<value>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</value>
145+
</data>
140146
<data name="Generic" xml:space="preserve">
141147
<value>Regular expression parser error '{0}' at offset {1}.</value>
142148
</data>

src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@
152152
<target state="translated">Délka nemůže být menší než 0 nebo přesáhnout délku vstupu.</target>
153153
<note />
154154
</trans-unit>
155+
<trans-unit id="LimitedSourceGenerationMessage">
156+
<source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
157+
<target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
158+
<note />
159+
</trans-unit>
160+
<trans-unit id="LimitedSourceGenerationTitle">
161+
<source>RegexGenerator limitation reached.</source>
162+
<target state="new">RegexGenerator limitation reached.</target>
163+
<note />
164+
</trans-unit>
155165
<trans-unit id="MakeException">
156166
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
157167
<target state="translated">Neplatný vzor {0} u posunu {1}. {2}</target>

src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@
152152
<target state="translated">Die Länge darf nicht kleiner als 0 sein oder die Eingabelänge überschreiten.</target>
153153
<note />
154154
</trans-unit>
155+
<trans-unit id="LimitedSourceGenerationMessage">
156+
<source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
157+
<target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
158+
<note />
159+
</trans-unit>
160+
<trans-unit id="LimitedSourceGenerationTitle">
161+
<source>RegexGenerator limitation reached.</source>
162+
<target state="new">RegexGenerator limitation reached.</target>
163+
<note />
164+
</trans-unit>
155165
<trans-unit id="MakeException">
156166
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
157167
<target state="translated">Ungültiges Muster "{0}" bei Offset {1}. {2}</target>

src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@
152152
<target state="translated">La longitud no puede ser inferior a 0 ni superar la longitud de entrada.</target>
153153
<note />
154154
</trans-unit>
155+
<trans-unit id="LimitedSourceGenerationMessage">
156+
<source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
157+
<target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
158+
<note />
159+
</trans-unit>
160+
<trans-unit id="LimitedSourceGenerationTitle">
161+
<source>RegexGenerator limitation reached.</source>
162+
<target state="new">RegexGenerator limitation reached.</target>
163+
<note />
164+
</trans-unit>
155165
<trans-unit id="MakeException">
156166
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
157167
<target state="translated">Patrón '{0}' no válido en el desplazamiento {1}. {2}</target>

src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@
152152
<target state="translated">La longueur ne peut pas être inférieure à 0 ou supérieure à la longueur d'entrée.</target>
153153
<note />
154154
</trans-unit>
155+
<trans-unit id="LimitedSourceGenerationMessage">
156+
<source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
157+
<target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
158+
<note />
159+
</trans-unit>
160+
<trans-unit id="LimitedSourceGenerationTitle">
161+
<source>RegexGenerator limitation reached.</source>
162+
<target state="new">RegexGenerator limitation reached.</target>
163+
<note />
164+
</trans-unit>
155165
<trans-unit id="MakeException">
156166
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
157167
<target state="translated">Modèle « {0} » non valide au niveau du décalage {1}. {2}</target>

src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@
152152
<target state="translated">Lenght non può essere minore di zero o superare la lunghezza di input.</target>
153153
<note />
154154
</trans-unit>
155+
<trans-unit id="LimitedSourceGenerationMessage">
156+
<source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
157+
<target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
158+
<note />
159+
</trans-unit>
160+
<trans-unit id="LimitedSourceGenerationTitle">
161+
<source>RegexGenerator limitation reached.</source>
162+
<target state="new">RegexGenerator limitation reached.</target>
163+
<note />
164+
</trans-unit>
155165
<trans-unit id="MakeException">
156166
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
157167
<target state="translated">Criterio '{0}' non valido alla posizione di offset {1}. {2}</target>

0 commit comments

Comments
 (0)