Skip to content

Commit 6b372fb

Browse files
authored
Fix non-determinism in Regex source generator (#78103)
* Fix non-determinism in Regex source generator The source generator enumerates a Hashtable to write out its contents. When the keys of the Hashtable are strings, string hash code randomization may result in the order of that enumeration being different in different processes, leading to non-deterministic ordering of values written out and thus non-deterministic source generator output. * Address PR feedback
1 parent b00a4ee commit 6b372fb

File tree

5 files changed

+65
-8
lines changed

5 files changed

+65
-8
lines changed

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ private static void EmitRegexDerivedImplementation(
123123
if (rm.Tree.CaptureNumberSparseMapping is not null)
124124
{
125125
writer.Write(" base.Caps = new Hashtable {");
126-
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
126+
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast<DictionaryEntry>().OrderBy(de => de.Key as int?));
127127
writer.WriteLine($" }};");
128128
}
129129
if (rm.Tree.CaptureNameToNumberMapping is not null)
130130
{
131131
writer.Write(" base.CapNames = new Hashtable {");
132-
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
132+
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping.Cast<DictionaryEntry>().OrderBy(de => de.Key as string, StringComparer.Ordinal));
133133
writer.WriteLine($" }};");
134134
}
135135
if (rm.Tree.CaptureNames is not null)
@@ -149,11 +149,10 @@ private static void EmitRegexDerivedImplementation(
149149
writer.WriteLine(runnerFactoryImplementation);
150150
writer.WriteLine($"}}");
151151

152-
static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
152+
static void AppendHashtableContents(IndentedTextWriter writer, IEnumerable<DictionaryEntry> contents)
153153
{
154-
IDictionaryEnumerator en = ht.GetEnumerator();
155154
string separator = "";
156-
while (en.MoveNext())
155+
foreach (DictionaryEntry en in contents)
157156
{
158157
writer.Write(separator);
159158
separator = ", ";

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ public int[] GetGroupNumbers()
322322
{
323323
result[(int)de.Value!] = (int)de.Key;
324324
}
325+
Array.Sort(result);
325326
}
326327

327328
return result;

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ public void GroupNamesAndNumbers(string pattern, string input, string[] expected
149149

150150
int[] numbers = regex.GetGroupNumbers();
151151
Assert.Equal(expectedNumbers.Length, numbers.Length);
152+
for (int i = 0; i < numbers.Length - 1; i++)
153+
{
154+
Assert.True(numbers[i] <= numbers[i + 1]);
155+
}
152156

153157
string[] names = regex.GetGroupNames();
154158
Assert.Equal(expectedNames.Length, names.Length);

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName)
6767
throw new InvalidOperationException();
6868
}
6969

70-
internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
71-
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
70+
private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore(
71+
string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
7272
{
7373
var proj = new AdhocWorkspace()
7474
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
@@ -87,7 +87,13 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
8787
var generator = new RegexGenerator();
8888
CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion));
8989
GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken);
90-
GeneratorDriverRunResult generatorResults = gd.GetRunResult();
90+
return (comp, gd.GetRunResult());
91+
}
92+
93+
internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
94+
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
95+
{
96+
(Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
9197
if (!compile)
9298
{
9399
return generatorResults.Diagnostics;
@@ -107,6 +113,20 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
107113
return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray();
108114
}
109115

116+
internal static async Task<string> GenerateSourceText(
117+
string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
118+
{
119+
(Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
120+
string generatedSource = string.Concat(generatorResults.GeneratedTrees.Select(t => t.ToString()));
121+
122+
if (generatorResults.Diagnostics.Length != 0)
123+
{
124+
throw new ArgumentException(string.Join(Environment.NewLine, generatorResults.Diagnostics) + Environment.NewLine + generatedSource);
125+
}
126+
127+
return generatedSource;
128+
}
129+
110130
internal static async Task<Regex> SourceGenRegexAsync(
111131
string pattern, CultureInfo? culture, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default)
112132
{

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
using Microsoft.CodeAnalysis;
55
using Microsoft.CodeAnalysis.CSharp;
6+
using Microsoft.DotNet.RemoteExecutor;
67
using System.Collections.Generic;
8+
using System.Diagnostics;
79
using System.Globalization;
810
using System.Threading.Tasks;
911
using Xunit;
@@ -839,5 +841,36 @@ partial class C
839841
public static partial Regex Valid();
840842
}", compile: true));
841843
}
844+
845+
[ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
846+
[OuterLoop("Takes several seconds")]
847+
public void Deterministic_SameRegexProducesSameSource()
848+
{
849+
string first = Generate();
850+
for (int trials = 0; trials < 3; trials++)
851+
{
852+
Assert.Equal(first, Generate());
853+
}
854+
855+
static string Generate()
856+
{
857+
const string Code =
858+
@"using System.Text.RegularExpressions;
859+
partial class C
860+
{
861+
[GeneratedRegex(""(?<Name>\w+) (?<Street>\w+), (?<City>\w+) (?<State>[A-Z]{2}) (?<Zip>[0-9]{5})"")]
862+
public static partial Regex Valid();
863+
}";
864+
865+
// Generate the source in a new process so that any process-specific randomization is different between runs,
866+
// e.g. hash code randomization for strings.
867+
868+
using RemoteInvokeHandle handle = RemoteExecutor.Invoke(
869+
async () => Console.WriteLine(await RegexGeneratorHelper.GenerateSourceText(Code)),
870+
new RemoteInvokeOptions { StartInfo = new ProcessStartInfo { RedirectStandardOutput = true } });
871+
872+
return handle.Process.StandardOutput.ReadToEnd();
873+
}
874+
}
842875
}
843876
}

0 commit comments

Comments
 (0)