Skip to content

Commit 11ab33a

Browse files
authored
Enable regex generator nullable reference types validation (#80142)
* Enable regex generator nullable reference types. Multiple times in the past we've audited the regex source generator for nullable reference type annotations, but because it was only officially compiled for netstandard2.0, any work we did to get it green eventually rotted. This follows the work done for the JSON generator, which also builds a .NET Core target, and does the same for the regex generator. In doing so, I also took the opportunity to clean up the REGEXGENERATOR compilation constant. * Add SetTargetFramework to ProjectReferences. * Fix regex unit tests.
1 parent 2975fa4 commit 11ab33a

21 files changed

+304
-286
lines changed

src/libraries/System.ComponentModel.TypeConverter/src/System.ComponentModel.TypeConverter.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@
248248
<ItemGroup>
249249
<ProjectReference Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
250250
ReferenceOutputAssembly="false"
251+
SetTargetFramework="TargetFramework=netstandard2.0"
251252
OutputItemType="Analyzer" />
252253
<Reference Include="System.Collections" />
253254
<Reference Include="System.Collections.NonGeneric" />

src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
<ItemGroup>
153153
<ProjectReference Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
154154
ReferenceOutputAssembly="false"
155+
SetTargetFramework="TargetFramework=netstandard2.0"
155156
OutputItemType="Analyzer" />
156157
<Reference Include="System.Collections" />
157158
<Reference Include="System.Collections.Concurrent" />

src/libraries/System.Private.Xml/src/System.Private.Xml.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@
760760
<ItemGroup>
761761
<ProjectReference Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
762762
ReferenceOutputAssembly="false"
763+
SetTargetFramework="TargetFramework=netstandard2.0"
763764
OutputItemType="Analyzer" />
764765
<Reference Include="System.Collections" />
765766
<Reference Include="System.Collections.Concurrent" />

src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@
324324
<ItemGroup>
325325
<ProjectReference Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj"
326326
ReferenceOutputAssembly="false"
327+
SetTargetFramework="TargetFramework=netstandard2.0"
327328
OutputItemType="Analyzer" />
328329

329330
<PackageReference Include="Moq" Version="$(MoqVersion)" />

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ void EmitFixedSet_LeftToRight()
843843
3 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
844844
_ => $"{span}.IndexOfAny({Literal(new string(primarySet.Chars))})",
845845
} :
846-
(primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Range.Value.Negated) switch
846+
(primarySet.Range!.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Range.Value.Negated) switch
847847
{
848848
(false, false) => $"{span}.IndexOfAnyInRange({Literal(primarySet.Range.Value.LowInclusive)}, {Literal(primarySet.Range.Value.HighInclusive)})",
849849
(true, false) => $"{span}.IndexOf({Literal(primarySet.Range.Value.LowInclusive)})",
@@ -2920,7 +2920,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL
29202920
if (!rtl &&
29212921
node.N > 1 && // no point in using IndexOf for small loops, in particular optionals
29222922
subsequent?.FindStartingLiteralNode() is RegexNode literalNode &&
2923-
TryEmitIndexOf(literalNode, useLast: true, negate: false, out int literalLength, out string indexOfExpr))
2923+
TryEmitIndexOf(literalNode, useLast: true, negate: false, out int literalLength, out string? indexOfExpr))
29242924
{
29252925
writer.WriteLine($"if ({startingPos} >= {endingPos} ||");
29262926

@@ -3685,7 +3685,7 @@ void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired =
36853685
TransferSliceStaticPosToPos();
36863686
writer.WriteLine($"int {iterationLocal} = inputSpan.Length - pos;");
36873687
}
3688-
else if (maxIterations == int.MaxValue && TryEmitIndexOf(node, useLast: false, negate: true, out _, out string indexOfExpr))
3688+
else if (maxIterations == int.MaxValue && TryEmitIndexOf(node, useLast: false, negate: true, out _, out string? indexOfExpr))
36893689
{
36903690
// We're unbounded and we can use an IndexOf method to perform the search. The unbounded restriction is
36913691
// purely for simplicity; it could be removed in the future with additional code to handle that case.
@@ -4351,8 +4351,8 @@ private static bool TryEmitIndexOf(
43514351
if (node.Kind == RegexNodeKind.Multi)
43524352
{
43534353
Debug.Assert(!negate, "Negation isn't appropriate for a multi");
4354-
indexOfExpr = $"{last}IndexOf({Literal(node.Str)})";
4355-
literalLength = node.Str.Length;
4354+
indexOfExpr = $"{last}IndexOf({Literal(node.Str!)})";
4355+
literalLength = node.Str!.Length;
43564356
return true;
43574357
}
43584358

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public partial class RegexGenerator
4141
return null;
4242
}
4343

44-
IMethodSymbol regexMethodSymbol = context.TargetSymbol as IMethodSymbol;
44+
IMethodSymbol? regexMethodSymbol = context.TargetSymbol as IMethodSymbol;
4545
if (regexMethodSymbol is null)
4646
{
4747
return null;
@@ -95,7 +95,7 @@ public partial class RegexGenerator
9595
// int matchTimeoutMilliseconds, or string cultureName.
9696
else if (items.Length == 3)
9797
{
98-
if (items[2].Type.SpecialType == SpecialType.System_Int32)
98+
if (items[2].Type?.SpecialType == SpecialType.System_Int32)
9999
{
100100
matchTimeout = items[2].Value as int?;
101101
}

src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFramework>netstandard2.0</TargetFramework>
4+
<!--
5+
Source generators must target netstandard2.0, which doesn't support nullable reference types. In order
6+
to enable the nullable reference type compiler checks, we also target NetCoreAppToolCurrent. Note that
7+
this doesn't use the live shared framework but an LKG targeting pack instead to avoid layering concerns.
8+
-->
9+
<TargetFrameworks>netstandard2.0;$(NetCoreAppToolCurrent)</TargetFrameworks>
510
<EnableDefaultItems>true</EnableDefaultItems>
611
<EnableDefaultEmbeddedResourceItems>false</EnableDefaultEmbeddedResourceItems>
712
<UsingToolXliff>true</UsingToolXliff>
813
<CLSCompliant>false</CLSCompliant>
914
<NoWarn>$(NoWarn);CS0436;CS0649</NoWarn>
1015
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
11-
<DefineConstants>$(DefineConstants);REGEXGENERATOR</DefineConstants>
1216
<AnalyzerLanguage>cs</AnalyzerLanguage>
1317
</PropertyGroup>
1418

@@ -46,4 +50,9 @@
4650
<Compile Include="..\src\System\Collections\HashtableExtensions.cs" Link="Production\HashtableExtensions.cs" />
4751
</ItemGroup>
4852

53+
<!-- Don't reference System.Text.RegularExpressions from the LKG, as shared sources are compiled into this project. -->
54+
<ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppToolCurrent)'">
55+
<DefaultReferenceExclusion Include="System.Text.RegularExpressions" />
56+
</ItemGroup>
57+
4958
</Project>

src/libraries/System.Text.RegularExpressions/gen/UpgradeToGeneratedRegexAnalyzer.cs

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,13 @@ private static bool ValidateParameters(ImmutableArray<IArgumentOperation> argume
168168
for (int i = 0; i < arguments.Length; i++)
169169
{
170170
IArgumentOperation argument = arguments[i];
171-
string argumentName = argument.Parameter.Name;
171+
string? argumentName = argument.Parameter?.Name;
172+
173+
// If the argument name is null (e.g. an __arglist), then we don't emit a diagnostic.
174+
if (argumentName is null)
175+
{
176+
return false;
177+
}
172178

173179
// If one of the arguments is a timeout, then we don't emit a diagnostic.
174180
if (argumentName.Equals(timeoutArgumentName, StringComparison.OrdinalIgnoreCase) ||
@@ -180,7 +186,7 @@ private static bool ValidateParameters(ImmutableArray<IArgumentOperation> argume
180186
// If the argument is the pattern, then we validate that it is constant and we store the index.
181187
if (argumentName.Equals(PatternArgumentName, StringComparison.OrdinalIgnoreCase))
182188
{
183-
if (!IsConstant(argument))
189+
if (!argument.Value.ConstantValue.HasValue)
184190
{
185191
return false;
186192
}
@@ -191,12 +197,12 @@ private static bool ValidateParameters(ImmutableArray<IArgumentOperation> argume
191197
// If the argument is the options, then we validate that it is constant, that it doesn't have RegexOptions.NonBacktracking, and we store the index.
192198
if (argumentName.Equals(OptionsArgumentName, StringComparison.OrdinalIgnoreCase))
193199
{
194-
if (!IsConstant(argument))
200+
if (!argument.Value.ConstantValue.HasValue)
195201
{
196202
return false;
197203
}
198204

199-
RegexOptions value = (RegexOptions)((int)argument.Value.ConstantValue.Value);
205+
RegexOptions value = (RegexOptions)(int)argument.Value.ConstantValue.Value!;
200206
if ((value & RegexOptions.NonBacktracking) > 0)
201207
{
202208
return false;
@@ -209,15 +215,6 @@ private static bool ValidateParameters(ImmutableArray<IArgumentOperation> argume
209215
return true;
210216
}
211217

212-
/// <summary>
213-
/// Ensures that the input to the constructor or invocation is constant at compile time
214-
/// which is a requirement in order to be able to use the source generator.
215-
/// </summary>
216-
/// <param name="argument">The argument to be analyzed.</param>
217-
/// <returns><see langword="true"/> if the argument is constant; otherwise, <see langword="false"/>.</returns>
218-
private static bool IsConstant(IArgumentOperation argument)
219-
=> argument.Value.ConstantValue.HasValue;
220-
221218
/// <summary>
222219
/// Ensures that the compilation can find the Regex and RegexAttribute types, and also validates that the
223220
/// LangVersion of the project is >= 10.0 (which is the current requirement for the Regex source generator).
@@ -233,7 +230,7 @@ private static bool ProjectSupportsRegexSourceGenerator(Compilation compilation,
233230
return false;
234231
}
235232

236-
INamedTypeSymbol generatedRegexAttributeTypeSymbol = compilation.GetTypeByMetadataName(GeneratedRegexTypeName);
233+
INamedTypeSymbol? generatedRegexAttributeTypeSymbol = compilation.GetTypeByMetadataName(GeneratedRegexTypeName);
237234
if (generatedRegexAttributeTypeSymbol == null)
238235
{
239236
return false;

src/libraries/System.Text.RegularExpressions/gen/UpgradeToGeneratedRegexCodeFixer.cs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ private static async Task<Document> ConvertToSourceGenerator(Document document,
161161
{
162162
operationArguments = invocationOperation.Arguments;
163163
IEnumerable<SyntaxNode> arguments = operationArguments
164-
.Where(arg => arg.Parameter.Name is not (UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName or UpgradeToGeneratedRegexAnalyzer.PatternArgumentName))
164+
.Where(arg => arg.Parameter?.Name is not (UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName or UpgradeToGeneratedRegexAnalyzer.PatternArgumentName))
165165
.Select(arg => arg.Syntax);
166166

167167
replacement = generator.InvocationExpression(generator.MemberAccessExpression(replacement, invocationOperation.TargetMethod.Name), arguments);
@@ -192,7 +192,7 @@ private static async Task<Document> ConvertToSourceGenerator(Document document,
192192
// we also need to parse the pattern in case there are options that were specified inside the pattern via the `(?i)` switch.
193193
SyntaxNode? cultureNameValue = null;
194194
RegexOptions regexOptions = regexOptionsValue is not null ? GetRegexOptionsFromArgument(operationArguments) : RegexOptions.None;
195-
string pattern = GetRegexPatternFromArgument(operationArguments);
195+
string pattern = GetRegexPatternFromArgument(operationArguments)!;
196196
regexOptions |= RegexParser.ParseOptionsInPattern(pattern, regexOptions);
197197

198198
// If the options include IgnoreCase and don't specify CultureInvariant then we will have to calculate the user's current culture in order to pass
@@ -239,9 +239,9 @@ static IEnumerable<ISymbol> GetAllMembers(ITypeSymbol? symbol)
239239
}
240240
}
241241

242-
static string GetRegexPatternFromArgument(ImmutableArray<IArgumentOperation> arguments)
242+
static string? GetRegexPatternFromArgument(ImmutableArray<IArgumentOperation> arguments)
243243
{
244-
IArgumentOperation? patternArgument = arguments.SingleOrDefault(arg => arg.Parameter.Name == UpgradeToGeneratedRegexAnalyzer.PatternArgumentName);
244+
IArgumentOperation? patternArgument = arguments.SingleOrDefault(arg => arg.Parameter?.Name == UpgradeToGeneratedRegexAnalyzer.PatternArgumentName);
245245
if (patternArgument is null)
246246
{
247247
return null;
@@ -252,16 +252,17 @@ static string GetRegexPatternFromArgument(ImmutableArray<IArgumentOperation> arg
252252

253253
static RegexOptions GetRegexOptionsFromArgument(ImmutableArray<IArgumentOperation> arguments)
254254
{
255-
IArgumentOperation? optionsArgument = arguments.SingleOrDefault(arg => arg.Parameter.Name == UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName);
255+
IArgumentOperation? optionsArgument = arguments.SingleOrDefault(arg => arg.Parameter?.Name == UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName);
256256

257-
return optionsArgument is null ? RegexOptions.None :
258-
(RegexOptions)(int)optionsArgument.Value.ConstantValue.Value;
257+
return optionsArgument is null || !optionsArgument.Value.ConstantValue.HasValue ?
258+
RegexOptions.None :
259+
(RegexOptions)(int)optionsArgument.Value.ConstantValue.Value!;
259260
}
260261

261262
// Helper method that generates the node for the pattern argument or the options argument.
262263
static SyntaxNode? GetNode(ImmutableArray<IArgumentOperation> arguments, SyntaxGenerator generator, string parameterName)
263264
{
264-
IArgumentOperation? argument = arguments.SingleOrDefault(arg => arg.Parameter.Name == parameterName);
265+
IArgumentOperation? argument = arguments.SingleOrDefault(arg => arg.Parameter?.Name == parameterName);
265266
if (argument is null)
266267
{
267268
return null;
@@ -270,7 +271,7 @@ static RegexOptions GetRegexOptionsFromArgument(ImmutableArray<IArgumentOperatio
270271
Debug.Assert(parameterName is UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName or UpgradeToGeneratedRegexAnalyzer.PatternArgumentName);
271272
if (parameterName == UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName)
272273
{
273-
string optionsLiteral = Literal(((RegexOptions)(int)argument.Value.ConstantValue.Value).ToString());
274+
string optionsLiteral = Literal(((RegexOptions)(int)argument.Value.ConstantValue.Value!).ToString());
274275
return SyntaxFactory.ParseExpression(optionsLiteral);
275276
}
276277
else if (argument.Value is ILiteralOperation literalOperation)

src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
<PropertyGroup>
33
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
44
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
5+
<DefineConstants>$(DefineConstants);SYSTEM_TEXT_REGULAREXPRESSIONS</DefineConstants>
56
</PropertyGroup>
67
<ItemGroup>
78
<Compile Include="System\Collections\HashtableExtensions.cs" />

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -547,12 +547,13 @@ public static string ConvertOldStringsToClass(string set, string category)
547547
strLength -= 2;
548548
}
549549

550-
#if REGEXGENERATOR
551-
return StringExtensions.Create
550+
return
551+
#if NETCOREAPP2_1_OR_GREATER
552+
string
552553
#else
553-
return string.Create
554+
StringExtensions
554555
#endif
555-
(strLength, (set, category, startsWithNulls), static (span, state) =>
556+
.Create(strLength, (set, category, startsWithNulls), static (span, state) =>
556557
{
557558
int index;
558559

@@ -981,7 +982,9 @@ public static bool ParticipatesInCaseConversion(ReadOnlySpan<char> s)
981982
/// <summary>Gets whether the specified span contains only ASCII.</summary>
982983
public static bool IsAscii(ReadOnlySpan<char> s)
983984
{
984-
#if REGEXGENERATOR
985+
#if NET8_0_OR_GREATER
986+
return Ascii.IsValid(s);
987+
#else
985988
foreach (char c in s)
986989
{
987990
if (c >= 128)
@@ -991,8 +994,6 @@ public static bool IsAscii(ReadOnlySpan<char> s)
991994
}
992995

993996
return true;
994-
#else
995-
return Ascii.IsValid(s);
996997
#endif
997998
}
998999

@@ -1250,11 +1251,12 @@ static bool InitializeValue(char ch, string set, ref uint[]? asciiLazyCache)
12501251
}
12511252

12521253
uint[]? cache = asciiLazyCache ?? Interlocked.CompareExchange(ref asciiLazyCache, new uint[CacheArrayLength], null) ?? asciiLazyCache;
1253-
#if REGEXGENERATOR
1254-
InterlockedExtensions.Or(ref cache[ch >> 4], bitsToSet);
1254+
#if NET5_0_OR_GREATER
1255+
Interlocked
12551256
#else
1256-
Interlocked.Or(ref cache[ch >> 4], bitsToSet);
1257+
InterlockedExtensions
12571258
#endif
1259+
.Or(ref cache[ch >> 4], bitsToSet);
12581260

12591261
// Return the computed value.
12601262
return isInClass;
@@ -1542,12 +1544,13 @@ internal static unsafe string CharsToStringClass(ReadOnlySpan<char> chars)
15421544
// Get the pointer/length of the span to be able to pass it into string.Create.
15431545
#pragma warning disable CS8500 // takes address of managed type
15441546
ReadOnlySpan<char> tmpChars = chars; // avoid address exposing the span and impacting the other code in the method that uses it
1545-
#if REGEXGENERATOR
1546-
return StringExtensions.Create(
1547+
return
1548+
#if NETCOREAPP2_1_OR_GREATER
1549+
string
15471550
#else
1548-
return string.Create(
1551+
StringExtensions
15491552
#endif
1550-
SetStartIndex + count, (IntPtr)(&tmpChars), static (span, charsPtr) =>
1553+
.Create(SetStartIndex + count, (IntPtr)(&tmpChars), static (span, charsPtr) =>
15511554
{
15521555
// Fill in the set string
15531556
span[FlagsIndex] = (char)0;

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
namespace System.Text.RegularExpressions
55
{
66
[Flags]
7-
#if REGEXGENERATOR
8-
internal
9-
#else
7+
#if SYSTEM_TEXT_REGULAREXPRESSIONS
108
public
9+
#else
10+
internal
1111
#endif
1212
enum RegexOptions
1313
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseError.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ namespace System.Text.RegularExpressions
1010
/// <remarks>
1111
/// This information is made available through <see cref="RegexParseException.Error"/>.
1212
/// </remarks>
13-
#if REGEXGENERATOR
14-
internal
15-
#else
13+
#if SYSTEM_TEXT_REGULAREXPRESSIONS
1614
public
15+
#else
16+
internal
1717
#endif
1818
enum RegexParseError
1919
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ namespace System.Text.RegularExpressions
1010
/// detailed information in the <see cref="Error"/> and <see cref="Offset"/> properties.
1111
/// </summary>
1212
[Serializable]
13-
#if REGEXGENERATOR
14-
internal
15-
#else
13+
#if SYSTEM_TEXT_REGULAREXPRESSIONS
1614
public
15+
#else
16+
internal
1717
#endif
1818
sealed class RegexParseException : ArgumentException
1919
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2076,7 +2076,7 @@ internal static int MapCaptureNumber(int capnum, Hashtable? caps) =>
20762076
// ' a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
20772077
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0};
20782078

2079-
#if NET7_0_OR_GREATER
2079+
#if NET8_0_OR_GREATER
20802080
private static readonly IndexOfAnyValues<char> s_metachars =
20812081
IndexOfAnyValues.Create("\t\n\f\r #$()*+.?[\\^{|");
20822082

0 commit comments

Comments
 (0)