Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public partial class RegexGenerator
string? pattern = null;
int? options = null;
int? matchTimeout = null;
string? cultureName = string.Empty;
foreach (AttributeData attributeData in boundAttributes)
{
if (!SymbolEqualityComparer.Default.Equals(attributeData.AttributeClass, generatedRegexAttributeSymbol))
Expand All @@ -75,7 +76,7 @@ public partial class RegexGenerator
}

ImmutableArray<TypedConstant> items = attributeData.ConstructorArguments;
if (items.Length == 0 || items.Length > 3)
if (items.Length == 0 || items.Length > 4)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation());
}
Expand All @@ -85,9 +86,23 @@ public partial class RegexGenerator
if (items.Length >= 2)
{
options = items[1].Value as int?;
if (items.Length == 3)
if (items.Length == 4)
{
matchTimeout = items[2].Value as int?;
cultureName = items[3].Value as string;
}
// If there are 3 parameters, we need to check if the third argument is
// int matchTimeoutMilliseconds, or string cultureName.
else if (items.Length == 3)
{
if (items[2].Type.SpecialType == SpecialType.System_Int32)
{
matchTimeout = items[2].Value as int?;
}
else
{
cultureName = items[2].Value as string;
}
}
}
}
Expand All @@ -97,7 +112,7 @@ public partial class RegexGenerator
return null;
}

if (pattern is null)
if (pattern is null || cultureName is null)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)");
}
Expand All @@ -113,14 +128,40 @@ public partial class RegexGenerator

RegexOptions regexOptions = options is not null ? (RegexOptions)options : RegexOptions.None;

// TODO: This is going to include the culture that's current at the time of compilation.
// What should we do about that? We could:
// - say not specifying CultureInvariant is invalid if anything about options or the expression will look at culture
// - fall back to not generating source if it's not specified
// - just use whatever culture is present at build time
// - devise a new way of not using the culture present at build time
// - ...
CultureInfo culture = (regexOptions & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
// If RegexOptions.IgnoreCase was specified or the inline ignore case option `(?i)` is present in the pattern, then we will (in priority order):
// - If a culture name was passed in:
// - If RegexOptions.CultureInvariant was also passed in, then we emit a diagnostic due to the explicit conflict.
// - We try to initialize a culture using the passed in culture name to be used for case-insensitive comparisons. If
// the culture name is invalid, we'll emit a diagnostic.
// - Default to use Invariant Culture if no culture name was passed in.
CultureInfo culture = CultureInfo.InvariantCulture;
RegexOptions regexOptionsWithPatternOptions;
try
{
regexOptionsWithPatternOptions = regexOptions | RegexParser.ParseOptionsInPattern(pattern, regexOptions);
}
catch (Exception e)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message);
}

if ((regexOptionsWithPatternOptions & RegexOptions.IgnoreCase) != 0 && !string.IsNullOrEmpty(cultureName))
{
if ((regexOptions & RegexOptions.CultureInvariant) != 0)
{
// User passed in both a culture name and set RegexOptions.CultureInvariant which causes an explicit conflict.
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "cultureName");
}

try
{
culture = CultureInfo.GetCultureInfo(cultureName);
}
catch (CultureNotFoundException)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "cultureName");
}
}

// Validate the options
const RegexOptions SupportedOptions =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,34 @@ private static async Task<Document> ConvertToSourceGenerator(Document document,
// Allow user to pick a different name for the method.
newMethod = newMethod.ReplaceToken(newMethod.Identifier, SyntaxFactory.Identifier(methodName).WithAdditionalAnnotations(RenameAnnotation.Create()));

// We now need to check if we have to pass in the cultureName parameter. This parameter will be required in case the option
// RegexOptions.IgnoreCase is set for this Regex. To determine that, we first get the passed in options (if any), and then,
// we also need to parse the pattern in case there are options that were specified inside the pattern via the `(?i)` switch.
SyntaxNode? cultureNameValue = null;
RegexOptions regexOptions = regexOptionsValue is not null ? GetRegexOptionsFromArgument(operationArguments) : RegexOptions.None;
string pattern = GetRegexPatternFromArgument(operationArguments);
regexOptions |= RegexParser.ParseOptionsInPattern(pattern, regexOptions);

// If the options include IgnoreCase and don't specify CultureInvariant then we will have to calculate the user's current culture in order to pass
// it in as a parameter. If the user specified IgnoreCase, but also selected CultureInvariant, then we skip as the default is to use Invariant culture.
if ((regexOptions & RegexOptions.IgnoreCase) != 0 && (regexOptions & RegexOptions.CultureInvariant) == 0)
{
// If CultureInvariant wasn't specified as options, we default to the current culture.
cultureNameValue = generator.LiteralExpression(CultureInfo.CurrentCulture.Name);

// If options weren't passed in, then we need to define it as well in order to use the three parameter constructor.
if (regexOptionsValue is null)
{
regexOptionsValue = generator.MemberAccessExpression(SyntaxFactory.IdentifierName("RegexOptions"), "None");
}
}

// Generate the GeneratedRegex attribute syntax node with the specified parameters.
SyntaxNode attributes = generator.Attribute(generator.TypeExpression(generatedRegexAttributeSymbol), attributeArguments: (patternValue, regexOptionsValue) switch
SyntaxNode attributes = generator.Attribute(generator.TypeExpression(generatedRegexAttributeSymbol), attributeArguments: (patternValue, regexOptionsValue, cultureNameValue) switch
{
({ }, null) => new[] { patternValue },
({ }, { }) => new[] { patternValue, regexOptionsValue },
({ }, null, null) => new[] { patternValue },
({ }, { }, null) => new[] { patternValue, regexOptionsValue },
({ }, { }, { }) => new[] { patternValue, regexOptionsValue, cultureNameValue },
_ => Array.Empty<SyntaxNode>(),
});

Expand Down Expand Up @@ -223,10 +246,29 @@ static IEnumerable<ISymbol> GetAllMembers(ITypeSymbol? symbol)
}
}

// Extracts the constant string supplied for the pattern parameter. Yields null when no argument is bound
// to that parameter, or when the argument's constant value is not a string.
static string GetRegexPatternFromArgument(ImmutableArray<IArgumentOperation> arguments)
{
    // SingleOrDefault: at most one argument is expected to be bound to the pattern parameter.
    IArgumentOperation? match = arguments.SingleOrDefault(candidate => candidate.Parameter.Name == UpgradeToGeneratedRegexAnalyzer.PatternArgumentName);

    // The null-conditional covers the "argument not present" case; the 'as' cast covers non-string constants.
    return match?.Value.ConstantValue.Value as string;
}

// Reads the constant RegexOptions value supplied for the options parameter, falling back to
// RegexOptions.None when no argument is bound to that parameter.
static RegexOptions GetRegexOptionsFromArgument(ImmutableArray<IArgumentOperation> arguments)
{
    IArgumentOperation? match = arguments.SingleOrDefault(candidate => candidate.Parameter.Name == UpgradeToGeneratedRegexAnalyzer.OptionsArgumentName);
    if (match is null)
    {
        return RegexOptions.None;
    }

    // The constant is unboxed as an int first and then reinterpreted as the enum.
    int rawOptions = (int)match.Value.ConstantValue.Value;
    return (RegexOptions)rawOptions;
}

// Helper method that generates the node for the pattern argument or the options argument.
static SyntaxNode? GetNode(ImmutableArray<IArgumentOperation> arguments, SyntaxGenerator generator, string parameterName)
{
var argument = arguments.SingleOrDefault(arg => arg.Parameter.Name == parameterName);
IArgumentOperation? argument = arguments.SingleOrDefault(arg => arg.Parameter.Name == parameterName);
if (argument is null)
{
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ public sealed partial class GeneratedRegexAttribute : System.Attribute
{
public GeneratedRegexAttribute([System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { }
public GeneratedRegexAttribute([System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { }
public GeneratedRegexAttribute([System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, string cultureName) { }
public GeneratedRegexAttribute([System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, int matchTimeoutMilliseconds) { }
public GeneratedRegexAttribute([System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, int matchTimeoutMilliseconds, string cultureName) { }
public string CultureName { get; }
public string Pattern { get; }
public System.Text.RegularExpressions.RegexOptions Options { get; }
public int MatchTimeoutMilliseconds { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Threading;

namespace System.Text.RegularExpressions;
Expand All @@ -24,15 +25,47 @@ public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof
{
}

/// <summary>Initializes a new instance of the <see cref="GeneratedRegexAttribute"/> with the specified pattern, options, and culture name.</summary>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that modify the regular expression.</param>
/// <param name="cultureName">The name of a culture to be used for case-insensitive comparisons. <paramref name="cultureName"/> is not case-sensitive.</param>
/// <remarks>
/// For a list of predefined culture names on Windows systems, see the Language tag column in the <see href="https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/a9eac961-e77d-41a6-90a5-ce1a8b0cdb9c">list of
/// language/region names supported by Windows</see>. Culture names follow the standard defined by <see href="https://tools.ietf.org/html/bcp47">BCP 47</see>. In addition,
/// starting with Windows 10, <paramref name="cultureName"/> can be any valid BCP-47 language tag.
///
/// If <paramref name="cultureName"/> is <see cref="string.Empty"/>, the invariant culture will be used.
///
/// This overload does not take a time-out; it forwards <see cref="Timeout.Infinite"/> as the match time-out.
/// </remarks>
public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof(options))] string pattern, RegexOptions options, string cultureName) : this(pattern, options, Timeout.Infinite, cultureName)
{
}

/// <summary>Initializes a new instance of the <see cref="GeneratedRegexAttribute"/> with the specified pattern, options, and timeout.</summary>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that modify the regular expression.</param>
/// <param name="matchTimeoutMilliseconds">A time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</param>
/// <remarks>
/// This overload does not take a culture name; it forwards <see cref="string.Empty"/>, which stands for the invariant culture.
/// </remarks>
public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof(options))] string pattern, RegexOptions options, int matchTimeoutMilliseconds) : this(pattern, options, matchTimeoutMilliseconds, string.Empty /* Empty string means Invariant culture */)
{
}

/// <summary>Initializes a new instance of the <see cref="GeneratedRegexAttribute"/> with the specified pattern, options, and timeout.</summary>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that modify the regular expression.</param>
/// <param name="matchTimeoutMilliseconds">A time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</param>
public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof(options))] string pattern, RegexOptions options, int matchTimeoutMilliseconds)
/// <param name="cultureName">The name of a culture to be used for case-insensitive comparisons. <paramref name="cultureName"/> is not case-sensitive.</param>
/// <remarks>
/// For a list of predefined culture names on Windows systems, see the Language tag column in the <see href="https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/a9eac961-e77d-41a6-90a5-ce1a8b0cdb9c">list of
/// language/region names supported by Windows</see>. Culture names follow the standard defined by <see href="https://tools.ietf.org/html/bcp47">BCP 47</see>. In addition,
/// starting with Windows 10, <paramref name="cultureName"/> can be any valid BCP-47 language tag.
///
/// If <paramref name="cultureName"/> is <see cref="string.Empty"/>, the invariant culture will be used.
/// </remarks>
public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof(options))] string pattern, RegexOptions options, int matchTimeoutMilliseconds, string cultureName)
{
    // Store every argument verbatim; no validation or normalization happens here.
    (Pattern, Options, MatchTimeoutMilliseconds, CultureName) = (pattern, options, matchTimeoutMilliseconds, cultureName);
}

/// <summary>Gets the regular expression pattern to match.</summary>
Expand All @@ -43,4 +76,7 @@ public GeneratedRegexAttribute([StringSyntax(StringSyntaxAttribute.Regex, nameof

/// <summary>Gets a time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</summary>
public int MatchTimeoutMilliseconds { get; }

/// <summary>Gets the name of the culture to be used for case-insensitive comparisons.</summary>
/// <remarks>The value is whatever was passed to the constructor; the three-argument time-out constructor supplies <see cref="string.Empty"/>.</remarks>
public string CultureName { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,24 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H
/// <summary>
/// Picks the culture implied by <paramref name="options"/>: the invariant culture when
/// <see cref="RegexOptions.CultureInvariant"/> is set, otherwise the current culture.
/// </summary>
internal static CultureInfo GetTargetCulture(RegexOptions options)
{
    bool invariantRequested = (options & RegexOptions.CultureInvariant) != 0;
    if (invariantRequested)
    {
        return CultureInfo.InvariantCulture;
    }

    return CultureInfo.CurrentCulture;
}

/// <summary>
/// Pre-scans <paramref name="pattern"/> and returns the options gathered while doing so, which is how callers
/// learn about inline option constructs such as <c>(?i)</c>.
/// </summary>
/// <param name="pattern">The regular expression pattern to scan.</param>
/// <param name="options">The options the parser starts out with.</param>
/// <returns>The options reported by the capture-counting pre-scan through its out parameter.</returns>
public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions options)
{
    // No case conversions are performed by this scan, so the culture is irrelevant and the invariant one is passed.
    using var prescanner = new RegexParser(pattern, options, CultureInfo.InvariantCulture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);

    // Counting captures is not the goal here; the call is reused because it already makes a quick pass over
    // the pattern and hands back, via its out parameter, the options it came across along the way.
    prescanner.CountCaptures(out RegexOptions inlineOptions);
    prescanner.Reset(options);

    return inlineOptions;
}

public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture)
{
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);

parser.CountCaptures();
parser.CountCaptures(out _);
parser.Reset(options);
RegexNode root = parser.ScanRegex();

Expand Down Expand Up @@ -1772,10 +1785,10 @@ private static RegexOptions OptionFromCode(char ch) =>
/// <summary>
/// A prescanner for deducing the slots used for captures by doing a partial tokenization of the pattern.
/// </summary>
private void CountCaptures()
private void CountCaptures(out RegexOptions optionsFoundInPattern)
{
NoteCaptureSlot(0, 0);

optionsFoundInPattern = RegexOptions.None;
_autocap = 1;

while (CharsRight() > 0)
Expand Down Expand Up @@ -1850,6 +1863,7 @@ private void CountCaptures()

// get the options if it's an option construct (?cimsx-cimsx...)
ScanOptions();
optionsFoundInPattern |= _options;

if (CharsRight() > 0)
{
Expand Down
Loading