Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2960,6 +2960,21 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck
return;
}

// Output any regex pattern comments associated with this node
if (rm.Tree.NodeComments?.TryGetValue(node, out List<string>? comments) is true)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot, it looks like all of the comments are getting associated with the same root node: every comment is being output at the beginning of TryMatchAtCurrentPosition rather than being distributed throughout the method at the appropriate points.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed - comments are now attached after ScanBlank() captures trailing comments, ensuring they're associated with the correct node. This distributes comments throughout the generated method instead of clustering them at the beginning. (commit will be in next push)

{
foreach (string comment in comments)
{
// Split multi-line comments to maintain proper alignment
string[] lines = comment.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string line in lines)
{
string trimmedLine = line.Trim();
writer.WriteLine($"// {trimmedLine}");
}
}
}

// For everything else, output a comment about what the node is.
writer.WriteLine($"// {DescribeNode(node, rm)}");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
{
try
{
RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it
RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture, captureComments: true); // make sure Compiled is included to get all optimizations applied to it
AnalysisResults analysis = RegexTreeAnalyzer.Analyze(regexTree);
return new RegexMethod(method.DeclaringType, method.IsProperty, method.DiagnosticLocation, method.MemberName, method.Modifiers, method.NullableRegex, method.Pattern, method.Options, method.MatchTimeout, regexTree, analysis, method.CompilationData);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ internal ref struct RegexParser

private bool _ignoreNextParen; // flag to skip capturing a parentheses group

private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span<int> optionSpan)
private Dictionary<RegexNode, List<string>>? _nodeComments; // side-channel for storing comments associated with nodes
private List<string>? _pendingComments; // comments waiting to be associated with the next node

private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span<int> optionSpan, bool captureComments = false)
{
Debug.Assert(pattern != null, "Pattern must be set");
Debug.Assert(culture != null, "Culture must be set");
Expand All @@ -79,6 +82,12 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H
_capnumlist = null;
_capnamelist = null;
_ignoreNextParen = false;

if (captureComments)
{
_nodeComments = new Dictionary<RegexNode, List<string>>();
_pendingComments = new List<string>();
}
}

/// <summary>Gets the culture to use based on the specified options.</summary>
Expand All @@ -100,9 +109,9 @@ public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions op
return foundOptionsInPattern;
}

public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture)
public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture, bool captureComments = false)
{
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments);

parser.CountCaptures(out _);
parser.Reset(options);
Expand Down Expand Up @@ -130,7 +139,7 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo
}
}

return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null);
return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null, parser._nodeComments);
}

/// <summary>This static call constructs a flat concatenation node given a replacement pattern.</summary>
Expand Down Expand Up @@ -352,6 +361,7 @@ private RegexNode ScanRegex()
_optionsStack.Append((int)_options);
if (ScanGroupOpen() is RegexNode grouper)
{
AttachCommentsToNode(grouper);
PushGroup();
StartGroup(grouper);
}
Expand Down Expand Up @@ -424,6 +434,12 @@ private RegexNode ScanRegex()

ScanBlank();

// Attach any comments that appeared after this node in the pattern
if (_unit is not null)
{
AttachCommentsToNode(_unit);
}

if (_pos == _pattern.Length || !(isQuantifier = IsTrueQuantifier()))
{
_concatenation!.AddChild(_unit!);
Expand Down Expand Up @@ -1048,21 +1064,37 @@ private void ScanBlank()

if ((_options & RegexOptions.IgnorePatternWhitespace) != 0 && _pos < _pattern.Length && _pattern[_pos] == '#')
{
int commentStart = _pos + 1; // Skip the '#'
_pos = _pattern.IndexOf('\n', _pos);
if (_pos < 0)
{
_pos = _pattern.Length;
}

if (_pendingComments is not null && commentStart < _pos)
{
string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim();
// Preserve even empty comments for visual separation
_pendingComments.Add(comment);
}
}
else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(')
{
int commentStart = _pos + 3; // Skip '(?#'
_pos = _pattern.IndexOf(')', _pos);
if (_pos < 0)
{
_pos = _pattern.Length;
throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment);
}

if (_pendingComments is not null && commentStart < _pos)
{
string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim();
// Preserve even empty comments for visual separation
_pendingComments.Add(comment);
}

_pos++;
}
else
Expand All @@ -1072,6 +1104,22 @@ private void ScanBlank()
}
}

/// <summary>
/// Moves every comment currently queued in <c>_pendingComments</c> onto <paramref name="node"/>'s
/// entry in <c>_nodeComments</c>, then empties the queue.
/// </summary>
/// <param name="node">The node that should own the queued comments.</param>
/// <remarks>
/// Does nothing when comment capture is disabled (the queue is null) or when no comments are queued.
/// Comments are appended after any that were already recorded for the same node.
/// </remarks>
private void AttachCommentsToNode(RegexNode node)
{
    // Nothing to do unless capture is enabled and at least one comment is waiting.
    if (_pendingComments is not { Count: > 0 } pending)
    {
        return;
    }

    // The constructor allocates _nodeComments together with _pendingComments,
    // so it is non-null whenever the queue is.
    if (_nodeComments!.TryGetValue(node, out List<string>? existing))
    {
        existing.AddRange(pending);
    }
    else
    {
        _nodeComments[node] = new List<string>(pending);
    }

    pending.Clear();
}

/// <summary>Scans chars following a '\' (not counting the '\'), and returns a RegexNode for the type of atom scanned</summary>
private RegexNode? ScanBackslash(bool scanOnly)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;

Expand Down Expand Up @@ -39,8 +40,10 @@ internal sealed class RegexTree
/// capture group number and the value is the index into <see cref="CaptureNames"/> for that capture group.
/// </remarks>
public readonly Hashtable? CaptureNumberSparseMapping;
/// <summary>A mapping of RegexNode to its associated comments from the pattern (for source generator use only).</summary>
internal readonly Dictionary<RegexNode, List<string>>? NodeComments;

internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture)
internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture, Dictionary<RegexNode, List<string>>? nodeComments = null)
{
#if DEBUG
// Asserts to both demonstrate and validate the relationships between the various capture data structures.
Expand Down Expand Up @@ -77,6 +80,7 @@ internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Has
CaptureNameToNumberMapping = captureNameToNumberMapping;
CaptureNames = captureNames;
Options = options;
NodeComments = nodeComments;
FindOptimizations = RegexFindOptimizations.Create(root, options);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1193,5 +1193,29 @@ partial class C
// The actual pattern string should properly escape the newline for C#
Assert.Contains("base.pattern = \"\\n\";", actual);
}

/// <summary>
/// Checks that comments written inside a <c>(?x)</c> pattern are echoed as C# comments
/// in the source produced by the regex generator.
/// </summary>
[Fact]
public async Task ValidateCommentsInGeneratedCode()
{
    // The pattern enables IgnorePatternWhitespace inline via (?x) and carries one '#' comment per line.
    string source = """
        using System.Text.RegularExpressions;
        partial class C
        {
        [GeneratedRegex(@"(?x)
        ^ # Start of line
        \w+ # Word characters
        $ # End of line
        ")]
        public static partial Regex WithComments();
        }
        """;

    string generated = await RegexGeneratorHelper.GenerateSourceText(source, allowUnsafe: true, checkOverflow: false);

    // Every pattern comment must show up somewhere in the generated text.
    string[] expectedComments = { "// Start of line", "// Word characters", "// End of line" };
    foreach (string expected in expectedComments)
    {
        Assert.Contains(expected, generated);
    }
}
}
}
Loading