Skip to content

Commit 0c54c06

Browse files
Copilot and stephentoub authored
Fix balancing group inconsistency between IsMatched and Group.Success (#121021)
In `RegexRunner.TransferCapture`, when a balancing group's captured content preceded the balanced group's position, the "innermost interval" logic produced negative-length captures. `TidyBalancing` treats any negative array value as a balancing marker, so these captures were removed, which caused `IsMatched` and `Group.Success` to disagree about whether the group had matched.

The fix adds a check in `TransferCapture` that ensures `end >= start` after the innermost-interval calculation, creating zero-length captures instead of negative-length ones. Zero-length captures with non-negative start positions correctly survive `TidyBalancing`.

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
1 parent 5efa15f commit 0c54c06

File tree

2 files changed

+198
-0
lines changed

2 files changed

+198
-0
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,13 @@ protected void TransferCapture(int capnum, int uncapnum, int start, int end)
573573
else if (end <= start2)
574574
{
575575
start = start2;
576+
577+
// Ensure we don't create a capture with negative length
578+
// When the balancing capture precedes the balanced group, end might be less than the new start
579+
if (end < start)
580+
{
581+
end = start;
582+
}
576583
}
577584
else
578585
{

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs

Lines changed: 191 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2881,5 +2881,196 @@ public async Task MatchNonBacktrackingOver255Minterms()
28812881
Assert.Equal(272, ms[0].Length);
28822882
}
28832883
}
2884+
2885+
/// <summary>
/// Regression test for https://github.com/dotnet/runtime/issues/111161.
/// Runs patterns that combine a balancing group with a conditional on group 2 and checks that
/// the overall match is the expected one and that group 2's <c>Success</c> flag agrees with
/// its <c>Captures.Count</c>.
/// </summary>
/// <param name="regex">The already constructed regex under test.</param>
/// <param name="input">The text to match against.</param>
/// <param name="expectedGroup2Matched">Whether group 2 is expected to report success.</param>
/// <param name="expectedMatch">The expected value of the overall match.</param>
[Theory]
[MemberData(nameof(BalancingGroup_WithConditional_MemberData))]
public void BalancingGroup_WithConditional_ConsistentBehavior(Regex regex, string input, bool expectedGroup2Matched, string expectedMatch)
{
    Match match = regex.Match(input);

    // These two checks run on every target framework.
    Assert.True(match.Success, $"Match should succeed for input '{input}'");
    Assert.Equal(expectedMatch, match.Value);

#if NETFRAMEWORK
    // The group-state assertions below are compiled out here: they depend on the
    // TransferCapture fix, which is not part of .NET Framework.
    // Touch the otherwise unused parameter to avoid an xUnit warning.
    _ = expectedGroup2Matched;
#else
    // Snapshot group 2's state before asserting anything about it.
    Group secondGroup = match.Groups[2];
    bool secondGroupSucceeded = secondGroup.Success;
    int secondGroupCaptureCount = secondGroup.Captures.Count;

    // Success and the number of captures must tell the same story as the conditional did.
    Assert.Equal(expectedGroup2Matched, secondGroupSucceeded);
    if (!expectedGroup2Matched)
    {
        Assert.Equal(0, secondGroupCaptureCount);
    }
    else
    {
        Assert.True(secondGroupCaptureCount > 0, "If group 2 matched, it should have at least one capture");
    }
#endif
}
2919+
2920+
// Supplies rows of { Regex, input, expectedGroup2Matched, expectedMatch } for
// BalancingGroup_WithConditional_ConsistentBehavior, one set per available engine
// other than the NonBacktracking engine.
public static IEnumerable<object[]> BalancingGroup_WithConditional_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        // Balancing groups are not supported by the NonBacktracking engine, so skip it.
        if (RegexHelpers.IsNonBacktracking(engine))
        {
            continue;
        }

        var testCases = new (string Pattern, string Input, bool ExpectedGroup2Matched, string ExpectedMatch)[]
        {
            // Pattern from the original bug report: the balancing group (?'2-1'(?'x1'..))
            // captures text located BEFORE the text captured by group 1.
            (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b(?(2)(?'Group2Captured'.)|(?'Group2NotCaptured'.))",
             "00123xzacvb1", true, "00123xzacvb1"),

            // Baseline: balancing group used in ordinary forward matching.
            (@"(a)(?'2-1'b)(?(2)c|d)", "abc", true, "abc"),

            // Lookbehind balancing group whose captured text follows the balanced group.
            (@"(a)b(?<=(?'2-1'.))c(?(2)d|e)", "abcd", true, "abcd"),

            // Lookbehind balancing group whose captured text precedes the balanced group
            // (the ordering described in the bug report).
            (@"a(b)c(?<=(?'2-1'a)..)d(?(2)e|f)", "abcde", true, "abcde"),
        };

        // Only the pattern varies; culture, options and timeout are left unspecified.
        var regexSpecs = testCases
            .Select(c => (c.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null))
            .ToArray();
        Regex[] compiled = RegexHelpers.GetRegexes(engine, regexSpecs);

        // compiled[index] corresponds to testCases[index].
        for (int index = 0; index < testCases.Length; index++)
        {
            var testCase = testCases[index];
            yield return new object[] { compiled[index], testCase.Input, testCase.ExpectedGroup2Matched, testCase.ExpectedMatch };
        }
    }
}
2954+
2955+
/// <summary>
/// Checks that, for the requested group, <c>Group.Success</c> has the expected value and that
/// <c>Group.Captures.Count</c> agrees with it: at least one capture when the group matched,
/// none when it did not.
/// </summary>
/// <param name="regex">The already constructed regex under test.</param>
/// <param name="input">The text to match against.</param>
/// <param name="groupNumber">The number of the group to inspect.</param>
/// <param name="expectedMatched">Whether that group is expected to report success.</param>
[Theory]
[MemberData(nameof(BalancingGroup_IsMatched_Consistency_MemberData))]
public void BalancingGroup_IsMatched_Consistency(Regex regex, string input, int groupNumber, bool expectedMatched)
{
    Match match = regex.Match(input);

    // The overall match must succeed on every target framework.
    Assert.True(match.Success, $"Match should succeed for input '{input}'");

#if NETFRAMEWORK
    // The group-state assertions below are compiled out here: they depend on the
    // TransferCapture fix, which is not part of .NET Framework.
    // Touch the otherwise unused parameters to avoid an xUnit warning.
    _ = groupNumber;
    _ = expectedMatched;
#else
    // Snapshot the group's state before asserting anything about it.
    Group inspectedGroup = match.Groups[groupNumber];
    bool inspectedGroupSucceeded = inspectedGroup.Success;
    int inspectedGroupCaptureCount = inspectedGroup.Captures.Count;

    Assert.Equal(expectedMatched, inspectedGroupSucceeded);
    if (!expectedMatched)
    {
        Assert.Equal(0, inspectedGroupCaptureCount);
    }
    else
    {
        Assert.True(inspectedGroupCaptureCount > 0, $"If group {groupNumber} matched, it should have at least one capture");
    }
#endif
}
2987+
2988+
// Supplies rows of { Regex, input, groupNumber, expectedMatched } for
// BalancingGroup_IsMatched_Consistency, one set per available engine other than
// the NonBacktracking engine. Each pattern appears twice: once to inspect group 1
// and once to inspect group 2.
public static IEnumerable<object[]> BalancingGroup_IsMatched_Consistency_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        // The NonBacktracking engine is excluded from these balancing-group tests.
        if (RegexHelpers.IsNonBacktracking(engine))
        {
            continue;
        }

        var testCases = new (string Pattern, string Input, int GroupNumber, bool ExpectedMatched)[]
        {
            // Forward balancing: group 1 ends up with no captures left...
            (@"(a)(?'2-1'b)", "ab", 1, false),

            // ...and group 2 ends up holding a capture.
            (@"(a)(?'2-1'b)", "ab", 2, true),

            // Balancing inside a lookbehind: group 1 ends up with no captures left...
            (@"(a)b(?<=(?'2-1'.))c", "abc", 1, false),

            // ...and group 2 ends up holding a capture.
            (@"(a)b(?<=(?'2-1'.))c", "abc", 2, true),

            // Pattern from the original bug report: group 1 ends up with no captures left...
            (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b", "00123xzacvb", 1, false),

            // ...and group 2 ends up holding a capture.
            (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b", "00123xzacvb", 2, true),
        };

        // Only the pattern varies; culture, options and timeout are left unspecified.
        var regexSpecs = testCases
            .Select(c => (c.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null))
            .ToArray();
        Regex[] compiled = RegexHelpers.GetRegexes(engine, regexSpecs);

        // compiled[index] corresponds to testCases[index].
        for (int index = 0; index < testCases.Length; index++)
        {
            var testCase = testCases[index];
            yield return new object[] { compiled[index], testCase.Input, testCase.GroupNumber, testCase.ExpectedMatched };
        }
    }
}
3025+
3026+
/// <summary>
/// Checks the overall match value and the number of captures left in groups 1 and 2
/// for a set of balancing-group patterns.
/// </summary>
/// <param name="regex">The already constructed regex under test.</param>
/// <param name="input">The text to match against.</param>
/// <param name="expectedValue">The expected value of the overall match.</param>
/// <param name="expectedGroup1Count">The expected number of captures in group 1.</param>
/// <param name="expectedGroup2Count">The expected number of captures in group 2.</param>
[Theory]
[MemberData(nameof(BalancingGroup_Various_MemberData))]
public void BalancingGroup_Various_Scenarios(Regex regex, string input, string expectedValue, int expectedGroup1Count, int expectedGroup2Count)
{
    Match match = regex.Match(input);

    // These two checks run on every target framework.
    Assert.True(match.Success);
    Assert.Equal(expectedValue, match.Value);

#if NETFRAMEWORK
    // The capture-count assertions depend on the balancing-group fix and are compiled out
    // for .NET Framework. Touch the otherwise unused parameters to avoid an xUnit warning.
    _ = expectedGroup1Count;
    _ = expectedGroup2Count;
#else
    GroupCollection groups = match.Groups;
    Assert.Equal(expectedGroup1Count, groups[1].Captures.Count);
    Assert.Equal(expectedGroup2Count, groups[2].Captures.Count);
#endif
}
3047+
3048+
// Supplies rows of { Regex, input, expectedValue, expectedGroup1Count, expectedGroup2Count }
// for BalancingGroup_Various_Scenarios, one set per available engine other than
// the NonBacktracking engine.
public static IEnumerable<object[]> BalancingGroup_Various_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        // The NonBacktracking engine is excluded from these balancing-group tests.
        if (RegexHelpers.IsNonBacktracking(engine))
        {
            continue;
        }

        var testCases = new (string Pattern, string Input, string ExpectedValue, int ExpectedGroup1Count, int ExpectedGroup2Count)[]
        {
            // Basic balancing: group 1 captures, then is balanced into group 2.
            // Group 1 is left with no captures and group 2 with exactly one.
            (@"(a)(?'2-1'b)", "ab", "ab", 0, 1),

            // Multiple captures: the second (a) is group 2, then the balancing group
            // transfers from group 1. Group 2 keeps its own capture and gains one more
            // from the balancing transfer.
            (@"(a)(a)(?'2-1'b)", "aab", "aab", 0, 2),
        };

        // Only the pattern varies; culture, options and timeout are left unspecified.
        var regexSpecs = testCases
            .Select(c => (c.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null))
            .ToArray();
        Regex[] compiled = RegexHelpers.GetRegexes(engine, regexSpecs);

        // compiled[index] corresponds to testCases[index].
        for (int index = 0; index < testCases.Length; index++)
        {
            var testCase = testCases[index];
            yield return new object[] { compiled[index], testCase.Input, testCase.ExpectedValue, testCase.ExpectedGroup1Count, testCase.ExpectedGroup2Count };
        }
    }
}
28843075
}
28853076
}

0 commit comments

Comments
 (0)