|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | + |
| 4 | +using System.Collections.Generic; |
| 5 | +using System.Linq; |
| 6 | +using System.Tests; |
| 7 | +using Xunit; |
| 8 | + |
| 9 | +namespace System.Text.RegularExpressions.Tests |
| 10 | +{ |
| 11 | + public class RegexBalancingGroupTests |
| 12 | + { |
/// <summary>
/// Verifies that, for patterns combining a balancing group with a conditional on group 2,
/// the match succeeds with the expected value and that <see cref="Group.Success"/> and the
/// capture count of group 2 agree with the expected outcome. This includes patterns where the
/// balancing group's captured content precedes the position of the group being balanced.
/// Regression coverage for https://github.com/dotnet/runtime/issues/XXXXX
/// (TODO: replace the XXXXX placeholder with the real issue number).
/// </summary>
[Theory]
[MemberData(nameof(BalancingGroup_WithConditional_MemberData))]
public void BalancingGroup_WithConditional_ConsistentBehavior(RegexEngine engine, Regex regex, string input, bool expectedGroup2Matched, string expectedMatch)
{
    _ = engine; // To satisfy xUnit analyzer

    Match match = regex.Match(input);
    Assert.True(match.Success, $"Match should succeed for input '{input}'");
    Assert.Equal(expectedMatch, match.Value);

    // Read both views of group 2 up front so the assertions below compare a single snapshot.
    Group secondGroup = match.Groups[2];
    bool matched = secondGroup.Success;
    int captureCount = secondGroup.Captures.Count;

    // Success and Captures.Count must tell the same story as the conditional did.
    Assert.Equal(expectedGroup2Matched, matched);
    if (!expectedGroup2Matched)
    {
        Assert.Equal(0, captureCount);
        return;
    }

    Assert.True(captureCount > 0, "If group 2 matched, it should have at least one capture");
}
| 43 | + |
/// <summary>
/// Produces the rows for <see cref="BalancingGroup_WithConditional_ConsistentBehavior"/>.
/// Each row is { engine, regex, input, expectedGroup2Matched, expectedMatch }, generated for every
/// available engine except NonBacktracking.
/// </summary>
/// <remarks>
/// Unnamed groups are numbered 1..n from left to right, and a balancing group <c>(?'a-b'...)</c> may only
/// reference a group <c>b</c> that is defined somewhere in the pattern; otherwise the pattern is rejected
/// when the <see cref="Regex"/> is constructed. Every pattern below therefore defines the group it pops.
/// </remarks>
public static IEnumerable<object[]> BalancingGroup_WithConditional_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        if (RegexHelpers.IsNonBacktracking(engine))
        {
            // NonBacktracking engine doesn't support balancing groups
            continue;
        }

        var cases = new (string Pattern, string Input, bool ExpectedGroup2Matched, string ExpectedMatch)[]
        {
            // Original bug report pattern
            // The balancing group (?'2-1'(?'x1'..)) captures content that comes BEFORE group 1's capture
            (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b(?(2)(?'Group2Captured'.)|(?'Group2NotCaptured'.))",
             "00123xzacvb1", true, "00123xzacvb1"),

            // Simpler test case: balancing group in forward context (normal case)
            (@"(a)(?'2-1'b)(?(2)c|d)", "abc", true, "abc"),

            // Balancing group in lookbehind where captured content comes after balanced group
            (@"(a)b(?<=(?'2-1'.))c(?(2)d|e)", "abcd", true, "abcd"),

            // Balancing group in lookbehind where captured content comes before balanced group (bug scenario)
            (@"a(b)c(?<=(?'2-1'a)..)d(?(2)e|f)", "abcde", true, "abcde"),

            // Balancing group never participates: group 1 is defined (so the pattern is valid) but is
            // optional and does not match, and there is no 'b' to consume, so group 2 stays unmatched.
            (@"(x)?a(?'2-1'b)?(?(2)c|d)", "ad", false, "ad"),

            // Balancing attempt fails at match time: 'b' is present, but group 1 has no capture to pop,
            // so the optional balancing group is abandoned and the literal 'b' consumes it instead.
            (@"(x)?a(?'2-1'b)?b(?(2)c|d)", "abd", false, "abd"),

            // Multiple balancing operations. (?'3-2'd) pops the only capture of group 2, so group 2 must
            // end up unmatched with zero captures even though the conditional on group 3 takes the 'e' branch.
            (@"(a)(b)(?'3-1'c)(?'3-2'd)(?(3)e|f)", "abcde", false, "abcde"),
        };

        Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null)).ToArray());
        for (int i = 0; i < cases.Length; i++)
        {
            yield return new object[] { engine, regexes[i], cases[i].Input, cases[i].ExpectedGroup2Matched, cases[i].ExpectedMatch };
        }
    }
}
| 84 | + |
/// <summary>
/// Verifies that, once a match has been produced, the requested group's
/// <see cref="Group.Success"/> value and its capture count are consistent with each other
/// and with the expected matched/unmatched state.
/// </summary>
[Theory]
[MemberData(nameof(BalancingGroup_IsMatched_Consistency_MemberData))]
public void BalancingGroup_IsMatched_Consistency(RegexEngine engine, Regex regex, string input, int groupNumber, bool expectedMatched)
{
    _ = engine; // To satisfy xUnit analyzer

    Match match = regex.Match(input);
    Assert.True(match.Success, $"Match should succeed for input '{input}'");

    // Read both views of the group up front so the assertions below compare a single snapshot.
    Group target = match.Groups[groupNumber];
    bool matched = target.Success;
    int captureCount = target.Captures.Count;

    Assert.Equal(expectedMatched, matched);
    if (!expectedMatched)
    {
        Assert.Equal(0, captureCount);
        return;
    }

    Assert.True(captureCount > 0, $"If group {groupNumber} matched, it should have at least one capture");
}
| 112 | + |
/// <summary>
/// Produces the rows for <see cref="BalancingGroup_IsMatched_Consistency"/>.
/// Each row is { engine, regex, input, groupNumber, expectedMatched }, generated for every
/// available engine except NonBacktracking.
/// </summary>
public static IEnumerable<object[]> BalancingGroup_IsMatched_Consistency_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        if (!RegexHelpers.IsNonBacktracking(engine))
        {
            (string Pattern, string Input, int GroupNumber, bool ExpectedMatched)[] scenarios =
            {
                // Group 1 should be balanced out (no captures remaining)
                (@"(a)(?'2-1'b)", "ab", 1, false),

                // Group 2 should have a capture
                (@"(a)(?'2-1'b)", "ab", 2, true),

                // Balancing in lookbehind - group 1 should be balanced out
                (@"(a)b(?<=(?'2-1'.))c", "abc", 1, false),

                // Balancing in lookbehind - group 2 should have a capture
                (@"(a)b(?<=(?'2-1'.))c", "abc", 2, true),

                // Original bug pattern - group 1 should be balanced out
                (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b", "00123xzacvb", 1, false),

                // Original bug pattern - group 2 should have a capture
                (@"\d+((?'x'[a-z-[b]]+)).(?<=(?'2-1'(?'x1'..)).{6})b", "00123xzacvb", 2, true),
            };

            // Build every regex for this engine in one call; only the pattern is specified per entry.
            var requests = scenarios
                .Select(s => (s.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null))
                .ToArray();
            Regex[] built = RegexHelpers.GetRegexes(engine, requests);

            for (int index = 0; index < scenarios.Length; index++)
            {
                (_, string input, int groupNumber, bool expectedMatched) = scenarios[index];
                yield return new object[] { engine, built[index], input, groupNumber, expectedMatched };
            }
        }
    }
}
| 150 | + |
/// <summary>
/// Verifies the overall match value and the number of captures left in groups 1 and 2
/// for a set of balancing-group patterns.
/// </summary>
[Theory]
[MemberData(nameof(BalancingGroup_Various_MemberData))]
public void BalancingGroup_Various_Scenarios(RegexEngine engine, Regex regex, string input, string expectedValue, int expectedGroup1Count, int expectedGroup2Count)
{
    _ = engine; // To satisfy xUnit analyzer

    Match match = regex.Match(input);
    Assert.True(match.Success);
    Assert.Equal(expectedValue, match.Value);

    // Capture counts reflect what remains in each group once the match is complete.
    GroupCollection groups = match.Groups;
    Assert.Equal(expectedGroup1Count, groups[1].Captures.Count);
    Assert.Equal(expectedGroup2Count, groups[2].Captures.Count);
}
| 166 | + |
/// <summary>
/// Produces the rows for <see cref="BalancingGroup_Various_Scenarios"/>.
/// Each row is { engine, regex, input, expectedValue, expectedGroup1Count, expectedGroup2Count },
/// generated for every available engine except NonBacktracking.
/// </summary>
/// <remarks>
/// Unnamed groups are numbered 1..n from left to right regardless of explicitly numbered groups, so a
/// second unnamed group IS group 2. Patterns that need several captures in group 1 therefore repeat
/// group 1 with a quantifier, and nested groups are numbered explicitly to keep them out of group 2.
/// </remarks>
public static IEnumerable<object[]> BalancingGroup_Various_MemberData()
{
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        if (RegexHelpers.IsNonBacktracking(engine))
        {
            // NonBacktracking engine doesn't support balancing groups
            continue;
        }

        var cases = new (string Pattern, string Input, string ExpectedValue, int ExpectedGroup1Count, int ExpectedGroup2Count)[]
        {
            // Basic balancing: group 1 captured, then balanced into group 2
            (@"(a)(?'2-1'b)", "ab", "ab", 0, 1),

            // Balancing with a nested capture. The nested group is explicitly numbered 3; left unnamed it
            // would itself be group 2 and add a second capture to the group being counted.
            (@"(a)(?'2-1'(?'3'c))", "ac", "ac", 0, 1),

            // Multiple captures in group 1 (two iterations of the loop), one of them balanced away
            (@"(a)+(?'2-1'b)", "aab", "aab", 1, 1),

            // Balancing all captures from group 1: two captures, two balancing groups.
            // With a single capture the second balancing group would have nothing to pop and the match would fail.
            (@"(a)+(?'2-1'b)(?'2-1'c)", "aabc", "aabc", 0, 2),
        };

        Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, (System.Globalization.CultureInfo?)null, (RegexOptions?)null, (TimeSpan?)null)).ToArray());
        for (int i = 0; i < cases.Length; i++)
        {
            yield return new object[] { engine, regexes[i], cases[i].Input, cases[i].ExpectedValue, cases[i].ExpectedGroup1Count, cases[i].ExpectedGroup2Count };
        }
    }
}
| 198 | + } |
| 199 | +} |
0 commit comments