Skip to content

Commit 24b59d8

Browse files
authored
Avoid concatenating adjacent regex loops+strings under right-to-left (#103591)
Under right-to-left, the pattern's node sequence gets reversed, but the actual strings stored in multi nodes are not reversed; instead, evaluation just compares them in reverse. This means that optimizations based on comparing adjacent nodes in a sequence against the text of such a string either need to take that into account or be disabled for right-to-left.
1 parent 83031f1 commit 24b59d8

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1702,7 +1702,10 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax)
17021702
break;
17031703

17041704
// Coalescing a loop with a subsequent string
1705-
case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]:
1705+
case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when
1706+
nextNode.Kind == RegexNodeKind.Multi &&
1707+
(nextNode.Options & RegexOptions.RightToLeft) == 0 && // RTL multi nodes don't have their text reversed, and it's not worth the code to optimize further
1708+
currentNode.Ch == nextNode.Str![0]:
17061709
{
17071710
// Determine how many of the multi's characters can be combined.
17081711
// We already checked for the first, so we know it's at least one.

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,11 @@ public static IEnumerable<object[]> Match_MemberData()
870870
yield return (@"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty);
871871
yield return ("aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty);
872872
yield return ("abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def");
873+
yield return (@"^says?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
874+
yield return (@"^says?$", "say", RegexOptions.RightToLeft, 0, 3, true, "say");
875+
yield return (@"^say(s?)$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
876+
yield return (@"^(say)s?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
877+
yield return (@"^(.+?) (says?),\s'(.+)'$", "User says, 'adventure'", RegexOptions.RightToLeft, 0, 22, true, "User says, 'adventure'");
873878

874879
// .* : RTL, Case-sensitive
875880
yield return (@".*\nfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "");

0 commit comments

Comments
 (0)