Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -490,29 +490,25 @@ private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter w
// Emit the code for whatever find mode has been determined.
switch (regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight:
Debug.Assert(!string.IsNullOrEmpty(regexTree.FindOptimizations.LeadingPrefix));
EmitIndexOf_LeftToRight(regexTree.FindOptimizations.LeadingPrefix);
case FindNextStartingPositionMode.LeadingString_LeftToRight:
case FindNextStartingPositionMode.FixedDistanceString_LeftToRight:
EmitIndexOf_LeftToRight();
break;

case FindNextStartingPositionMode.LeadingPrefix_RightToLeft:
Debug.Assert(!string.IsNullOrEmpty(regexTree.FindOptimizations.LeadingPrefix));
EmitIndexOf_RightToLeft(regexTree.FindOptimizations.LeadingPrefix);
case FindNextStartingPositionMode.LeadingString_RightToLeft:
EmitIndexOf_RightToLeft();
break;

case FindNextStartingPositionMode.LeadingSet_LeftToRight:
case FindNextStartingPositionMode.FixedSets_LeftToRight:
Debug.Assert(regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
EmitFixedSet_LeftToRight();
break;

case FindNextStartingPositionMode.LeadingSet_RightToLeft:
Debug.Assert(regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
EmitFixedSet_RightToLeft();
break;

case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight:
Debug.Assert(regexTree.FindOptimizations.LiteralAfterLoop is not null);
EmitLiteralAfterAtomicLoop();
break;

Expand Down Expand Up @@ -712,22 +708,52 @@ bool EmitAnchors()
return false;
}

// Emits a case-sensitive prefix search for a string at the beginning of the pattern.
void EmitIndexOf_LeftToRight(string prefix)
// Emits a case-sensitive left-to-right search for a substring.
// The substring is either the pattern's leading prefix (LeadingString_LeftToRight) or a literal
// located at a fixed distance from the start of the pattern (FixedDistanceString_LeftToRight).
// NOTE(review): this span appears to be a rendered diff hunk in which pre-change and post-change
// lines are interleaved without +/- markers (two signatures here, two emitted "int i = ..." lines
// below, and a use of the former 'prefix' parameter) - confirm which lines are current before editing.
void EmitIndexOf_LeftToRight()
{
writer.WriteLine($"// The pattern begins with a literal {Literal(prefix)}. Find the next occurrence.");
RegexFindOptimizations opts = regexTree.FindOptimizations;

// Defaults describe a literal at the very start of the pattern: no slice offset is emitted.
// The fixed-distance case below overrides offset/offsetDescription when Distance > 0.
string substring = "";
string offset = "";
string offsetDescription = "at the beginning of the pattern";

switch (opts.FindMode)
{
case FindNextStartingPositionMode.LeadingString_LeftToRight:
// The literal to search for is the pattern's leading prefix.
substring = regexTree.FindOptimizations.LeadingPrefix;
Debug.Assert(!string.IsNullOrEmpty(substring));
break;

case FindNextStartingPositionMode.FixedDistanceString_LeftToRight:
// The literal to search for sits at a fixed distance from the start of the pattern.
Debug.Assert(!string.IsNullOrEmpty(regexTree.FindOptimizations.FixedDistanceLiteral.String));
substring = regexTree.FindOptimizations.FixedDistanceLiteral.String;
// Only a non-zero distance needs the emitted slice start adjusted; a distance of 0 keeps the defaults.
if (regexTree.FindOptimizations.FixedDistanceLiteral is { Distance: > 0 } literal)
{
offset = $" + {literal.Distance}";
// NOTE(review): this description starts with a space while the default above does not; combined
// with the space before {offsetDescription} in the format string below, the emitted comment
// contains a double space in this case - confirm whether that is intended.
offsetDescription = $" at index {literal.Distance} in the pattern";
}
break;

default:
// Any other mode is unexpected here; substring stays empty after the debug failure.
Debug.Fail($"Unexpected mode: {opts.FindMode}");
break;
}

writer.WriteLine($"// The pattern has the literal {Literal(substring)} {offsetDescription}. Find the next occurrence.");
writer.WriteLine($"// If it can't be found, there's no match.");
writer.WriteLine($"int i = inputSpan.Slice(pos).IndexOf({Literal(prefix)});");
// The emitted search starts at pos plus the literal's distance, so the resulting i is relative to
// that slice start; pos + i is therefore the literal's position minus its distance in the pattern.
writer.WriteLine($"int i = inputSpan.Slice(pos{offset}).IndexOf({Literal(substring)});");
using (EmitBlock(writer, "if (i >= 0)"))
{
// On success the emitted code records pos + i as the next starting position and reports true.
writer.WriteLine("base.runtextpos = pos + i;");
writer.WriteLine("return true;");
}
}

// Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
void EmitIndexOf_RightToLeft(string prefix)
// Emits a case-sensitive right-to-left search for a substring.
void EmitIndexOf_RightToLeft()
{
string prefix = regexTree.FindOptimizations.LeadingPrefix;

writer.WriteLine($"// The pattern begins with a literal {Literal(prefix)}. Find the next occurrence right-to-left.");
writer.WriteLine($"// If it can't be found, there's no match.");
writer.WriteLine($"pos = inputSpan.Slice(0, pos).LastIndexOf({Literal(prefix)});");
Expand All @@ -742,6 +768,8 @@ void EmitIndexOf_RightToLeft(string prefix)
// and potentially other sets at other fixed positions in the pattern.
void EmitFixedSet_LeftToRight()
{
Debug.Assert(regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });

List<(char[]? Chars, string Set, int Distance)>? sets = regexTree.FindOptimizations.FixedDistanceSets;
(char[]? Chars, string Set, int Distance) primarySet = sets![0];
const int MaxSets = 4;
Expand Down Expand Up @@ -865,6 +893,8 @@ void EmitFixedSet_LeftToRight()
// (Currently that position will always be a distance of 0, meaning the start of the pattern itself.)
void EmitFixedSet_RightToLeft()
{
Debug.Assert(regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });

(char[]? Chars, string Set, int Distance) set = regexTree.FindOptimizations.FixedDistanceSets![0];
Debug.Assert(set.Distance == 0);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1434,6 +1434,11 @@ private static RegexCharClass ParseRecursive(string charClass, int start)
return ranges;
}

/// <summary>Cache of character class strings for single ASCII characters.</summary>
private static readonly string[] s_asciiStrings = new string[128];
/// <summary>Cache of character class strings for pairs of upper/lower-case ASCII letters.</summary>
private static readonly string[] s_asciiLetterPairStrings = new string[26];

/// <summary>Creates a set string for a single character.</summary>
/// <param name="c">The character for which to create the set.</param>
/// <returns>The created set string.</returns>
Expand All @@ -1450,44 +1455,32 @@ internal static unsafe string CharsToStringClass(ReadOnlySpan<char> chars)
}
#endif

// If there aren't any chars, just return an empty class.
if (chars.Length == 0)
switch (chars.Length)
{
return EmptyClass;
}
case 0:
// If there aren't any chars, just return an empty class.
return EmptyClass;

if (chars.Length == 2)
{
switch (chars[0], chars[1])
{
case ('A', 'a'): case ('a', 'A'): return "\0\x0004\0ABab";
case ('B', 'b'): case ('b', 'B'): return "\0\x0004\0BCbc";
case ('C', 'c'): case ('c', 'C'): return "\0\x0004\0CDcd";
case ('D', 'd'): case ('d', 'D'): return "\0\x0004\0DEde";
case ('E', 'e'): case ('e', 'E'): return "\0\x0004\0EFef";
case ('F', 'f'): case ('f', 'F'): return "\0\x0004\0FGfg";
case ('G', 'g'): case ('g', 'G'): return "\0\x0004\0GHgh";
case ('H', 'h'): case ('h', 'H'): return "\0\x0004\0HIhi";
// 'I' and 'i' are missing since depending on the culture they may
// have additional mappings.
case ('J', 'j'): case ('j', 'J'): return "\0\x0004\0JKjk";
// 'K' and 'k' are missing since their mapping also includes Kelvin K.
case ('L', 'l'): case ('l', 'L'): return "\0\x0004\0LMlm";
case ('M', 'm'): case ('m', 'M'): return "\0\x0004\0MNmn";
case ('N', 'n'): case ('n', 'N'): return "\0\x0004\0NOno";
case ('O', 'o'): case ('o', 'O'): return "\0\x0004\0OPop";
case ('P', 'p'): case ('p', 'P'): return "\0\x0004\0PQpq";
case ('Q', 'q'): case ('q', 'Q'): return "\0\x0004\0QRqr";
case ('R', 'r'): case ('r', 'R'): return "\0\x0004\0RSrs";
case ('S', 's'): case ('s', 'S'): return "\0\x0004\0STst";
case ('T', 't'): case ('t', 'T'): return "\0\x0004\0TUtu";
case ('U', 'u'): case ('u', 'U'): return "\0\x0004\0UVuv";
case ('V', 'v'): case ('v', 'V'): return "\0\x0004\0VWvw";
case ('W', 'w'): case ('w', 'W'): return "\0\x0004\0WXwx";
case ('X', 'x'): case ('x', 'X'): return "\0\x0004\0XYxy";
case ('Y', 'y'): case ('y', 'Y'): return "\0\x0004\0YZyz";
case ('Z', 'z'): case ('z', 'Z'): return "\0\x0004\0Z[z{";
}
case 1:
// Special-case ASCII characters to avoid the computation/allocation in this very common case.
if (chars[0] < 128)
{
string[] asciiStrings = s_asciiStrings;
if (chars[0] < asciiStrings.Length)
{
return asciiStrings[chars[0]] ??= $"\0\u0002\0{chars[0]}{(char)(chars[0] + 1)}";
}
}
break;

case 2:
// Special-case cased ASCII letter pairs to avoid the computation/allocation in this very common case.
int masked0 = chars[0] | 0x20;
if ((uint)(masked0 - 'a') <= 'z' - 'a' && masked0 == (chars[1] | 0x20))
{
return s_asciiLetterPairStrings[masked0 - 'a'] ??= $"\0\u0004\0{(char)(masked0 & ~0x20)}{(char)((masked0 & ~0x20) + 1)}{(char)masked0}{(char)(masked0 + 1)}";
}
break;
}

// Count how many characters there actually are. All but the very last possible
Expand All @@ -1514,8 +1507,8 @@ internal static unsafe string CharsToStringClass(ReadOnlySpan<char> chars)

// Fill in the set string
span[FlagsIndex] = (char)0;
span[CategoryLengthIndex] = (char)0;
span[SetLengthIndex] = (char)(span.Length - SetStartIndex);
span[CategoryLengthIndex] = (char)0;
int i = SetStartIndex;
foreach (char c in chars)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,29 +417,25 @@ protected void EmitTryFindNextPossibleStartingPosition()
// Either anchors weren't specified, or they don't completely root all matches to a specific location.
switch (_regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight:
Debug.Assert(!string.IsNullOrEmpty(_regexTree.FindOptimizations.LeadingPrefix));
EmitIndexOf_LeftToRight(_regexTree.FindOptimizations.LeadingPrefix);
case FindNextStartingPositionMode.LeadingString_LeftToRight:
case FindNextStartingPositionMode.FixedDistanceString_LeftToRight:
EmitIndexOf_LeftToRight();
break;

case FindNextStartingPositionMode.LeadingPrefix_RightToLeft:
Debug.Assert(!string.IsNullOrEmpty(_regexTree.FindOptimizations.LeadingPrefix));
EmitIndexOf_RightToLeft(_regexTree.FindOptimizations.LeadingPrefix);
case FindNextStartingPositionMode.LeadingString_RightToLeft:
EmitIndexOf_RightToLeft();
break;

case FindNextStartingPositionMode.LeadingSet_LeftToRight:
case FindNextStartingPositionMode.FixedSets_LeftToRight:
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
EmitFixedSet_LeftToRight();
break;

case FindNextStartingPositionMode.LeadingSet_RightToLeft:
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
EmitFixedSet_RightToLeft();
break;

case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight:
Debug.Assert(_regexTree.FindOptimizations.LiteralAfterLoop is not null);
EmitLiteralAfterAtomicLoop();
break;

Expand Down Expand Up @@ -707,16 +703,27 @@ bool EmitAnchors()
return false;
}

// Emits a case-sensitive prefix search for a string at the beginning of the pattern.
void EmitIndexOf_LeftToRight(string prefix)
// Emits a case-sensitive left-to-right search for a substring.
void EmitIndexOf_LeftToRight()
{
RegexFindOptimizations opts = _regexTree.FindOptimizations;
Debug.Assert(opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or FindNextStartingPositionMode.FixedDistanceString_LeftToRight);

using RentedLocalBuilder i = RentInt32Local();

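// int i = inputSpan.Slice(pos + distance).IndexOf(literal);
// (the "+ distance" is emitted only for a fixed-distance literal whose Distance is > 0)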
Ldloca(inputSpan);
Ldloc(pos);
if (opts.FindMode == FindNextStartingPositionMode.FixedDistanceString_LeftToRight &&
opts.FixedDistanceLiteral is { Distance: > 0 } literal)
{
Ldc(literal.Distance);
Add();
}
Call(s_spanSliceIntMethod);
Ldstr(prefix);
Ldstr(opts.FindMode == FindNextStartingPositionMode.LeadingString_LeftToRight ?
opts.LeadingPrefix :
opts.FixedDistanceLiteral.String!);
Call(s_stringAsSpanMethod);
Call(s_spanIndexOfSpan);
Stloc(i);
Expand All @@ -737,9 +744,12 @@ void EmitIndexOf_LeftToRight(string prefix)
Ret();
}

// Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
void EmitIndexOf_RightToLeft(string prefix)
// Emits a case-sensitive right-to-left search for a substring.
void EmitIndexOf_RightToLeft()
{
string prefix = _regexTree.FindOptimizations.LeadingPrefix;
Debug.Assert(!string.IsNullOrEmpty(prefix));

// pos = inputSpan.Slice(0, pos).LastIndexOf(prefix);
Ldloca(inputSpan);
Ldc(0);
Expand Down Expand Up @@ -770,6 +780,8 @@ void EmitIndexOf_RightToLeft(string prefix)
// and potentially other sets at other fixed positions in the pattern.
void EmitFixedSet_LeftToRight()
{
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });

List<(char[]? Chars, string Set, int Distance)>? sets = _regexTree.FindOptimizations.FixedDistanceSets;
(char[]? Chars, string Set, int Distance) primarySet = sets![0];
const int MaxSets = 4;
Expand Down Expand Up @@ -967,6 +979,8 @@ void EmitFixedSet_LeftToRight()
// (Currently that position will always be a distance of 0, meaning the start of the pattern itself.)
void EmitFixedSet_RightToLeft()
{
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });

(char[]? Chars, string Set, int Distance) set = _regexTree.FindOptimizations.FixedDistanceSets![0];
Debug.Assert(set.Distance == 0);

Expand Down
Loading