Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ToTitleCase Perf improvements #1442

Merged
merged 14 commits into from
Feb 23, 2024
62 changes: 62 additions & 0 deletions src/Benchmarks/TransformersBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/// <summary>
/// Benchmarks the lower/upper/sentence/title case string transformers, individually and
/// chained together, against pseudo-random input of several lengths.
/// </summary>
[MemoryDiagnoser(false)]
public class TransformersBenchmarks
{
    // hard-coded seed ensures the same random strings are generated each time.
    const int RAND_SEED = 17432;

    // Pool of characters the benchmark input is drawn from. The space is listed 12 times
    // so it is picked more often than any other single character.
    // Enumerable.Range takes (start, count), so each count is "last - first + 1" to make
    // the range include its final character ('z', 'Z' and '9').
    static readonly char[] _alphabet =
        Enumerable
            .Repeat((int) ' ', 12)
            .Concat(Enumerable.Range('a', 'z' - 'a' + 1))
            .Concat(Enumerable.Range('A', 'Z' - 'A' + 1))
            .Concat(Enumerable.Range('0', '9' - '0' + 1))
            .Concat(new int[]
            {
                '.',
                ',',
                '(',
                ')',
                '!',
                '$'
            })
            .Select(x => (char) x)
            .ToArray();

    readonly Random random = new(RAND_SEED);

    // Assigned in GlobalSetup before any benchmark method runs.
    string input = null!;

    /// <summary>Length, in characters, of the generated input string.</summary>
    [Params(10, 100, 1000)]
    public int StringLen;

    /// <summary>
    /// Builds the input string by drawing <see cref="StringLen"/> characters from the alphabet
    /// using the seeded random number generator.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var chars = new char[StringLen];
        for (var i = 0; i < StringLen; i++)
        {
            // Random.Next's upper bound is exclusive, so every index of _alphabet is reachable.
            chars[i] = _alphabet[random.Next(0, _alphabet.Length)];
        }

        input = new(chars);
    }

    /// <summary>Applies all four case transformers in sequence.</summary>
    [Benchmark]
    public string AllTransforms() =>
        input.Transform(To.LowerCase, To.UpperCase, To.SentenceCase, To.TitleCase);

    /// <summary>Applies only the lower case transformer.</summary>
    [Benchmark]
    public string LowerCase() =>
        input.Transform(To.LowerCase);

    /// <summary>Applies only the upper case transformer.</summary>
    [Benchmark]
    public string UpperCase() =>
        input.Transform(To.UpperCase);

    /// <summary>Applies only the sentence case transformer.</summary>
    [Benchmark]
    public string SentenceCase() =>
        input.Transform(To.SentenceCase);

    /// <summary>Applies only the title case transformer.</summary>
    [Benchmark]
    public string TitleCase() =>
        input.Transform(To.TitleCase);
}
85 changes: 45 additions & 40 deletions src/Humanizer/Transformer/ToTitleCase.cs
Original file line number Diff line number Diff line change
@@ -1,54 +1,59 @@
namespace Humanizer
namespace Humanizer;

class ToTitleCase : ICulturedStringTransformer
{
class ToTitleCase : ICulturedStringTransformer
{
public string Transform(string input) =>
Transform(input, null);
public string Transform(string input) =>
Transform(input, null);

public string Transform(string input, CultureInfo? culture)
{
culture ??= CultureInfo.CurrentCulture;
static Regex regex = new(@"(\w|[^\u0000-\u007F])+'?\w*", RegexOptions.Compiled);

var result = input;
var matches = Regex.Matches(input, @"(\w|[^\u0000-\u007F])+'?\w*");
var firstWord = true;
foreach (Match word in matches)
public string Transform(string input, CultureInfo? culture)
{
culture ??= CultureInfo.CurrentCulture;
var matches = regex.Matches(input);
var builder = new StringBuilder(input);
var textInfo = culture.TextInfo;
foreach (Match word in matches)
{
var value = word.Value;
if (AllCapitals(value) || lookups.Contains(value))
{
if (!AllCapitals(word.Value))
{
result = ReplaceWithTitleCase(word, result, culture, firstWord);
}
firstWord = false;
continue;
}

return result;
builder[word.Index] = textInfo.ToUpper(value[0]);
Overwrite(builder, word.Index + 1, textInfo.ToLower(value[1..]));
}

static bool AllCapitals(string input) =>
input.All(char.IsUpper);

static string ReplaceWithTitleCase(Match word, string source, CultureInfo culture, bool firstWord)
{
var articles = new List<string> { "a", "an", "the" };
var conjunctions = new List<string> { "and", "as", "but", "if", "nor", "or", "so", "yet" };
var prepositions = new List<string> { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" };

var wordToConvert = word.Value;
string replacement;
return builder.ToString();
}

if (firstWord ||
(!articles.Contains(wordToConvert) &&
!conjunctions.Contains(wordToConvert) &&
!prepositions.Contains(wordToConvert)))
{
replacement = culture.TextInfo.ToUpper(wordToConvert[0]) + culture.TextInfo.ToLower(wordToConvert.Remove(0, 1));
/// <summary>
/// Replaces the characters of <paramref name="builder"/> that start at
/// <paramref name="index"/> with the characters of <paramref name="replacement"/>.
/// The builder's length is unchanged.
/// </summary>
/// <param name="builder">The builder to modify in place.</param>
/// <param name="index">Position in <paramref name="builder"/> of the first character to replace.</param>
/// <param name="replacement">The characters to write; an empty string leaves the builder untouched.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// The replacement would extend past the end of <paramref name="builder"/>.
/// </exception>
static void Overwrite(StringBuilder builder, int index, string replacement)
{
    // Write each character through the indexer instead of Remove + Insert:
    // the replaced run has the same length, so nothing after it needs to move.
    for (var offset = 0; offset < replacement.Length; offset++)
    {
        builder[index + offset] = replacement[offset];
    }
}

}
else
static bool AllCapitals(string input)
{
foreach (var ch in input)
{
if (!char.IsUpper(ch))
{
replacement = culture.TextInfo.ToLower(wordToConvert);
return false;
}
return source.Substring(0, word.Index) + replacement + source.Substring(word.Index + word.Length);
}

return true;
}

static FrozenSet<string> lookups;

// Fills the lookups set once, when the type is initialised, with English articles,
// conjunctions and prepositions. "as" appears in two groups; the set stores it once.
static ToTitleCase()
{
    string[] articleWords = { "a", "an", "the" };
    string[] conjunctionWords = { "and", "as", "but", "if", "nor", "or", "so", "yet" };
    string[] prepositionWords = { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" };

    string[][] wordGroups = { articleWords, conjunctionWords, prepositionWords };

    lookups = wordGroups
        .SelectMany(group => group)
        .ToFrozenSet();
}
}
}