Skip to content

Commit

Permalink
ToTitleCase Perf improvements (#1442)
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonCropp authored Feb 23, 2024
1 parent 3126786 commit 30c2fb6
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 40 deletions.
62 changes: 62 additions & 0 deletions src/Benchmarks/TransformersBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/// <summary>
/// Benchmarks for the string casing transformers, run against pseudo-random
/// input strings of several lengths.
/// </summary>
[MemoryDiagnoser(false)]
public class TransformersBenchmarks
{
    // hard-coded seed ensures the same random strings are generated each time.
    const int RandomSeed = 17432;

    // Pool of characters the input is drawn from. The space is repeated 12 times,
    // so it is picked more often than any other single character.
    // Enumerable.Range takes (start, count), so each inclusive range needs
    // "last - first + 1" elements; without the + 1 the final character
    // ('z', 'Z', '9') would never appear in the generated input.
    static readonly char[] _alphabet =
        Enumerable
            .Repeat((int) ' ', 12)
            .Concat(Enumerable.Range('a', 'z' - 'a' + 1))
            .Concat(Enumerable.Range('A', 'Z' - 'A' + 1))
            .Concat(Enumerable.Range('0', '9' - '0' + 1))
            .Concat(new int[]
            {
                '.',
                ',',
                '(',
                ')',
                '!',
                '$'
            })
            .Select(x => (char) x)
            .ToArray();

    string input = null!;

    /// <summary>Length, in characters, of the generated input string.</summary>
    [Params(10, 100, 1000)]
    public int StringLen;

    /// <summary>
    /// Builds the input string of <see cref="StringLen"/> characters drawn from the alphabet.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // A fresh generator per setup keeps the generated string a pure function of
        // the seed and StringLen, independent of any earlier setup call on this instance.
        var random = new Random(RandomSeed);

        var chars = new char[StringLen];
        for (var i = 0; i < StringLen; i++)
        {
            chars[i] = _alphabet[random.Next(0, _alphabet.Length)];
        }

        input = new(chars);
    }

    /// <summary>Applies all four casing transformers in a single call.</summary>
    [Benchmark]
    public string AllTransforms() =>
        input.Transform(To.LowerCase, To.UpperCase, To.SentenceCase, To.TitleCase);

    /// <summary>Applies only the lower-case transformer.</summary>
    [Benchmark]
    public string LowerCase() =>
        input.Transform(To.LowerCase);

    /// <summary>Applies only the upper-case transformer.</summary>
    [Benchmark]
    public string UpperCase() =>
        input.Transform(To.UpperCase);

    /// <summary>Applies only the sentence-case transformer.</summary>
    [Benchmark]
    public string SentenceCase() =>
        input.Transform(To.SentenceCase);

    /// <summary>Applies only the title-case transformer.</summary>
    [Benchmark]
    public string TitleCase() =>
        input.Transform(To.TitleCase);
}
85 changes: 45 additions & 40 deletions src/Humanizer/Transformer/ToTitleCase.cs
Original file line number Diff line number Diff line change
@@ -1,54 +1,59 @@
namespace Humanizer
namespace Humanizer;

class ToTitleCase : ICulturedStringTransformer
{
class ToTitleCase : ICulturedStringTransformer
{
public string Transform(string input) =>
Transform(input, null);
public string Transform(string input) =>
Transform(input, null);

public string Transform(string input, CultureInfo? culture)
{
culture ??= CultureInfo.CurrentCulture;
static Regex regex = new(@"(\w|[^\u0000-\u007F])+'?\w*", RegexOptions.Compiled);

var result = input;
var matches = Regex.Matches(input, @"(\w|[^\u0000-\u007F])+'?\w*");
var firstWord = true;
foreach (Match word in matches)
public string Transform(string input, CultureInfo? culture)
{
culture ??= CultureInfo.CurrentCulture;
var matches = regex.Matches(input);
var builder = new StringBuilder(input);
var textInfo = culture.TextInfo;
foreach (Match word in matches)
{
var value = word.Value;
if (AllCapitals(value) || lookups.Contains(value))
{
if (!AllCapitals(word.Value))
{
result = ReplaceWithTitleCase(word, result, culture, firstWord);
}
firstWord = false;
continue;
}

return result;
builder[word.Index] = textInfo.ToUpper(value[0]);
Overwrite(builder, word.Index + 1, textInfo.ToLower(value[1..]));
}

static bool AllCapitals(string input) =>
input.All(char.IsUpper);

static string ReplaceWithTitleCase(Match word, string source, CultureInfo culture, bool firstWord)
{
var articles = new List<string> { "a", "an", "the" };
var conjunctions = new List<string> { "and", "as", "but", "if", "nor", "or", "so", "yet" };
var prepositions = new List<string> { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" };

var wordToConvert = word.Value;
string replacement;
return builder.ToString();
}

if (firstWord ||
(!articles.Contains(wordToConvert) &&
!conjunctions.Contains(wordToConvert) &&
!prepositions.Contains(wordToConvert)))
{
replacement = culture.TextInfo.ToUpper(wordToConvert[0]) + culture.TextInfo.ToLower(wordToConvert.Remove(0, 1));
/// <summary>
/// Overwrites the characters of <paramref name="builder"/> starting at
/// <paramref name="index"/> with the characters of <paramref name="replacement"/>.
/// The builder's length is unchanged.
/// </summary>
/// <param name="builder">The builder to modify in place.</param>
/// <param name="index">Position in the builder of the first character to overwrite.</param>
/// <param name="replacement">The characters to write; an empty string is a no-op.</param>
static void Overwrite(StringBuilder builder, int index, string replacement)
{
    // Write through the indexer rather than Remove + Insert: the replacement has
    // the same length as the span it covers, so there is no need to shift the
    // rest of the buffer twice for every word.
    for (var offset = 0; offset < replacement.Length; offset++)
    {
        builder[index + offset] = replacement[offset];
    }
}

}
else
static bool AllCapitals(string input)
{
foreach (var ch in input)
{
if (!char.IsUpper(ch))
{
replacement = culture.TextInfo.ToLower(wordToConvert);
return false;
}
return source.Substring(0, word.Index) + replacement + source.Substring(word.Index + word.Length);
}

return true;
}

static FrozenSet<string> lookups;

// Builds the lookup set once per type: the union of the article, conjunction
// and preposition word lists below ("as" appears in two lists and is stored once).
static ToTitleCase()
{
    string[] articles = { "a", "an", "the" };
    string[] conjunctions = { "and", "as", "but", "if", "nor", "or", "so", "yet" };
    string[] prepositions = { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" };

    var words = new HashSet<string>(articles);
    words.UnionWith(conjunctions);
    words.UnionWith(prepositions);

    lookups = words.ToFrozenSet();
}
}
}

0 comments on commit 30c2fb6

Please sign in to comment.