-
Notifications
You must be signed in to change notification settings - Fork 966
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ToTitleCase Perf improvements (#1442)
- Loading branch information
1 parent
3126786
commit 30c2fb6
Showing
2 changed files
with
107 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/// <summary>
/// Benchmarks for the string case transformers, run against pseudo-random
/// input strings of several lengths.
/// </summary>
[MemoryDiagnoser(false)]
public class TransformersBenchmarks
{
    // Hard-coded seed ensures the same random strings are generated each time.
    const int RandomSeed = 17432;

    // Character pool the input is drawn from: 12 copies of the space character
    // (so spaces are picked more often than any single other character),
    // followed by a-z, A-Z, 0-9 and a few punctuation characters.
    // Enumerable.Range takes (start, count), so each count needs the "+ 1"
    // to include the last character of its range ('z', 'Z', '9').
    static readonly char[] _alphabet =
        Enumerable
            .Repeat((int) ' ', 12)
            .Concat(Enumerable.Range('a', 'z' - 'a' + 1))
            .Concat(Enumerable.Range('A', 'Z' - 'A' + 1))
            .Concat(Enumerable.Range('0', '9' - '0' + 1))
            .Concat(new int[]
            {
                '.',
                ',',
                '(',
                ')',
                '!',
                '$'
            })
            .Select(x => (char) x)
            .ToArray();

    // Assigned in GlobalSetup before any benchmark method runs.
    string input = null!;

    /// <summary>Length, in characters, of the generated input string.</summary>
    [Params(10, 100, 1000)]
    public int StringLen;

    /// <summary>
    /// Builds the input string by picking <see cref="StringLen"/> characters
    /// from the alphabet with a seeded random generator.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // A fresh seeded generator per call keeps the generated input the same
        // no matter how many times setup has already run on this instance.
        var random = new Random(RandomSeed);

        var chars = new char[StringLen];
        for (var i = 0; i < StringLen; i++)
        {
            chars[i] = _alphabet[random.Next(0, _alphabet.Length)];
        }

        input = new(chars);
    }

    /// <summary>Applies the lower, upper, sentence and title case transformers in one call.</summary>
    [Benchmark]
    public string AllTransforms() =>
        input.Transform(To.LowerCase, To.UpperCase, To.SentenceCase, To.TitleCase);

    /// <summary>Applies only <c>To.LowerCase</c>.</summary>
    [Benchmark]
    public string LowerCase() =>
        input.Transform(To.LowerCase);

    /// <summary>Applies only <c>To.UpperCase</c>.</summary>
    [Benchmark]
    public string UpperCase() =>
        input.Transform(To.UpperCase);

    /// <summary>Applies only <c>To.SentenceCase</c>.</summary>
    [Benchmark]
    public string SentenceCase() =>
        input.Transform(To.SentenceCase);

    /// <summary>Applies only <c>To.TitleCase</c>.</summary>
    [Benchmark]
    public string TitleCase() =>
        input.Transform(To.TitleCase);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,59 @@ | ||
namespace Humanizer | ||
namespace Humanizer; | ||
|
||
class ToTitleCase : ICulturedStringTransformer | ||
{ | ||
class ToTitleCase : ICulturedStringTransformer | ||
{ | ||
public string Transform(string input) => | ||
Transform(input, null); | ||
public string Transform(string input) => | ||
Transform(input, null); | ||
|
||
public string Transform(string input, CultureInfo? culture) | ||
{ | ||
culture ??= CultureInfo.CurrentCulture; | ||
static Regex regex = new(@"(\w|[^\u0000-\u007F])+'?\w*", RegexOptions.Compiled); | ||
|
||
var result = input; | ||
var matches = Regex.Matches(input, @"(\w|[^\u0000-\u007F])+'?\w*"); | ||
var firstWord = true; | ||
foreach (Match word in matches) | ||
public string Transform(string input, CultureInfo? culture) | ||
{ | ||
culture ??= CultureInfo.CurrentCulture; | ||
var matches = regex.Matches(input); | ||
var builder = new StringBuilder(input); | ||
var textInfo = culture.TextInfo; | ||
foreach (Match word in matches) | ||
{ | ||
var value = word.Value; | ||
if (AllCapitals(value) || lookups.Contains(value)) | ||
{ | ||
if (!AllCapitals(word.Value)) | ||
{ | ||
result = ReplaceWithTitleCase(word, result, culture, firstWord); | ||
} | ||
firstWord = false; | ||
continue; | ||
} | ||
|
||
return result; | ||
builder[word.Index] = textInfo.ToUpper(value[0]); | ||
Overwrite(builder, word.Index + 1, textInfo.ToLower(value[1..])); | ||
} | ||
|
||
static bool AllCapitals(string input) => | ||
input.All(char.IsUpper); | ||
|
||
static string ReplaceWithTitleCase(Match word, string source, CultureInfo culture, bool firstWord) | ||
{ | ||
var articles = new List<string> { "a", "an", "the" }; | ||
var conjunctions = new List<string> { "and", "as", "but", "if", "nor", "or", "so", "yet" }; | ||
var prepositions = new List<string> { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" }; | ||
|
||
var wordToConvert = word.Value; | ||
string replacement; | ||
return builder.ToString(); | ||
} | ||
|
||
if (firstWord || | ||
(!articles.Contains(wordToConvert) && | ||
!conjunctions.Contains(wordToConvert) && | ||
!prepositions.Contains(wordToConvert))) | ||
{ | ||
replacement = culture.TextInfo.ToUpper(wordToConvert[0]) + culture.TextInfo.ToLower(wordToConvert.Remove(0, 1)); | ||
// Writes the characters of replacement over the characters of builder that
// start at index; the builder's length does not change.
// Characters are assigned in place instead of calling Remove followed by
// Insert, each of which has to shift the remainder of the buffer.
static void Overwrite(StringBuilder builder, int index, string replacement)
{
    for (var offset = 0; offset < replacement.Length; offset++)
    {
        builder[index + offset] = replacement[offset];
    }
}
|
||
} | ||
else | ||
static bool AllCapitals(string input) | ||
{ | ||
foreach (var ch in input) | ||
{ | ||
if (!char.IsUpper(ch)) | ||
{ | ||
replacement = culture.TextInfo.ToLower(wordToConvert); | ||
return false; | ||
} | ||
return source.Substring(0, word.Index) + replacement + source.Substring(word.Index + word.Length); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
static FrozenSet<string> lookups; | ||
|
||
// Fills lookups with the articles, conjunctions and prepositions listed below.
// "as" is listed as both a conjunction and a preposition; the set stores it once.
static ToTitleCase()
{
    string[] articleWords = { "a", "an", "the" };
    string[] conjunctionWords = { "and", "as", "but", "if", "nor", "or", "so", "yet" };
    string[] prepositionWords = { "as", "at", "by", "for", "in", "of", "off", "on", "to", "up", "via" };

    var combined = new List<string>(articleWords.Length + conjunctionWords.Length + prepositionWords.Length);
    combined.AddRange(articleWords);
    combined.AddRange(conjunctionWords);
    combined.AddRange(prepositionWords);

    lookups = combined.ToFrozenSet();
}
} | ||
} |