Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,5 @@ __pycache__/
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
*.xsd.cs
/tests/Slugify.Core.Benchmarks/BenchmarkDotNet.Artifacts/*
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,21 @@ PM> Install-Package Slugify.Core

Or running `dotnet add package Slugify.Core` from the command line.

Upgrading from 4.x to 5.x
-----------------------------

* 5.0 is significantly faster and uses less memory: it is 2-3x faster than version 4 and allocates about 20% less memory.
* `DeniedCharactersRegex` is no longer a string; it now takes a `Regex` object. This allows you to use source-generated regexes on platforms that support them, for example:
```csharp
[GeneratedRegex(@"[^a-z0-9\-\._]")]
private static partial Regex GeneratedRegex();
```
* On platforms that support it, a source-generated regex is, by a small margin, the fastest way of generating a slug. However, it will use a bit more memory.
* The way tab and new-line characters are handled has been changed. They are no longer translated to `-` by default and will instead be stripped. This will only be a noticeable change if you have disabled `CollapseDashes` (which is not the default).
* The option to disable collapsing whitespace has been removed.
* `AllowedChars` is renamed to `AllowedCharacters`.


Upgrading from 2.x to 3.x
-------------------------

Expand Down Expand Up @@ -104,11 +119,6 @@ This specifies whether the output string should be converted to lower-case. If s

- Default value: `true`

### `CollapseWhiteSpace`
This specifies whether consecutive whitespace should be replaced by just one space (`" "`). The whitespace will be collapsed before any other character replacements are being made.

- Default value: `true`

### `TrimWhitespace`
This specifies whether leading and trailing whitespace should be removed from the input string. The whitespace is trimmed before any other character replacements are made.

Expand Down Expand Up @@ -148,7 +158,7 @@ String replacements are being made after whitespace has been trimmed and collaps
config.StringReplacements.Add("ß", "ss");
```

### `AllowedChars`
### `AllowedCharacters`
Set of characters that are allowed in the slug, which will be kept when the input string is being processed. By default, this contains all ASCII letters and digits, the full stop, the dash and the underscore. This is the preferred way of controlling which characters should be replaced when generating the slug.

Characters that are not allowed will be removed after string replacements are completed.
Expand All @@ -162,17 +172,15 @@ Characters that are not allowed will be replaced after string replacements are c
var config = new SlugHelperConfiguration();

// add individual characters to the list of allowed characters
config.AllowedChars.Add('!');
config.AllowedCharacters.Add('!');

// remove previously added or default characters
config.AllowedChars.Remove('.');
config.AllowedCharacters.Remove('.');
```

### `DeniedCharactersRegex`
Alternative method of specifying which characters will be allowed in the slug, which will replace the functionality of the `AllowedCharacters` set. The value must be a `Regex` that specifies which characters *are to be removed*. Every match of this regular expression in the input string will be removed. The removal happens after string replacements are completed.

This functionality is kept in place for legacy compatibility reasons and since it relies on regular expressions, it will perform worse than using the `AllowedChars` way of specifying.

Specifying the `DeniedCharactersRegex` option will disable the character removal behavior from the `AllowedCharacters` option.

- Default value: `null`
Expand All @@ -183,21 +191,21 @@ Specifying the `DeniedCharactersRegex` option will disable the character removal
var helper = new SlugHelper(new SlugHelperConfiguration
{
// this is equivalent to the default behavior from `AllowedCharacters`
DeniedCharactersRegex = "[^a-zA-Z0-9._-]"
DeniedCharactersRegex = new(@"[^a-zA-Z0-9._-]")
});
Console.WriteLine(helper.GenerateSlug("OLA ke ase!")); // "ola-ke-ase"

helper = new SlugHelper(new SlugHelperConfiguration
{
// remove certain characters explicitly
DeniedCharactersRegex = @"[abcdef]"
DeniedCharactersRegex = new(@"[abcdef]")
});
Console.WriteLine(helper.GenerateSlug("abcdefghijk")); // "ghijk"

helper = new SlugHelper(new SlugHelperConfiguration
{
// remove more complex matches
DeniedCharactersRegex = @"foo|bar"
DeniedCharactersRegex = new(@"foo|bar")
});
Console.WriteLine(helper.GenerateSlug("this is an foo example")); // "this-is-an-example"
```
4 changes: 2 additions & 2 deletions Slugify.sln
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30011.22
# Visual Studio Version 17
VisualStudioVersion = 17.11.34909.67
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{321AAEAA-F675-4B15-80B5-3FF2E6A15602}"
EndProject
Expand Down
17 changes: 14 additions & 3 deletions src/Slugify.Core/ISlugHelper.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
namespace Slugify;

/// <summary>
/// Provides configuration settings for SlugHelper and generates a URL-friendly slug from a given string.
/// The slug is created by normalizing and replacing characters.
/// </summary>
public interface ISlugHelper
{
/// <summary>
/// Holds the configuration settings for the SlugHelper. It can be accessed and modified through its getter and
/// setter.
/// </summary>
SlugHelperConfiguration Config { get; set; }

/// <summary>
/// Generates a URL-friendly slug from the provided string by normalizing and replacing characters.
/// </summary>
/// <param name="inputString">The string to be transformed into a slug format.</param>
/// <returns>A string that represents the slug version of the input, with specified transformations applied.</returns>
/// <exception cref="ArgumentNullException">Thrown when the input string is null.</exception>
string GenerateSlug(string inputString);
}
201 changes: 41 additions & 160 deletions src/Slugify.Core/SlugHelper.cs
Original file line number Diff line number Diff line change
@@ -1,209 +1,90 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

namespace Slugify;


/// <summary>
/// Generates a URL-friendly slug from a given string by normalizing and replacing characters.
/// </summary>
/// <param name="config">Specifies the configuration options for generating the slug, including transformations and allowed characters.</param>
public class SlugHelper(SlugHelperConfiguration config) : ISlugHelper
{
private static readonly Dictionary<string, Regex> _deleteRegexMap = [];
private static readonly Lazy<SlugHelperConfiguration> _defaultConfig = new(() => new SlugHelperConfiguration());

protected SlugHelperConfiguration Config { get; set; } = config ?? throw new ArgumentNullException(nameof(config), "can't be null use default config or empty constructor.");
protected static readonly SlugHelperConfiguration _defaultConfig = new();
protected static readonly Dictionary<string, Regex> _deleteRegexMap = [];
public SlugHelperConfiguration Config { get; set; } = config ?? throw new ArgumentNullException(nameof(config), "can't be null use default config or empty constructor.");

public SlugHelper() : this(_defaultConfig.Value) { }
public SlugHelper() : this(_defaultConfig) { }

/// <summary>
/// Implements <see cref="ISlugHelper.GenerateSlug(string)"/>
/// Generates a URL-friendly slug from the provided string by normalizing and replacing characters.
/// </summary>
/// <param name="inputString">The string to be transformed into a slug format.</param>
/// <returns>A string that represents the slug version of the input, with specified transformations applied.</returns>
/// <exception cref="ArgumentNullException">Thrown when the input string is null.</exception>
public virtual string GenerateSlug(string inputString)
{
var sb = new StringBuilder();

// First we trim and lowercase if necessary
PrepareStringBuilder(inputString.Normalize(NormalizationForm.FormD), sb);
ApplyStringReplacements(sb);
RemoveNonSpacingMarks(sb);

if (Config.DeniedCharactersRegex == null)
if (inputString is null)
{
RemoveNotAllowedCharacters(sb);
throw new ArgumentNullException(nameof(inputString));
}

// For backwards compatibility
if (Config.DeniedCharactersRegex != null)
{
if (!_deleteRegexMap.TryGetValue(Config.DeniedCharactersRegex, out var deniedCharactersRegex))
{
deniedCharactersRegex = new Regex(Config.DeniedCharactersRegex, RegexOptions.Compiled);
_deleteRegexMap.Add(Config.DeniedCharactersRegex, deniedCharactersRegex);
}

var currentValue = sb.ToString();
sb.Clear();
sb.Append(DeleteCharacters(currentValue, deniedCharactersRegex));
}
var normalizedInput = inputString.Normalize(NormalizationForm.FormD);

if (Config.CollapseDashes)
{
CollapseDashes(sb);
}
normalizedInput = Config.TrimWhitespace ? normalizedInput.Trim() : normalizedInput;
normalizedInput = Config.ForceLowerCase ? normalizedInput.ToLower() : normalizedInput;

return sb.ToString();
}
var sb = new StringBuilder(normalizedInput);

private void PrepareStringBuilder(string inputString, StringBuilder sb)
{
var seenFirstNonWhitespace = false;
var indexOfLastNonWhitespace = 0;
for (var i = 0; i < inputString.Length; i++)
{
// first, clean whitepace
var c = inputString[i];
var isWhitespace = char.IsWhiteSpace(c);
if (!seenFirstNonWhitespace && isWhitespace)
{
if (Config.TrimWhitespace)
{
continue;
}
else
{
sb.Append(c);
}
}
else
{
seenFirstNonWhitespace = true;
if (!isWhitespace)
{
indexOfLastNonWhitespace = sb.Length;
}
else
{
c = ' ';

if (Config.CollapseWhiteSpace)
{
while ((i + 1) < inputString.Length && char.IsWhiteSpace(inputString[i + 1]))
{
i++;
}
}
}
if (Config.ForceLowerCase)
{
c = char.ToLower(c);
}

sb.Append(c);
}
}

if (Config.TrimWhitespace)
{
sb.Length = indexOfLastNonWhitespace + 1;
}
}

private void ApplyStringReplacements(StringBuilder sb)
{
foreach (var replacement in Config.StringReplacements)
{
var search = replacement.Key.Normalize(NormalizationForm.FormD);
var replace = replacement.Value.Normalize(NormalizationForm.FormD);

for (var i = 0; i < sb.Length; i++)
{
if (SubstringEquals(sb, i, search))
{
sb.Remove(i, search.Length);
sb.Insert(i, replace);

i += replace.Length - 1;
}
}
}
}

private static bool SubstringEquals(StringBuilder sb, int index, string toMatch)
{
if (sb.Length - index < toMatch.Length)
{
return false;
}

for (var i = index; i < sb.Length; i++)
{
var matchIndex = i - index;

if (matchIndex == toMatch.Length)
{
return true;
}
else if (sb[i] != toMatch[matchIndex])
{
return false;
}
sb.Replace(search, replace);
}
return (sb.Length - index) == toMatch.Length;
}

// Thanks http://stackoverflow.com/a/249126!
protected static void RemoveNonSpacingMarks(StringBuilder sb)
{
for (var ich = 0; ich < sb.Length; ich++)
if (Config.DeniedCharactersRegex == null)
{
if (CharUnicodeInfo.GetUnicodeCategory(sb[ich]) == UnicodeCategory.NonSpacingMark)
var allowedChars = Config.AllowedCharacters;
for (int i = 0; i < sb.Length;)
{
sb.Remove(ich, 1);
ich--;
if (!allowedChars.Contains(sb[i]))
{
sb.Remove(i, 1);
}
else
{
i++;
}
}
}
}

protected void RemoveNotAllowedCharacters(StringBuilder sb)
{
// perf!
var allowedChars = Config.AllowedChars;
for (var i = 0; i < sb.Length; i++)
else // Back compat regex
{
if (!allowedChars.Contains(sb[i]))
{
sb.Remove(i, 1);
i--;
}
var currentValue = sb.ToString();
sb.Clear();
sb.Insert(0, Config.DeniedCharactersRegex.Replace(currentValue, string.Empty));
}
}

protected static void CollapseDashes(StringBuilder sb)
{
var firstDash = true;
for (var i = 0; i < sb.Length; i++)
if (Config.CollapseDashes)
{
// first, clean whitepace
if (sb[i] == '-')
for (int i = 0; i < sb.Length - 1;)
{
if (firstDash)
if (sb[i] == '-' && sb[i + 1] == '-')
{
firstDash = false;
sb.Remove(i, 1);
}
else
{
sb.Remove(i, 1);
i--;
i++;
}
}
else
{
firstDash = true;
}
}

return sb.ToString();
}

protected static string DeleteCharacters(string str, Regex deniedCharactersRegex) => deniedCharactersRegex.Replace(str, string.Empty);
}

Loading