Skip to content

Commit

Permalink
Namespace and ClassName sanitization helpers;
Browse files Browse the repository at this point in the history
DevelopmentDependency=true
  • Loading branch information
yugabe committed Feb 13, 2024
1 parent 0ed087d commit 9a12eae
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 1 deletion.
65 changes: 64 additions & 1 deletion src/Analyzers.Core/CodeAnalysis/TextProcessing.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace PodNet.Analyzers.CodeAnalysis;
using System.Text.RegularExpressions;

namespace PodNet.Analyzers.CodeAnalysis;

/// <summary>Contains helpers to process text values common in analyzer authoring.</summary>
public static class TextProcessing
Expand All @@ -10,4 +12,65 @@ public static string TrimAttributeSuffix(string attributeName)
=> attributeName.EndsWith("Attribute") && attributeName.Length > "Attribute".Length
? attributeName[..^"Attribute".Length]
: attributeName;

/// <summary>
/// Sanitizes the given <paramref name="namespaceCandidate"/> to adhere to basic namespacing rules:
/// <list type="bullet">
/// <item>replaces consecutive path and segment separator characters ("separators") <c>'\'</c>, <c>'/'</c> and <c>'.'</c> with a single <c>'.'</c>,</item>
/// <item>replaces characters that are not letters, numbers, underscores or separators each with one <c>'_'</c>,</item>
/// <item>removes leading and trailing separators,</item>
/// <item>prepends an <c>'_'</c> to the leading number of each segment (including the first segment when it is preceded by leading separators).</item>
/// </list>
/// </summary>
/// <param name="namespaceCandidate">The namespace to sanitize. Can be a relative path, but make sure that the path doesn't contain the filename itself, as it would become multiple namespace segments itself. It's advised to make the path relative to the file's containing folder instead. See also <see cref="PathProcessing.GetRelativePath(string, string)"/>.</param>
/// <returns>The sanitized namespace. Can only contain letters, (non-leading) numbers, underscores and dots. Can be empty, but cannot be whitespace or <see langword="null"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="namespaceCandidate"/> is <see langword="null"/>.</exception>
/// <exception cref="RegexMatchTimeoutException">The sanitizing pattern timed out (a timeout is only configured when <c>DEBUG</c> is not defined).</exception>
public static string GetNamespace(string namespaceCandidate)
    => NamespaceSanitizer.Replace(namespaceCandidate, static match =>
    {
        var groups = match.Groups;

        // A digit that opens a segment: keep the digit and guard it with '_'. The segment boundary
        // is only re-emitted when the separators in front of the digit were captured, which does not
        // happen for separators at the very start of the input (those are simply dropped).
        if (groups["namespaceSegmentLeadingNumber"] is { Success: true, Value: var leadingDigit })
            return groups["namespaceSegmentLeadingNumberDot"].Success ? $"._{leadingDigit}" : $"_{leadingDigit}";
        if (groups["leadingOrTrailingDots"].Success)
            return "";
        if (groups["pathOrSegmentSeparators"].Success)
            return ".";
        if (groups["invalidChars"] is { Success: true, Length: var invalidCharCount })
            return new string('_', invalidCharCount);

        // Every alternative of the pattern sets one of the groups above, so this is a guard against
        // the pattern and the callback drifting apart.
        throw new InvalidOperationException("Regex pattern matched, but none of the provided cases handled the match.");
    });

/// <summary>
/// The pattern behind <see cref="GetNamespace(string)"/>. Its alternatives are tried in order at each position, so their order matters.
/// </summary>
private static Regex NamespaceSanitizer { get; } = new Regex(
    // 1. A digit opening a segment, together with the separators in front of it. This has to come
    //    before the leading-separator alternative, otherwise "./1" would lose its separators first
    //    and the digit would no longer be recognized as segment-leading.
    @"(?:(?:^[./\\]*|(?<namespaceSegmentLeadingNumberDot>[./\\]+))(?<namespaceSegmentLeadingNumber>\d))"
    // 2. Separator runs at the very start or the very end of the input.
    + @"|(?<leadingOrTrailingDots>^[./\\]+|[./\\]+$)"
    // 3. Any other separator run.
    + @"|(?<pathOrSegmentSeparators>[./\\]+)"
    // 4. Runs of characters that are neither word characters nor separators. All three separators are
    //    excluded so that a run such as " /" is not swallowed whole and the segment boundary survives.
    + @"|(?<invalidChars>[^./\\\w]+)",
    RegexOptions.Compiled
#if !DEBUG
    // Bound the matching time outside of debug builds.
    , TimeSpan.FromMilliseconds(100)
#endif
    );

/// <summary>
/// Turns <paramref name="classNameCandidate"/> into a value that follows basic class naming rules:
/// <list type="bullet">
/// <item>every character that is not a letter, a number or an underscore is replaced with one <c>'_'</c>,</item>
/// <item>a leading number gets an <c>'_'</c> prepended,</item>
/// <item>an empty <paramref name="classNameCandidate"/> becomes <c>'_'</c>.</item>
/// </list>
/// </summary>
/// <param name="classNameCandidate">The class name to sanitize. Possibly a file name without the extension (see <see cref="Path.GetFileNameWithoutExtension(string)"/>).</param>
/// <returns>The sanitized class name. Can only contain letters, (non-leading) numbers and underscores. Can not be empty, whitespace or <see langword="null"/>.</returns>
public static string GetClassName(string classNameCandidate)
{
    return ClassNameSanitizer.Replace(classNameCandidate, Sanitize);

    // Maps a single match of the sanitizer pattern to its replacement text.
    static string Sanitize(Match match)
    {
        var groups = match.Groups;

        var leadingDigit = groups["leadingNumber"];
        if (leadingDigit.Success)
            return "_" + leadingDigit.Value;

        var invalidRun = groups["invalidChars"];
        if (invalidRun.Success)
            return new string('_', invalidRun.Length);

        if (!groups["isEmpty"].Success)
            throw new InvalidOperationException("Regex pattern matched, but none of the provided cases handled the match.");

        return "_";
    }
}

/// <summary>
/// The pattern behind <see cref="GetClassName(string)"/>: matches a digit at the very start, any run of
/// characters that are not word characters, or the empty input.
/// </summary>
private static Regex ClassNameSanitizer { get; } = new Regex(
    @"(?:^(?<leadingNumber>\d))|(?<invalidChars>[^_\w\d]+)|(?<isEmpty>^$)",
    RegexOptions.Compiled
#if !DEBUG
    // The match timeout is only passed when DEBUG is not defined.
    , TimeSpan.FromMilliseconds(100)
#endif
    );
}
3 changes: 3 additions & 0 deletions src/Analyzers.Core/build/PodNet.Analyzers.Core.props
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
<PodNetIncludeAnalyzerCore>true</PodNetIncludeAnalyzerCore>
<PodNetIncludeLibPlaceholder>true</PodNetIncludeLibPlaceholder>

<!-- Marks the package as a development dependency. This only means that, when the package is installed via VS or the CLI, the resulting PackageReference is set to PrivateAssets="all", IncludeAssets="runtime; build; native; contentfiles; analyzers". Consumers can opt out by editing their .csproj file, and the analyzer author can opt out by setting this value to false. Info: https://github.com/NuGet/Home/wiki/DevelopmentDependency-support-for-PackageReference -->
<DevelopmentDependency>true</DevelopmentDependency>

<!-- Analyzers don't get referenced directly by user code. -->
<GenerateDocumentationFile>false</GenerateDocumentationFile>
</PropertyGroup>
Expand Down
33 changes: 33 additions & 0 deletions tests/Analyzers.Tests/TextProcessingTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,37 @@ public void TrimAttributeSuffix_TrimsAsExpected(string input, string? expected =
expected ??= input;
Assert.AreEqual(expected, actual);
}

/// <summary>
/// Checks that <see cref="TextProcessing.GetNamespace(string)"/> produces the expected namespace for
/// plain namespaces, relative paths, invalid characters and segments that start with a number.
/// </summary>
[DataTestMethod]
[DataRow("", "")]
[DataRow("Namespace", "Namespace")]
[DataRow("Namespace.SubNamespace", "Namespace.SubNamespace")]
[DataRow("./Folder", "Folder")]
[DataRow("A/B", "A.B")]
[DataRow("A.B", "A.B")]
[DataRow("..//../A/\\B/.\\", "A.B")]
[DataRow("..\\..\\", "")]
[DataRow("A & B/C & D", "A___B.C___D")]
[DataRow("11", "_11")]
[DataRow("11/22", "_11._22")]
[DataRow("../ A & B.0//", "_A___B._0")]
// A file name (including its extension) is treated like any other path segment.
[DataRow("./Folder/File.ext", "Folder.File.ext")]
public void NamespaceSanitization_WorksAsExpected(string input, string expected)
    => Assert.AreEqual(expected, TextProcessing.GetNamespace(input));


/// <summary>
/// Checks that <see cref="TextProcessing.GetClassName(string)"/> produces the expected class name for
/// empty input, plain names, names with invalid characters and names that start with a number.
/// </summary>
[DataTestMethod]
[DataRow("", "_")]
[DataRow("Filename", "Filename")]
[DataRow("Filename.ext", "Filename_ext")]
[DataRow("1 File", "_1_File")]
// Separators are not special for class names: each one becomes an underscore.
[DataRow("./Folder/1 File .ext", "__Folder_1_File__ext")]
public void ClassNameSanitization_WorksAsExpected(string input, string expected)
    => Assert.AreEqual(expected, TextProcessing.GetClassName(input));
}

0 comments on commit 9a12eae

Please sign in to comment.