Skip to content

Fix and optimize EscapeUnescapeIri #32025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 37 additions & 40 deletions src/libraries/System.Private.Uri/src/System/IriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,11 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
ValueStringBuilder dest = new ValueStringBuilder(size);
byte[]? bytes = null;

const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB.
int bufferRemaining = 0;

int next = start;
char ch;
bool escape = false;
bool surrogatePair = false;

for (; next < end; ++next)
{
escape = false;
surrogatePair = false;

if ((ch = pInput[next]) == '%')
{
if (next + 2 < end)
Expand Down Expand Up @@ -226,56 +218,61 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
{
// unicode

char ch2;
bool escape;
bool surrogatePair = false;

char ch2 = '\0';

if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
{
ch2 = pInput[next + 1];
escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
if (!escape)
{
// copy the two chars
dest.Append(pInput[next++]);
dest.Append(pInput[next]);
}
}
else
{
if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
}

if (escape)
{
Span<byte> encodedBytes = stackalloc byte[4];

Rune rune;
if (surrogatePair)
{
// copy it
dest.Append(pInput[next]);
rune = new Rune(ch, ch2);
}
else
else if (!Rune.TryCreate(ch, out rune))
{
// escape it
escape = true;
rune = Rune.ReplacementChar;
}
}
}
else
{
// just copy the character
dest.Append(pInput[next]);
}

if (escape)
{
const int MaxNumberOfBytesEncoded = 4;
int bytesWritten = rune.EncodeToUtf8(encodedBytes);
encodedBytes = encodedBytes.Slice(0, bytesWritten);

byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
fixed (byte* pEncodedBytes = &encodedBytes[0])
foreach (byte b in encodedBytes)
{
UriHelper.EscapeAsciiChar(b, ref dest);
}
}
else
{
int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);
Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

bufferRemaining -= encodedBytesCount * percentEncodingLen;

for (int count = 0; count < encodedBytesCount; ++count)
dest.Append(ch);
if (surrogatePair)
{
UriHelper.EscapeAsciiChar(encodedBytes[count], ref dest);
dest.Append(ch2);
}
}

if (surrogatePair)
{
next++;
}
}
else
{
// just copy the character
dest.Append(pInput[next]);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System.Collections.Generic;
using Xunit;

namespace System.PrivateUri.Tests
{
    /// <summary>
    /// Verifies how <see cref="Uri.AbsoluteUri"/> renders input that contains
    /// UTF-16 surrogate code units, both correctly paired and standalone.
    /// </summary>
    public class EscapeUnescapeIriTests
    {
        /// <summary>
        /// Supplies the rows for the theory of the same name.
        /// Each row is <c>{ input, expected }</c>, where <c>expected</c> is the text
        /// that should follow the scheme prefix in <see cref="Uri.AbsoluteUri"/>.
        /// </summary>
        public static IEnumerable<object[]> ReplacesStandaloneSurrogatesWithReplacementChar()
        {
            const string High = "\ud83f";
            const string Low = "\udffe";

            // Percent-encoded UTF-8 bytes of U+FFFD (the Unicode replacement character).
            const string EscapedReplacement = "%EF%BF%BD";
            const string EscapedReplacementTwice = EscapedReplacement + EscapedReplacement;

            return new[]
            {
                // Plain ASCII passes through untouched.
                new object[] { "a", "a" },

                // A well-formed pair is expected as the percent-encoded UTF-8 of its single code point.
                new object[] { High + Low, "%F0%9F%BF%BE" },

                // A lone surrogate is expected as one escaped replacement character.
                new object[] { High, EscapedReplacement },
                new object[] { Low, EscapedReplacement },

                // Two surrogates that do not form a high+low pair are expected as two replacements.
                new object[] { Low + High, EscapedReplacementTwice },
                new object[] { Low + Low, EscapedReplacementTwice },
                new object[] { High + High, EscapedReplacementTwice },
            };
        }

        /// <summary>
        /// Builds a <see cref="Uri"/> from a fixed scheme prefix followed by <paramref name="input"/>
        /// and checks that the remainder of <see cref="Uri.AbsoluteUri"/> equals <paramref name="expected"/>.
        /// </summary>
        /// <param name="input">Text appended after the scheme prefix.</param>
        /// <param name="expected">Expected text after the scheme prefix in the absolute URI.</param>
        [Theory]
        [MemberData(nameof(ReplacesStandaloneSurrogatesWithReplacementChar))]
        public static void ReplacesStandaloneSurrogatesWithReplacementChar(string input, string expected)
        {
            const string Prefix = "scheme:";

            string absoluteUri = new Uri(Prefix + input).AbsoluteUri;
            string afterPrefix = absoluteUri.Substring(Prefix.Length);

            Assert.Equal(expected, afterPrefix);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
</PropertyGroup>
<ItemGroup>
<Compile Include="AppxUriValue.cs" />
<Compile Include="EscapeUnescapeIriTests.cs" />
<Compile Include="IdnCheckHostNameTest.cs" />
<Compile Include="IdnDnsSafeHostTest.cs" />
<Compile Include="IdnHostNameValidationTest.cs" />
Expand Down