Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Refactor Encoding to split fast-path and fallback logic #23098

Merged
merged 15 commits into from
Mar 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\StringSplitOptions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SystemException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIUtility.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilderCache.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
Expand All @@ -775,6 +776,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.Internal.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
Expand Down
4 changes: 2 additions & 2 deletions src/System.Private.CoreLib/shared/System/String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ internal static unsafe string CreateStringFromEncoding(
Debug.Assert(byteLength >= 0);

// Get our string length
int stringLength = encoding.GetCharCount(bytes, byteLength, null);
int stringLength = encoding.GetCharCount(bytes, byteLength);
Debug.Assert(stringLength >= 0, "stringLength >= 0");

// They gave us an empty string if they needed one
Expand All @@ -491,7 +491,7 @@ internal static unsafe string CreateStringFromEncoding(
string s = FastAllocateString(stringLength);
fixed (char* pTempChars = &s._firstChar)
{
int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength, null);
int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength);
Debug.Assert(stringLength == doubleCheck,
"Expected encoding.GetChars to return same length as encoding.GetCharCount");
}
Expand Down
1,128 changes: 520 additions & 608 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs

Large diffs are not rendered by default.

76 changes: 76 additions & 0 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.CompilerServices;

namespace System.Text
{
/*
* Contains naive unoptimized (non-SIMD) implementations of ASCII transcoding
* operations. Vectorized methods can be substituted here as a drop-in replacement.
*/

internal unsafe static class ASCIIUtility
{
[MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
public static uint GetIndexOfFirstNonAsciiByte(byte* pBytes, uint byteCount)
{
uint idx = 0;
for (; idx < byteCount; idx++)
{
if ((sbyte)pBytes[idx] < 0)
{
break;
}
}
return idx;
}

[MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
public static uint GetIndexOfFirstNonAsciiChar(char* pChars, uint charCount)
{
uint idx = 0;
for (; idx < charCount; idx++)
{
if (pChars[idx] > 0x7Fu)
{
break;
}
}
return idx;
}

[MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
public static uint NarrowUtf16ToAscii(char* pChars, byte* pBytes, uint elementCount)
{
uint idx = 0;
for (; idx < elementCount; idx++)
{
uint ch = pChars[idx];
if (ch > 0x7Fu)
{
break;
}
pBytes[idx] = (byte)ch;
}
return idx;
}

[MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
public static uint WidenAsciiToUtf16(byte* pBytes, char* pChars, uint elementCount)
{
uint idx = 0;
for (; idx < elementCount; idx++)
{
byte b = pBytes[idx];
if (b > 0x7F)
{
break;
}
pChars[idx] = (char)b;
}
return idx;
}
}
}
104 changes: 104 additions & 0 deletions src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ public virtual void Reset()
internal unsafe byte* byteStart;
internal unsafe char* charEnd;

internal Encoding _encoding;
internal DecoderNLS _decoder;
private int _originalByteCount;

// Internal Reset
internal unsafe void InternalReset()
{
Expand All @@ -82,6 +86,22 @@ internal unsafe void InternalInitialize(byte* byteStart, char* charEnd)
this.charEnd = charEnd;
}

internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS decoder, int originalByteCount)
{
// The original byte count is only used for keeping track of what 'index' value needs
// to be passed to the abstract Fallback method. The index value is calculated by subtracting
// 'bytes.Length' (where bytes is expected to be the entire remaining input buffer)
// from the 'originalByteCount' value specified here.

DecoderFallbackBuffer fallbackBuffer = (decoder is null) ? encoding.DecoderFallback.CreateFallbackBuffer() : decoder.FallbackBuffer;

fallbackBuffer._encoding = encoding;
fallbackBuffer._decoder = decoder;
fallbackBuffer._originalByteCount = originalByteCount;

return fallbackBuffer;
}

// Fallback the current byte by sticking it into the remaining char buffer.
// This can only be called by our encodings (other have to use the public fallback methods), so
// we can use our DecoderNLS here too (except we don't).
Expand Down Expand Up @@ -191,6 +211,90 @@ internal unsafe virtual int InternalFallback(byte[] bytes, byte* pBytes)
return 0;
}

internal int InternalFallbackGetCharCount(ReadOnlySpan<byte> remainingBytes, int fallbackLength)
{
return (Fallback(remainingBytes.Slice(0, fallbackLength).ToArray(), index: _originalByteCount - remainingBytes.Length))
? DrainRemainingDataForGetCharCount()
: 0;
}

internal bool TryInternalFallbackGetChars(ReadOnlySpan<byte> remainingBytes, int fallbackLength, Span<char> chars, out int charsWritten)
{
if (Fallback(remainingBytes.Slice(0, fallbackLength).ToArray(), index: _originalByteCount - remainingBytes.Length))
{
return TryDrainRemainingDataForGetChars(chars, out charsWritten);
}
else
{
// Return true because we weren't asked to write anything, so this is a "success" in the sense that
// the output buffer was large enough to hold the desired 0 chars of output.

charsWritten = 0;
return true;
}
}

private Rune GetNextRune()
{
// Call GetNextChar() and try treating it as a non-surrogate character.
// If that fails, call GetNextChar() again and attempt to treat the two chars
// as a surrogate pair. If that still fails, throw an exception since the fallback
// mechanism is giving us a bad replacement character.

Rune rune;
char ch = GetNextChar();
if (!Rune.TryCreate(ch, out rune) && !Rune.TryCreate(ch, GetNextChar(), out rune))
{
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
}

return rune;
}

internal int DrainRemainingDataForGetCharCount()
{
int totalCharCount = 0;

Rune thisRune;
while ((thisRune = GetNextRune()).Value != 0)
{
// We need to check for overflow while tallying the fallback char count.

totalCharCount += thisRune.Utf16SequenceLength;
if (totalCharCount < 0)
{
InternalReset();
Encoding.ThrowConversionOverflow();
}
}

return totalCharCount;
}

internal bool TryDrainRemainingDataForGetChars(Span<char> chars, out int charsWritten)
{
int originalCharCount = chars.Length;

Rune thisRune;
while ((thisRune = GetNextRune()).Value != 0)
{
if (thisRune.TryEncode(chars, out int charsWrittenJustNow))
{
chars = chars.Slice(charsWrittenJustNow);
continue;
}
else
{
InternalReset();
charsWritten = default;
return false;
}
}

charsWritten = originalCharCount - chars.Length;
return true;
}

// private helper methods
internal void ThrowLastBytesRecursive(byte[] bytesUnknown)
{
Expand Down
Loading