Skip to content

Commit ecea0bd

Browse files
committed
移除转码扩展
1 parent 567f8d7 commit ecea0bd

File tree

4 files changed

+183
-218
lines changed

4 files changed

+183
-218
lines changed
Lines changed: 162 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,184 @@
1-
using System;
1+
#if NETSTANDARD2_1
2+
using System;
23
using System.Buffers;
3-
using System.Diagnostics;
44
using System.Text;
55

66
namespace WebApiClientCore
77
{
88
/// <summary>
99
/// 提供Encoding扩展
1010
/// </summary>
11-
static partial class EncodingExtensions
11+
static class EncodingExtensions
1212
{
1313
/// <summary>
14-
/// 转换编码
14+
/// The maximum number of input elements after which we'll begin to chunk the input.
1515
/// </summary>
16-
/// <param name="srcEncoding"></param>
17-
/// <param name="dstEncoding">目标编码</param>
18-
/// <param name="buffer">源内容</param>
19-
/// <param name="writer">目标写入器</param>
20-
public static void Convert(this Encoding srcEncoding, Encoding dstEncoding, ReadOnlySpan<byte> buffer, IBufferWriter<byte> writer)
16+
/// <remarks>
17+
/// The reason for this chunking is that the existing Encoding / Encoder / Decoder APIs
18+
/// like GetByteCount / GetCharCount will throw if an integer overflow occurs. Since
19+
/// we may be working with large inputs in these extension methods, we don't want to
20+
/// risk running into this issue. While it's technically possible even for 1 million
21+
/// input elements to result in an overflow condition, such a scenario is unrealistic,
22+
/// so we won't worry about it.
23+
/// </remarks>
24+
private const int MaxInputElementsPerIteration = 1 * 1024 * 1024;
25+
26+
/// <summary>
27+
/// Encodes the specified <see cref="ReadOnlySpan{Char}"/> to <see langword="byte"/>s using the specified <see cref="Encoding"/>
28+
/// and writes the result to <paramref name="writer"/>.
29+
/// </summary>
30+
/// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
31+
/// <param name="chars">The <see cref="ReadOnlySpan{Char}"/> to encode to <see langword="byte"/>s.</param>
32+
/// <param name="writer">The buffer to which the encoded bytes will be written.</param>
33+
/// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured
34+
/// to throw an exception when such data is seen.</exception>
35+
public static long GetBytes(this Encoding encoding, ReadOnlySpan<char> chars, IBufferWriter<byte> writer)
2136
{
22-
var decoder = srcEncoding.GetDecoder();
23-
var charCount = decoder.GetCharCount(buffer, false);
24-
var charArray = charCount > 1024 ? ArrayPool<char>.Shared.Rent(charCount) : null;
25-
var chars = charArray == null ? stackalloc char[charCount] : charArray.AsSpan()[..charCount];
37+
if (chars.Length <= MaxInputElementsPerIteration)
38+
{
39+
// The input span is small enough where we can one-shot this.
40+
41+
int byteCount = encoding.GetByteCount(chars);
42+
Span<byte> scratchBuffer = writer.GetSpan(byteCount);
43+
44+
int actualBytesWritten = encoding.GetBytes(chars, scratchBuffer);
2645

27-
try
46+
writer.Advance(actualBytesWritten);
47+
return actualBytesWritten;
48+
}
49+
else
2850
{
29-
decoder.Convert(buffer, chars, true, out _, out var charsUsed, out _);
30-
Debug.Assert(charCount == charsUsed);
51+
// Allocate a stateful Encoder instance and chunk this.
3152

32-
var encoder = dstEncoding.GetEncoder();
33-
var byteCount = encoder.GetByteCount(chars, false);
34-
var bytes = writer.GetSpan(byteCount);
53+
Convert(encoding.GetEncoder(), chars, writer, flush: true, out long totalBytesWritten, out _);
54+
return totalBytesWritten;
55+
}
56+
}
3557

36-
encoder.Convert(chars, bytes, true, out _, out var byteUsed, out _);
37-
Debug.Assert(byteCount == byteUsed);
38-
writer.Advance(byteUsed);
58+
/// <summary>
59+
/// Decodes the specified <see cref="ReadOnlySpan{Byte}"/> to <see langword="char"/>s using the specified <see cref="Encoding"/>
60+
/// and writes the result to <paramref name="writer"/>.
61+
/// </summary>
62+
/// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="bytes"/> should be decoded.</param>
63+
/// <param name="bytes">The <see cref="ReadOnlySpan{Byte}"/> whose bytes should be decoded.</param>
64+
/// <param name="writer">The buffer to which the decoded chars will be written.</param>
65+
/// <returns>The number of chars written to <paramref name="writer"/>.</returns>
66+
/// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="encoding"/> is configured
67+
/// to throw an exception when such data is seen.</exception>
68+
public static long GetChars(this Encoding encoding, ReadOnlySpan<byte> bytes, IBufferWriter<char> writer)
69+
{
70+
if (bytes.Length <= MaxInputElementsPerIteration)
71+
{
72+
// The input span is small enough where we can one-shot this.
73+
74+
int charCount = encoding.GetCharCount(bytes);
75+
Span<char> scratchBuffer = writer.GetSpan(charCount);
76+
77+
int actualCharsWritten = encoding.GetChars(bytes, scratchBuffer);
78+
79+
writer.Advance(actualCharsWritten);
80+
return actualCharsWritten;
3981
}
40-
finally
82+
else
4183
{
42-
if (charArray != null)
43-
{
44-
ArrayPool<char>.Shared.Return(charArray);
45-
}
84+
// Allocate a stateful Decoder instance and chunk this.
85+
86+
Convert(encoding.GetDecoder(), bytes, writer, flush: true, out long totalCharsWritten, out _);
87+
return totalCharsWritten;
4688
}
4789
}
90+
91+
/// <summary>
92+
/// Converts a <see cref="ReadOnlySpan{Char}"/> to bytes using <paramref name="encoder"/> and writes the result to <paramref name="writer"/>.
93+
/// </summary>
94+
/// <param name="encoder">The <see cref="Encoder"/> instance which can convert <see langword="char"/>s to <see langword="byte"/>s.</param>
95+
/// <param name="chars">A sequence of characters to encode.</param>
96+
/// <param name="writer">The buffer to which the encoded bytes will be written.</param>
97+
/// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.</param>
98+
/// <param name="bytesUsed">When this method returns, contains the count of <see langword="byte"/>s which were written to <paramref name="writer"/>.</param>
99+
/// <param name="completed">
100+
/// When this method returns, contains <see langword="true"/> if <paramref name="encoder"/> contains no partial internal state; otherwise, <see langword="false"/>.
101+
/// If <paramref name="flush"/> is <see langword="true"/>, this will always be set to <see langword="true"/> when the method returns.
102+
/// </param>
103+
/// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoder"/> is configured
104+
/// to throw an exception when such data is seen.</exception>
105+
public static void Convert(this Encoder encoder, ReadOnlySpan<char> chars, IBufferWriter<byte> writer, bool flush, out long bytesUsed, out bool completed)
106+
{
107+
// We need to perform at least one iteration of the loop since the encoder could have internal state.
108+
109+
long totalBytesWritten = 0;
110+
111+
do
112+
{
113+
// If our remaining input is very large, instead truncate it and tell the encoder
114+
// that there'll be more data after this call. This truncation is only for the
115+
// purposes of getting the required byte count. Since the writer may give us a span
116+
// larger than what we asked for, we'll pass the entirety of the remaining data
117+
// to the transcoding routine, since it may be able to make progress beyond what
118+
// was initially computed for the truncated input data.
119+
120+
int byteCountForThisSlice = (chars.Length <= MaxInputElementsPerIteration)
121+
? encoder.GetByteCount(chars, flush)
122+
: encoder.GetByteCount(chars.Slice(0, MaxInputElementsPerIteration), flush: false /* this isn't the end of the data */);
123+
124+
Span<byte> scratchBuffer = writer.GetSpan(byteCountForThisSlice);
125+
126+
encoder.Convert(chars, scratchBuffer, flush, out int charsUsedJustNow, out int bytesWrittenJustNow, out completed);
127+
128+
chars = chars.Slice(charsUsedJustNow);
129+
writer.Advance(bytesWrittenJustNow);
130+
totalBytesWritten += bytesWrittenJustNow;
131+
} while (!chars.IsEmpty);
132+
133+
bytesUsed = totalBytesWritten;
134+
}
135+
136+
137+
/// <summary>
138+
/// Converts a <see cref="ReadOnlySpan{Byte}"/> to chars using <paramref name="decoder"/> and writes the result to <paramref name="writer"/>.
139+
/// </summary>
140+
/// <param name="decoder">The <see cref="Decoder"/> instance which can convert <see langword="byte"/>s to <see langword="char"/>s.</param>
141+
/// <param name="bytes">A sequence of bytes to decode.</param>
142+
/// <param name="writer">The buffer to which the decoded chars will be written.</param>
143+
/// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.</param>
144+
/// <param name="charsUsed">When this method returns, contains the count of <see langword="char"/>s which were written to <paramref name="writer"/>.</param>
145+
/// <param name="completed">
146+
/// When this method returns, contains <see langword="true"/> if <paramref name="decoder"/> contains no partial internal state; otherwise, <see langword="false"/>.
147+
/// If <paramref name="flush"/> is <see langword="true"/>, this will always be set to <see langword="true"/> when the method returns.
148+
/// </param>
149+
/// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="decoder"/> is configured
150+
/// to throw an exception when such data is seen.</exception>
151+
public static void Convert(this Decoder decoder, ReadOnlySpan<byte> bytes, IBufferWriter<char> writer, bool flush, out long charsUsed, out bool completed)
152+
{
153+
// We need to perform at least one iteration of the loop since the decoder could have internal state.
154+
155+
long totalCharsWritten = 0;
156+
157+
do
158+
{
159+
// If our remaining input is very large, instead truncate it and tell the decoder
160+
// that there'll be more data after this call. This truncation is only for the
161+
// purposes of getting the required char count. Since the writer may give us a span
162+
// larger than what we asked for, we'll pass the entirety of the remaining data
163+
// to the transcoding routine, since it may be able to make progress beyond what
164+
// was initially computed for the truncated input data.
165+
166+
int charCountForThisSlice = (bytes.Length <= MaxInputElementsPerIteration)
167+
? decoder.GetCharCount(bytes, flush)
168+
: decoder.GetCharCount(bytes.Slice(0, MaxInputElementsPerIteration), flush: false /* this isn't the end of the data */);
169+
170+
Span<char> scratchBuffer = writer.GetSpan(charCountForThisSlice);
171+
172+
decoder.Convert(bytes, scratchBuffer, flush, out int bytesUsedJustNow, out int charsWrittenJustNow, out completed);
173+
174+
bytes = bytes.Slice(bytesUsedJustNow);
175+
writer.Advance(charsWrittenJustNow);
176+
totalCharsWritten += charsWrittenJustNow;
177+
} while (!bytes.IsEmpty);
178+
179+
charsUsed = totalCharsWritten;
180+
}
48181
}
49182
}
183+
184+
#endif

0 commit comments

Comments
 (0)