Skip to content

Commit f8809f5

Browse files
Update Adler32.cs
1 parent c910a81 commit f8809f5

File tree

1 file changed

+95
-122
lines changed

1 file changed

+95
-122
lines changed

src/ImageSharp/Formats/Png/Zlib/Adler32.cs

Lines changed: 95 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
using System;
55
using System.Runtime.CompilerServices;
6-
using System.Runtime.InteropServices;
76
#if SUPPORTS_RUNTIME_INTRINSICS
87
using System.Runtime.Intrinsics;
98
using System.Runtime.Intrinsics.X86;
109
#endif
1110

11+
#pragma warning disable IDE0007 // Use implicit type
1212
namespace SixLabors.ImageSharp.Formats.Png.Zlib
1313
{
1414
/// <summary>
@@ -22,16 +22,22 @@ internal static class Adler32
2222
/// </summary>
2323
public const uint SeedValue = 1U;
2424

25-
#if SUPPORTS_RUNTIME_INTRINSICS
26-
private const int MinBufferSize = 64;
27-
#endif
28-
2925
// Largest prime smaller than 65536
3026
private const uint BASE = 65521;
3127

3228
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
3329
private const uint NMAX = 5552;
3430

31+
#if SUPPORTS_RUNTIME_INTRINSICS
32+
private const int MinBufferSize = 64;
33+
34+
private static ReadOnlySpan<byte> Tap1Tap2 => new byte[]
35+
{
36+
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1
37+
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 // tap2
38+
};
39+
#endif
40+
3541
/// <summary>
3642
/// Calculates the Adler32 checksum with the bytes taken from the span.
3743
/// </summary>
@@ -83,14 +89,15 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
8389
length -= blocks * BLOCK_SIZE;
8490

8591
int index = 0;
86-
fixed (byte* bufferPtr = &buffer[0])
92+
fixed (byte* bufferPtr = buffer)
93+
fixed (byte* tapPtr = Tap1Tap2)
8794
{
8895
index += (int)blocks * BLOCK_SIZE;
8996
var localBufferPtr = bufferPtr;
9097

9198
// Tap weights for the s2 multiply-add (the equivalent of _mm_setr_epi8 on x86).
92-
var tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
93-
var tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
99+
Vector128<sbyte> tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
100+
Vector128<sbyte> tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
94101
Vector128<byte> zero = Vector128<byte>.Zero;
95102
var ones = Vector128.Create((short)1);
96103

@@ -106,28 +113,28 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
106113

107114
// Process n blocks of data. At most NMAX data bytes can be
108115
// processed before s2 must be reduced modulo BASE.
109-
Vector128<int> v_ps = Vector128.CreateScalar(s1 * n).AsInt32();
110-
Vector128<int> v_s2 = Vector128.CreateScalar(s2).AsInt32();
111-
Vector128<int> v_s1 = Vector128<int>.Zero;
116+
Vector128<uint> v_ps = Vector128.CreateScalar(s1 * n);
117+
Vector128<uint> v_s2 = Vector128.CreateScalar(s2);
118+
Vector128<uint> v_s1 = Vector128<uint>.Zero;
112119

113120
do
114121
{
115122
// Load 32 input bytes.
116123
Vector128<byte> bytes1 = Sse3.LoadDquVector128(localBufferPtr);
117-
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 16);
124+
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
118125

119126
// Add previous block byte sum to v_ps.
120127
v_ps = Sse2.Add(v_ps, v_s1);
121128

122129
// Horizontally add the bytes for s1, and multiply-add the
123130
// bytes by [ 32, 31, 30, ... ] for s2.
124-
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsInt32());
131+
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
125132
Vector128<short> mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
126-
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones));
133+
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
127134

128-
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsInt32());
135+
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
129136
Vector128<short> mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
130-
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones));
137+
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
131138

132139
localBufferPtr += BLOCK_SIZE;
133140
}
@@ -139,148 +146,114 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
139146
const byte S2301 = 0b1011_0001; // A B C D -> B A D C
140147
const byte S1032 = 0b0100_1110; // A B C D -> C D A B
141148

142-
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S2301));
143149
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
144150

145-
s1 += (uint)v_s1.ToScalar();
151+
s1 += v_s1.ToScalar();
146152

147153
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
148154
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
149155

150-
s2 = (uint)v_s2.ToScalar();
156+
s2 = v_s2.ToScalar();
151157

152158
// Reduce.
153159
s1 %= BASE;
154160
s2 %= BASE;
155161
}
156-
}
157-
158-
ref byte bufferRef = ref MemoryMarshal.GetReference(buffer);
159162

160-
if (length > 0)
161-
{
162-
if (length >= 16)
163+
if (length > 0)
163164
{
164-
s1 += Unsafe.Add(ref bufferRef, index++);
165-
s2 += s1;
166-
s1 += Unsafe.Add(ref bufferRef, index++);
167-
s2 += s1;
168-
s1 += Unsafe.Add(ref bufferRef, index++);
169-
s2 += s1;
170-
s1 += Unsafe.Add(ref bufferRef, index++);
171-
s2 += s1;
172-
s1 += Unsafe.Add(ref bufferRef, index++);
173-
s2 += s1;
174-
s1 += Unsafe.Add(ref bufferRef, index++);
175-
s2 += s1;
176-
s1 += Unsafe.Add(ref bufferRef, index++);
177-
s2 += s1;
178-
s1 += Unsafe.Add(ref bufferRef, index++);
179-
s2 += s1;
180-
s1 += Unsafe.Add(ref bufferRef, index++);
181-
s2 += s1;
182-
s1 += Unsafe.Add(ref bufferRef, index++);
183-
s2 += s1;
184-
s1 += Unsafe.Add(ref bufferRef, index++);
185-
s2 += s1;
186-
s1 += Unsafe.Add(ref bufferRef, index++);
187-
s2 += s1;
188-
s1 += Unsafe.Add(ref bufferRef, index++);
189-
s2 += s1;
190-
s1 += Unsafe.Add(ref bufferRef, index++);
191-
s2 += s1;
192-
s1 += Unsafe.Add(ref bufferRef, index++);
193-
s2 += s1;
194-
s1 += Unsafe.Add(ref bufferRef, index++);
195-
s2 += s1;
196-
length -= 16;
197-
}
165+
if (length >= 16)
166+
{
167+
s2 += s1 += localBufferPtr[0];
168+
s2 += s1 += localBufferPtr[1];
169+
s2 += s1 += localBufferPtr[2];
170+
s2 += s1 += localBufferPtr[3];
171+
s2 += s1 += localBufferPtr[4];
172+
s2 += s1 += localBufferPtr[5];
173+
s2 += s1 += localBufferPtr[6];
174+
s2 += s1 += localBufferPtr[7];
175+
s2 += s1 += localBufferPtr[8];
176+
s2 += s1 += localBufferPtr[9];
177+
s2 += s1 += localBufferPtr[10];
178+
s2 += s1 += localBufferPtr[11];
179+
s2 += s1 += localBufferPtr[12];
180+
s2 += s1 += localBufferPtr[13];
181+
s2 += s1 += localBufferPtr[14];
182+
s2 += s1 += localBufferPtr[15];
183+
184+
localBufferPtr += 16;
185+
length -= 16;
186+
}
198187

199-
while (length-- > 0)
200-
{
201-
s2 += s1 += Unsafe.Add(ref bufferRef, index++);
202-
}
188+
while (length-- > 0)
189+
{
190+
s2 += s1 += *localBufferPtr++;
191+
}
203192

204-
if (s1 >= BASE)
205-
{
206-
s1 -= BASE;
193+
if (s1 >= BASE)
194+
{
195+
s1 -= BASE;
196+
}
197+
198+
s2 %= BASE;
207199
}
208200

209-
s2 %= BASE;
201+
return s1 | (s2 << 16);
210202
}
211-
212-
return s1 | (s2 << 16);
213203
}
214204
#endif
215205

216206
[MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
217-
private static uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
207+
private static unsafe uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
218208
{
219209
uint s1 = adler & 0xFFFF;
220210
uint s2 = (adler >> 16) & 0xFFFF;
221211
uint k;
222212

223-
ref byte bufferRef = ref MemoryMarshal.GetReference<byte>(buffer);
224-
uint length = (uint)buffer.Length;
225-
int index = 0;
226-
227-
while (length > 0)
213+
fixed (byte* bufferPtr = buffer)
228214
{
229-
k = length < NMAX ? length : NMAX;
230-
length -= k;
215+
var localBufferPtr = bufferPtr;
216+
uint length = (uint)buffer.Length;
231217

232-
while (k >= 16)
218+
while (length > 0)
233219
{
234-
s1 += Unsafe.Add(ref bufferRef, index++);
235-
s2 += s1;
236-
s1 += Unsafe.Add(ref bufferRef, index++);
237-
s2 += s1;
238-
s1 += Unsafe.Add(ref bufferRef, index++);
239-
s2 += s1;
240-
s1 += Unsafe.Add(ref bufferRef, index++);
241-
s2 += s1;
242-
s1 += Unsafe.Add(ref bufferRef, index++);
243-
s2 += s1;
244-
s1 += Unsafe.Add(ref bufferRef, index++);
245-
s2 += s1;
246-
s1 += Unsafe.Add(ref bufferRef, index++);
247-
s2 += s1;
248-
s1 += Unsafe.Add(ref bufferRef, index++);
249-
s2 += s1;
250-
s1 += Unsafe.Add(ref bufferRef, index++);
251-
s2 += s1;
252-
s1 += Unsafe.Add(ref bufferRef, index++);
253-
s2 += s1;
254-
s1 += Unsafe.Add(ref bufferRef, index++);
255-
s2 += s1;
256-
s1 += Unsafe.Add(ref bufferRef, index++);
257-
s2 += s1;
258-
s1 += Unsafe.Add(ref bufferRef, index++);
259-
s2 += s1;
260-
s1 += Unsafe.Add(ref bufferRef, index++);
261-
s2 += s1;
262-
s1 += Unsafe.Add(ref bufferRef, index++);
263-
s2 += s1;
264-
s1 += Unsafe.Add(ref bufferRef, index++);
265-
s2 += s1;
266-
k -= 16;
267-
}
220+
k = length < NMAX ? length : NMAX;
221+
length -= k;
268222

269-
if (k != 0)
270-
{
271-
do
223+
while (k >= 16)
224+
{
225+
s2 += s1 += localBufferPtr[0];
226+
s2 += s1 += localBufferPtr[1];
227+
s2 += s1 += localBufferPtr[2];
228+
s2 += s1 += localBufferPtr[3];
229+
s2 += s1 += localBufferPtr[4];
230+
s2 += s1 += localBufferPtr[5];
231+
s2 += s1 += localBufferPtr[6];
232+
s2 += s1 += localBufferPtr[7];
233+
s2 += s1 += localBufferPtr[8];
234+
s2 += s1 += localBufferPtr[9];
235+
s2 += s1 += localBufferPtr[10];
236+
s2 += s1 += localBufferPtr[11];
237+
s2 += s1 += localBufferPtr[12];
238+
s2 += s1 += localBufferPtr[13];
239+
s2 += s1 += localBufferPtr[14];
240+
s2 += s1 += localBufferPtr[15];
241+
242+
localBufferPtr += 16;
243+
k -= 16;
244+
}
245+
246+
while (k-- > 0)
272247
{
273-
s1 += Unsafe.Add(ref bufferRef, index++);
274-
s2 += s1;
248+
s2 += s1 += *localBufferPtr++;
275249
}
276-
while (--k != 0);
250+
251+
s1 %= BASE;
252+
s2 %= BASE;
277253
}
278254

279-
s1 %= BASE;
280-
s2 %= BASE;
255+
return (s2 << 16) | s1;
281256
}
282-
283-
return (s2 << 16) | s1;
284257
}
285258
}
286259
}

0 commit comments

Comments
 (0)