33
44using System ;
55using System . Runtime . CompilerServices ;
6- using System . Runtime . InteropServices ;
76#if SUPPORTS_RUNTIME_INTRINSICS
87using System . Runtime . Intrinsics ;
98using System . Runtime . Intrinsics . X86 ;
109#endif
1110
11+ #pragma warning disable IDE0007 // Use implicit type
1212namespace SixLabors . ImageSharp . Formats . Png . Zlib
1313{
1414 /// <summary>
@@ -22,16 +22,22 @@ internal static class Adler32
2222 /// </summary>
2323 public const uint SeedValue = 1U ;
2424
25- #if SUPPORTS_RUNTIME_INTRINSICS
26- private const int MinBufferSize = 64 ;
27- #endif
28-
// Largest prime smaller than 65536
private const uint BASE = 65521;

// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
private const uint NMAX = 5552;

#if SUPPORTS_RUNTIME_INTRINSICS
// NOTE(review): presumably the smallest input length for which the vectorized
// path is chosen; the call site is not visible here - confirm.
private const int MinBufferSize = 64;

/// <summary>
/// Gets the 32 per-byte weights used by the SSE path: the first 16 bytes (tap1)
/// weight the first half of a 32-byte block, the last 16 bytes (tap2) weight
/// the second half. The SSE path pins this span and loads each half as one
/// 128-bit vector.
/// </summary>
/// <remarks>
/// NOTE(review): a <see cref="ReadOnlySpan{T}"/> property over a constant byte-array
/// literal is expected to be emitted by the C# compiler as static data rather than
/// allocated per call - confirm for the compiler version in use.
/// </remarks>
private static ReadOnlySpan<byte> Tap1Tap2 => new byte[]
{
    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1
    16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 // tap2
};
#endif
40+
3541 /// <summary>
3642 /// Calculates the Adler32 checksum with the bytes taken from the span.
3743 /// </summary>
@@ -83,14 +89,15 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
8389 length -= blocks * BLOCK_SIZE ;
8490
8591 int index = 0 ;
86- fixed ( byte * bufferPtr = & buffer [ 0 ] )
92+ fixed ( byte * bufferPtr = buffer )
93+ fixed ( byte * tapPtr = Tap1Tap2 )
8794 {
8895 index += ( int ) blocks * BLOCK_SIZE ;
8996 var localBufferPtr = bufferPtr ;
9097
9198 // _mm_setr_epi8 on x86
92- var tap1 = Vector128 . Create ( 32 , 31 , 30 , 29 , 28 , 27 , 26 , 25 , 24 , 23 , 22 , 21 , 20 , 19 , 18 , 17 ) ;
93- var tap2 = Vector128 . Create ( 16 , 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 ) ;
99+ Vector128 < sbyte > tap1 = Sse2 . LoadVector128 ( ( sbyte * ) tapPtr ) ;
100+ Vector128 < sbyte > tap2 = Sse2 . LoadVector128 ( ( sbyte * ) ( tapPtr + 0x10 ) ) ;
94101 Vector128 < byte > zero = Vector128 < byte > . Zero ;
95102 var ones = Vector128 . Create ( ( short ) 1 ) ;
96103
@@ -106,28 +113,28 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
106113
107114 // Process n blocks of data. At most NMAX data bytes can be
108115 // processed before s2 must be reduced modulo BASE.
109- Vector128 < int > v_ps = Vector128 . CreateScalar ( s1 * n ) . AsInt32 ( ) ;
110- Vector128 < int > v_s2 = Vector128 . CreateScalar ( s2 ) . AsInt32 ( ) ;
111- Vector128 < int > v_s1 = Vector128 < int > . Zero ;
116+ Vector128 < uint > v_ps = Vector128 . CreateScalar ( s1 * n ) ;
117+ Vector128 < uint > v_s2 = Vector128 . CreateScalar ( s2 ) ;
118+ Vector128 < uint > v_s1 = Vector128 < uint > . Zero ;
112119
113120 do
114121 {
115122 // Load 32 input bytes.
116123 Vector128 < byte > bytes1 = Sse3 . LoadDquVector128 ( localBufferPtr ) ;
117- Vector128 < byte > bytes2 = Sse3 . LoadDquVector128 ( localBufferPtr + 16 ) ;
124+ Vector128 < byte > bytes2 = Sse3 . LoadDquVector128 ( localBufferPtr + 0x10 ) ;
118125
119126 // Add previous block byte sum to v_ps.
120127 v_ps = Sse2 . Add ( v_ps , v_s1 ) ;
121128
122129 // Horizontally add the bytes for s1, multiply-adds the
123130 // bytes by [ 32, 31, 30, ... ] for s2.
124- v_s1 = Sse2 . Add ( v_s1 , Sse2 . SumAbsoluteDifferences ( bytes1 , zero ) . AsInt32 ( ) ) ;
131+ v_s1 = Sse2 . Add ( v_s1 , Sse2 . SumAbsoluteDifferences ( bytes1 , zero ) . AsUInt32 ( ) ) ;
125132 Vector128 < short > mad1 = Ssse3 . MultiplyAddAdjacent ( bytes1 , tap1 ) ;
126- v_s2 = Sse2 . Add ( v_s2 , Sse2 . MultiplyAddAdjacent ( mad1 , ones ) ) ;
133+ v_s2 = Sse2 . Add ( v_s2 , Sse2 . MultiplyAddAdjacent ( mad1 , ones ) . AsUInt32 ( ) ) ;
127134
128- v_s1 = Sse2 . Add ( v_s1 , Sse2 . SumAbsoluteDifferences ( bytes2 , zero ) . AsInt32 ( ) ) ;
135+ v_s1 = Sse2 . Add ( v_s1 , Sse2 . SumAbsoluteDifferences ( bytes2 , zero ) . AsUInt32 ( ) ) ;
129136 Vector128 < short > mad2 = Ssse3 . MultiplyAddAdjacent ( bytes2 , tap2 ) ;
130- v_s2 = Sse2 . Add ( v_s2 , Sse2 . MultiplyAddAdjacent ( mad2 , ones ) ) ;
137+ v_s2 = Sse2 . Add ( v_s2 , Sse2 . MultiplyAddAdjacent ( mad2 , ones ) . AsUInt32 ( ) ) ;
131138
132139 localBufferPtr += BLOCK_SIZE ;
133140 }
@@ -139,148 +146,114 @@ private static unsafe uint CalculateSse(uint adler, ReadOnlySpan<byte> buffer)
139146 const byte S2301 = 0b1011_0001 ; // A B C D -> B A D C
140147 const byte S1032 = 0b0100_1110 ; // A B C D -> C D A B
141148
142- v_s1 = Sse2 . Add ( v_s1 , Sse2 . Shuffle ( v_s1 , S2301 ) ) ;
143149 v_s1 = Sse2 . Add ( v_s1 , Sse2 . Shuffle ( v_s1 , S1032 ) ) ;
144150
145- s1 += ( uint ) v_s1 . ToScalar ( ) ;
151+ s1 += v_s1 . ToScalar ( ) ;
146152
147153 v_s2 = Sse2 . Add ( v_s2 , Sse2 . Shuffle ( v_s2 , S2301 ) ) ;
148154 v_s2 = Sse2 . Add ( v_s2 , Sse2 . Shuffle ( v_s2 , S1032 ) ) ;
149155
150- s2 = ( uint ) v_s2 . ToScalar ( ) ;
156+ s2 = v_s2 . ToScalar ( ) ;
151157
152158 // Reduce.
153159 s1 %= BASE ;
154160 s2 %= BASE ;
155161 }
156- }
157-
158- ref byte bufferRef = ref MemoryMarshal . GetReference ( buffer ) ;
159162
160- if ( length > 0 )
161- {
162- if ( length >= 16 )
163+ if ( length > 0 )
163164 {
164- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
165- s2 += s1 ;
166- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
167- s2 += s1 ;
168- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
169- s2 += s1 ;
170- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
171- s2 += s1 ;
172- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
173- s2 += s1 ;
174- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
175- s2 += s1 ;
176- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
177- s2 += s1 ;
178- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
179- s2 += s1 ;
180- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
181- s2 += s1 ;
182- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
183- s2 += s1 ;
184- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
185- s2 += s1 ;
186- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
187- s2 += s1 ;
188- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
189- s2 += s1 ;
190- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
191- s2 += s1 ;
192- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
193- s2 += s1 ;
194- s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
195- s2 += s1 ;
196- length -= 16 ;
197- }
165+ if ( length >= 16 )
166+ {
167+ s2 += s1 += localBufferPtr [ 0 ] ;
168+ s2 += s1 += localBufferPtr [ 1 ] ;
169+ s2 += s1 += localBufferPtr [ 2 ] ;
170+ s2 += s1 += localBufferPtr [ 3 ] ;
171+ s2 += s1 += localBufferPtr [ 4 ] ;
172+ s2 += s1 += localBufferPtr [ 5 ] ;
173+ s2 += s1 += localBufferPtr [ 6 ] ;
174+ s2 += s1 += localBufferPtr [ 7 ] ;
175+ s2 += s1 += localBufferPtr [ 8 ] ;
176+ s2 += s1 += localBufferPtr [ 9 ] ;
177+ s2 += s1 += localBufferPtr [ 10 ] ;
178+ s2 += s1 += localBufferPtr [ 11 ] ;
179+ s2 += s1 += localBufferPtr [ 12 ] ;
180+ s2 += s1 += localBufferPtr [ 13 ] ;
181+ s2 += s1 += localBufferPtr [ 14 ] ;
182+ s2 += s1 += localBufferPtr [ 15 ] ;
183+
184+ localBufferPtr += 16 ;
185+ length -= 16 ;
186+ }
198187
199- while ( length -- > 0 )
200- {
201- s2 += s1 += Unsafe . Add ( ref bufferRef , index ++ ) ;
202- }
188+ while ( length -- > 0 )
189+ {
190+ s2 += s1 += * localBufferPtr ++ ;
191+ }
203192
204- if ( s1 >= BASE )
205- {
206- s1 -= BASE ;
193+ if ( s1 >= BASE )
194+ {
195+ s1 -= BASE ;
196+ }
197+
198+ s2 %= BASE ;
207199 }
208200
209- s2 %= BASE ;
201+ return s1 | ( s2 << 16 ) ;
210202 }
211-
212- return s1 | ( s2 << 16 ) ;
213203 }
214204#endif
215205
/// <summary>
/// Calculates the Adler32 checksum of <paramref name="buffer"/> one byte at a time,
/// continuing from the running checksum <paramref name="adler"/>.
/// </summary>
/// <param name="adler">The running checksum: s1 in the low 16 bits, s2 in the high 16 bits.</param>
/// <param name="buffer">The bytes to fold into the checksum. May be empty.</param>
/// <returns>The updated checksum, packed as <c>(s2 &lt;&lt; 16) | s1</c>.</returns>
[MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
private static unsafe uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
{
    // Split the packed checksum into its two 16-bit running sums.
    uint s1 = adler & 0xFFFF;
    uint s2 = (adler >> 16) & 0xFFFF;
    uint k;

    fixed (byte* bufferPtr = buffer)
    {
        // The pinned pointer itself is read-only, so walk a local copy.
        byte* localBufferPtr = bufferPtr;
        uint length = (uint)buffer.Length;

        while (length > 0)
        {
            // Sum at most NMAX bytes between reductions so the 32-bit
            // accumulators cannot overflow before the modulo below.
            k = length < NMAX ? length : NMAX;
            length -= k;

            // Unrolled: 16 bytes per iteration.
            while (k >= 16)
            {
                s2 += s1 += localBufferPtr[0];
                s2 += s1 += localBufferPtr[1];
                s2 += s1 += localBufferPtr[2];
                s2 += s1 += localBufferPtr[3];
                s2 += s1 += localBufferPtr[4];
                s2 += s1 += localBufferPtr[5];
                s2 += s1 += localBufferPtr[6];
                s2 += s1 += localBufferPtr[7];
                s2 += s1 += localBufferPtr[8];
                s2 += s1 += localBufferPtr[9];
                s2 += s1 += localBufferPtr[10];
                s2 += s1 += localBufferPtr[11];
                s2 += s1 += localBufferPtr[12];
                s2 += s1 += localBufferPtr[13];
                s2 += s1 += localBufferPtr[14];
                s2 += s1 += localBufferPtr[15];

                localBufferPtr += 16;
                k -= 16;
            }

            // Remaining 0..15 bytes of this chunk. k wraps past zero on exit,
            // which is harmless because it is reassigned before its next use.
            while (k-- > 0)
            {
                s2 += s1 += *localBufferPtr++;
            }

            s1 %= BASE;
            s2 %= BASE;
        }

        return (s2 << 16) | s1;
    }
}
285258 }
286259}
0 commit comments