Skip to content

Commit 179dc94

Browse files
Update Crc32 based on feedback
1 parent f8809f5 commit 179dc94

File tree

4 files changed

+78
-71
lines changed

4 files changed

+78
-71
lines changed

src/ImageSharp/Formats/Png/Zlib/Adler32.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ internal static class Adler32
3131
#if SUPPORTS_RUNTIME_INTRINSICS
3232
private const int MinBufferSize = 64;
3333

34+
// The C# compiler emits this as a compile-time constant embedded in the PE file.
3435
private static ReadOnlySpan<byte> Tap1Tap2 => new byte[]
3536
{
3637
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1

src/ImageSharp/Formats/Png/Zlib/Crc32.cs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
namespace SixLabors.ImageSharp.Formats.Png.Zlib
1313
{
1414
/// <summary>
15-
/// Calculates the 32 bit Cyclic Redundancy Check (CRC) checksum of a given buffer according to the
16-
/// IEEE 802.3 specification.
15+
/// Calculates the 32 bit Cyclic Redundancy Check (CRC) checksum of a given buffer
16+
/// according to the IEEE 802.3 specification.
1717
/// </summary>
1818
internal static partial class Crc32
1919
{
@@ -28,10 +28,13 @@ internal static partial class Crc32
2828

2929
// Definitions of the bit-reflected domain constants k1, k2, k3, etc and
3030
// the CRC32+Barrett polynomials given at the end of the paper.
31-
private static ulong[] k1k2 = { 0x0154442bd4, 0x01c6e41596 };
32-
private static ulong[] k3k4 = { 0x01751997d0, 0x00ccaa009e };
33-
private static ulong[] k5k0 = { 0x0163cd6124, 0x0000000000 };
34-
private static ulong[] poly = { 0x01db710641, 0x01f7011641 };
31+
private static readonly ulong[] K05Poly =
32+
{
33+
0x0154442bd4, 0x01c6e41596, // k1, k2
34+
0x01751997d0, 0x00ccaa009e, // k3, k4
35+
0x0163cd6124, 0x0000000000, // k5, k0
36+
0x01db710641, 0x01f7011641 // polynomial
37+
};
3538
#endif
3639

3740
/// <summary>
@@ -79,13 +82,11 @@ private static unsafe uint CalculateSse(uint crc, ReadOnlySpan<byte> buffer)
7982
int chunksize = buffer.Length & ~ChunksizeMask;
8083
int length = chunksize;
8184

82-
fixed (byte* bufferPtr = &buffer[0])
83-
fixed (ulong* k1k2Ptr = &k1k2[0])
84-
fixed (ulong* k3k4Ptr = &k3k4[0])
85-
fixed (ulong* k5k0Ptr = &k5k0[0])
86-
fixed (ulong* polyPtr = &poly[0])
85+
fixed (byte* bufferPtr = buffer)
86+
fixed (ulong* k05PolyPtr = K05Poly)
8787
{
8888
byte* localBufferPtr = bufferPtr;
89+
ulong* localK05PolyPtr = k05PolyPtr;
8990

9091
// There's at least one block of 64.
9192
Vector128<ulong> x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
@@ -95,7 +96,9 @@ private static unsafe uint CalculateSse(uint crc, ReadOnlySpan<byte> buffer)
9596
Vector128<ulong> x5;
9697

9798
x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());
98-
Vector128<ulong> x0 = Sse2.LoadVector128(k1k2Ptr);
99+
100+
// k1, k2
101+
Vector128<ulong> x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
99102

100103
localBufferPtr += 64;
101104
length -= 64;
@@ -133,7 +136,8 @@ private static unsafe uint CalculateSse(uint crc, ReadOnlySpan<byte> buffer)
133136
}
134137

135138
// Fold into 128-bits.
136-
x0 = Sse2.LoadVector128(k3k4Ptr);
139+
// k3, k4
140+
x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
137141

138142
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
139143
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
@@ -170,15 +174,17 @@ private static unsafe uint CalculateSse(uint crc, ReadOnlySpan<byte> buffer)
170174
x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
171175
x1 = Sse2.Xor(x1, x2);
172176

173-
x0 = Sse2.LoadScalarVector128(k5k0Ptr);
177+
// k5, k0
178+
x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);
174179

175180
x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
176181
x1 = Sse2.And(x1, x3);
177182
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
178183
x1 = Sse2.Xor(x1, x2);
179184

180185
// Barrett reduce to 32-bits.
181-
x0 = Sse2.LoadVector128(polyPtr);
186+
// polynomial
187+
x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);
182188

183189
x2 = Sse2.And(x1, x3);
184190
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);

tests/ImageSharp.Benchmarks/General/Adler32Benchmark.cs

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,32 @@ public uint SixLaborsCalculate()
4141

4242
// ########## 17/05/2020 ##########
4343
//
44-
// | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
45-
// |--------------------- |-------------- |------ |------------:|-------------:|-----------:|------:|--------:|------:|------:|------:|----------:|
46-
// | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 847.94 ns | 180.284 ns | 9.882 ns | 1.00 | 0.00 | - | - | - | - |
47-
// | SixLaborsCalculate | .NET 4.7.2 | 1024 | 458.80 ns | 146.235 ns | 8.016 ns | 0.54 | 0.02 | - | - | - | - |
48-
// | | | | | | | | | | | | |
49-
// | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 817.11 ns | 31.211 ns | 1.711 ns | 1.00 | 0.00 | - | - | - | - |
50-
// | SixLaborsCalculate | .NET Core 2.1 | 1024 | 421.48 ns | 86.149 ns | 4.722 ns | 0.52 | 0.01 | - | - | - | - |
51-
// | | | | | | | | | | | | |
52-
// | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 879.38 ns | 37.804 ns | 2.072 ns | 1.00 | 0.00 | - | - | - | - |
53-
// | SixLaborsCalculate | .NET Core 3.1 | 1024 | 57.27 ns | 2.008 ns | 0.110 ns | 0.07 | 0.00 | - | - | - | - |
54-
// | | | | | | | | | | | | |
55-
// | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 1,660.62 ns | 46.912 ns | 2.571 ns | 1.00 | 0.00 | - | - | - | - |
56-
// | SixLaborsCalculate | .NET 4.7.2 | 2048 | 938.41 ns | 3,137.008 ns | 171.950 ns | 0.57 | 0.10 | - | - | - | - |
57-
// | | | | | | | | | | | | |
58-
// | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 1,616.69 ns | 172.974 ns | 9.481 ns | 1.00 | 0.00 | - | - | - | - |
59-
// | SixLaborsCalculate | .NET Core 2.1 | 2048 | 871.52 ns | 485.678 ns | 26.622 ns | 0.54 | 0.02 | - | - | - | - |
60-
// | | | | | | | | | | | | |
61-
// | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 1,746.34 ns | 110.539 ns | 6.059 ns | 1.00 | 0.00 | - | - | - | - |
62-
// | SixLaborsCalculate | .NET Core 3.1 | 2048 | 96.31 ns | 24.491 ns | 1.342 ns | 0.06 | 0.00 | - | - | - | - |
63-
// | | | | | | | | | | | | |
64-
// | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 3,102.18 ns | 484.204 ns | 26.541 ns | 1.00 | 0.00 | - | - | - | - |
65-
// | SixLaborsCalculate | .NET 4.7.2 | 4096 | 1,729.49 ns | 104.446 ns | 5.725 ns | 0.56 | 0.00 | - | - | - | - |
66-
// | | | | | | | | | | | | |
67-
// | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 3,251.55 ns | 607.086 ns | 33.276 ns | 1.00 | 0.00 | - | - | - | - |
68-
// | SixLaborsCalculate | .NET Core 2.1 | 4096 | 1,669.22 ns | 25.194 ns | 1.381 ns | 0.51 | 0.01 | - | - | - | - |
69-
// | | | | | | | | | | | | |
70-
// | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 3,514.15 ns | 719.548 ns | 39.441 ns | 1.00 | 0.00 | - | - | - | - |
71-
// | SixLaborsCalculate | .NET Core 3.1 | 4096 | 180.12 ns | 55.425 ns | 3.038 ns | 0.05 | 0.00 | - | - | - | - |
44+
// | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
45+
// |--------------------- |-------------- |------ |------------:|------------:|----------:|------:|--------:|------:|------:|------:|----------:|
46+
// | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 793.18 ns | 775.66 ns | 42.516 ns | 1.00 | 0.00 | - | - | - | - |
47+
// | SixLaborsCalculate | .NET 4.7.2 | 1024 | 384.86 ns | 15.64 ns | 0.857 ns | 0.49 | 0.03 | - | - | - | - |
48+
// | | | | | | | | | | | | |
49+
// | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 790.31 ns | 353.34 ns | 19.368 ns | 1.00 | 0.00 | - | - | - | - |
50+
// | SixLaborsCalculate | .NET Core 2.1 | 1024 | 465.28 ns | 652.41 ns | 35.761 ns | 0.59 | 0.03 | - | - | - | - |
51+
// | | | | | | | | | | | | |
52+
// | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 877.25 ns | 97.89 ns | 5.365 ns | 1.00 | 0.00 | - | - | - | - |
53+
// | SixLaborsCalculate | .NET Core 3.1 | 1024 | 45.60 ns | 13.28 ns | 0.728 ns | 0.05 | 0.00 | - | - | - | - |
54+
// | | | | | | | | | | | | |
55+
// | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 1,537.04 ns | 428.44 ns | 23.484 ns | 1.00 | 0.00 | - | - | - | - |
56+
// | SixLaborsCalculate | .NET 4.7.2 | 2048 | 849.76 ns | 1,066.34 ns | 58.450 ns | 0.55 | 0.04 | - | - | - | - |
57+
// | | | | | | | | | | | | |
58+
// | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 1,616.97 ns | 276.70 ns | 15.167 ns | 1.00 | 0.00 | - | - | - | - |
59+
// | SixLaborsCalculate | .NET Core 2.1 | 2048 | 790.77 ns | 691.71 ns | 37.915 ns | 0.49 | 0.03 | - | - | - | - |
60+
// | | | | | | | | | | | | |
61+
// | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 1,735.11 ns | 1,374.22 ns | 75.325 ns | 1.00 | 0.00 | - | - | - | - |
62+
// | SixLaborsCalculate | .NET Core 3.1 | 2048 | 87.80 ns | 56.84 ns | 3.116 ns | 0.05 | 0.00 | - | - | - | - |
63+
// | | | | | | | | | | | | |
64+
// | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 3,054.53 ns | 796.41 ns | 43.654 ns | 1.00 | 0.00 | - | - | - | - |
65+
// | SixLaborsCalculate | .NET 4.7.2 | 4096 | 1,538.90 ns | 487.02 ns | 26.695 ns | 0.50 | 0.01 | - | - | - | - |
66+
// | | | | | | | | | | | | |
67+
// | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 3,223.48 ns | 32.32 ns | 1.771 ns | 1.00 | 0.00 | - | - | - | - |
68+
// | SixLaborsCalculate | .NET Core 2.1 | 4096 | 1,547.60 ns | 309.72 ns | 16.977 ns | 0.48 | 0.01 | - | - | - | - |
69+
// | | | | | | | | | | | | |
70+
// | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 3,672.33 ns | 1,095.81 ns | 60.065 ns | 1.00 | 0.00 | - | - | - | - |
71+
// | SixLaborsCalculate | .NET Core 3.1 | 4096 | 159.44 ns | 36.31 ns | 1.990 ns | 0.04 | 0.00 | - | - | - | - |
7272
}

0 commit comments

Comments
 (0)