More codegen improvements to shared methods

Sergio0694 · Sergio0694 · commit 1a3e1e7a66a2 · 2020-12-15T23:16:26.000+01:00
diff --git a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs
@@ -1,4 +1,4 @@
-﻿// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.
 
 using System;
@@ -25,12 +25,14 @@ public static class SRgbCompanding
         [MethodImpl(InliningOptions.ShortMethod)]
         public static void Expand(Span<Vector4> vectors)
         {
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+            ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+            ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
 
-            for (int i = 0; i < vectors.Length; i++)
+            while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
             {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Expand(ref v);
+                Expand(ref vectorsStart);
+
+                vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
             }
         }
 
@@ -41,12 +43,14 @@ public static void Expand(Span<Vector4> vectors)
         [MethodImpl(InliningOptions.ShortMethod)]
         public static void Compress(Span<Vector4> vectors)
         {
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+            ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+            ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
 
-            for (int i = 0; i < vectors.Length; i++)
+            while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
             {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Compress(ref v);
+                Compress(ref vectorsStart);
+
+                vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
             }
         }
 
@@ -90,4 +94,4 @@ public static void Compress(ref Vector4 vector)
         [MethodImpl(InliningOptions.ShortMethod)]
         public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
     }
-}
+}
diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -41,13 +41,11 @@ public static int GreatestCommonDivisor(int a, int b)
 
         /// <summary>
         /// Determine the Least Common Multiple (LCM) of two numbers.
+        /// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static int LeastCommonMultiple(int a, int b)
-        {
-            // https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
-            return (a / GreatestCommonDivisor(a, b)) * b;
-        }
+            => a / GreatestCommonDivisor(a, b) * b;
 
         /// <summary>
         /// Calculates <paramref name="x"/> % 2
@@ -290,10 +288,14 @@ public static void Clamp(Span<byte> span, byte min, byte max)
 
             if (remainder.Length > 0)
             {
-                for (int i = 0; i < remainder.Length; i++)
+                ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
+                ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
+
+                while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
                 {
-                    ref byte v = ref remainder[i];
-                    v = Clamp(v, min, max);
+                    remainderStart = Clamp(remainderStart, min, max);
+
+                    remainderStart = ref Unsafe.Add(ref remainderStart, 1);
                 }
             }
         }
@@ -311,10 +313,14 @@ public static void Clamp(Span<uint> span, uint min, uint max)
 
             if (remainder.Length > 0)
             {
-                for (int i = 0; i < remainder.Length; i++)
+                ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
+                ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
+
+                while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
                 {
-                    ref uint v = ref remainder[i];
-                    v = Clamp(v, min, max);
+                    remainderStart = Clamp(remainderStart, min, max);
+
+                    remainderStart = ref Unsafe.Add(ref remainderStart, 1);
                 }
             }
         }
@@ -332,10 +338,14 @@ public static void Clamp(Span<int> span, int min, int max)
 
             if (remainder.Length > 0)
             {
-                for (int i = 0; i < remainder.Length; i++)
+                ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
+                ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
+
+                while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
                 {
-                    ref int v = ref remainder[i];
-                    v = Clamp(v, min, max);
+                    remainderStart = Clamp(remainderStart, min, max);
+
+                    remainderStart = ref Unsafe.Add(ref remainderStart, 1);
                 }
             }
         }
@@ -353,10 +363,14 @@ public static void Clamp(Span<float> span, float min, float max)
 
             if (remainder.Length > 0)
             {
-                for (int i = 0; i < remainder.Length; i++)
+                ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
+                ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
+
+                while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
                 {
-                    ref float v = ref remainder[i];
-                    v = Clamp(v, min, max);
+                    remainderStart = Clamp(remainderStart, min, max);
+
+                    remainderStart = ref Unsafe.Add(ref remainderStart, 1);
                 }
             }
         }
@@ -374,10 +388,14 @@ public static void Clamp(Span<double> span, double min, double max)
 
             if (remainder.Length > 0)
             {
-                for (int i = 0; i < remainder.Length; i++)
+                ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
+                ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
+
+                while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
                 {
-                    ref double v = ref remainder[i];
-                    v = Clamp(v, min, max);
+                    remainderStart = Clamp(remainderStart, min, max);
+
+                    remainderStart = ref Unsafe.Add(ref remainderStart, 1);
                 }
             }
         }
@@ -472,10 +490,8 @@ public static void Premultiply(Span<Vector4> vectors)
 #if SUPPORTS_RUNTIME_INTRINSICS
             if (Avx2.IsSupported && vectors.Length >= 2)
             {
-                ref Vector256<float> vectorsBase =
-                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
-
                 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+                ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
                 ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
 
                 while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -495,12 +511,14 @@ public static void Premultiply(Span<Vector4> vectors)
             else
 #endif
             {
-                ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
 
-                for (int i = 0; i < vectors.Length; i++)
+                while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
                 {
-                    ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                    Premultiply(ref v);
+                    Premultiply(ref vectorsStart);
+
+                    vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
                 }
             }
         }
@@ -515,10 +533,8 @@ public static void UnPremultiply(Span<Vector4> vectors)
 #if SUPPORTS_RUNTIME_INTRINSICS
             if (Avx2.IsSupported && vectors.Length >= 2)
             {
-                ref Vector256<float> vectorsBase =
-                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
-
                 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+                ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
                 ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
 
                 while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -538,12 +554,14 @@ public static void UnPremultiply(Span<Vector4> vectors)
             else
 #endif
             {
-                ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
 
-                for (int i = 0; i < vectors.Length; i++)
+                while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
                 {
-                    ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                    UnPremultiply(ref v);
+                    UnPremultiply(ref vectorsStart);
+
+                    vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
                 }
             }
         }
@@ -633,53 +651,54 @@ public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
                     vectors128Ref = y4;
                     vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
                 }
-
-                return;
             }
+            else
 #endif
-            ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
-            ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
-
-            // Fallback with scalar preprocessing and vectorized approximation steps
-            while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
             {
-                Vector4 v = vectorsRef;
-
-                double
-                    x64 = v.X,
-                    y64 = v.Y,
-                    z64 = v.Z;
-                float a = v.W;
+                ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
 
-                ulong
-                    xl = *(ulong*)&x64,
-                    yl = *(ulong*)&y64,
-                    zl = *(ulong*)&z64;
-
-                // Here we use a trick to compute the starting value x0 for the cube root. This is because doing
-                // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case,
-                // this means what we actually want is to find the cube root of our clamped values.
-                // For more info on the  constant below, see:
-                // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
-                // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and
-                // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
-                // register, and use it to accelerate two steps of the Newton approximation using SIMD.
-                xl = 0x2a9f8a7be393b600 + (xl / 3);
-                yl = 0x2a9f8a7be393b600 + (yl / 3);
-                zl = 0x2a9f8a7be393b600 + (zl / 3);
-
-                Vector4 y4;
-                y4.X = (float)*(double*)&xl;
-                y4.Y = (float)*(double*)&yl;
-                y4.Z = (float)*(double*)&zl;
-                y4.W = 0;
-
-                y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
-                y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
-                y4.W = a;
-
-                vectorsRef = y4;
-                vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
+                // Fallback with scalar preprocessing and vectorized approximation steps
+                while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
+                {
+                    Vector4 v = vectorsRef;
+
+                    double
+                        x64 = v.X,
+                        y64 = v.Y,
+                        z64 = v.Z;
+                    float a = v.W;
+
+                    ulong
+                        xl = *(ulong*)&x64,
+                        yl = *(ulong*)&y64,
+                        zl = *(ulong*)&z64;
+
+                    // Here we use a trick to compute the starting value x0 for the cube root. This is because doing
+                    // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case,
+                    // this means what we actually want is to find the cube root of our clamped values.
+                    // For more info on the  constant below, see:
+                    // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
+                    // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and
+                    // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
+                    // register, and use it to accelerate two steps of the Newton approximation using SIMD.
+                    xl = 0x2a9f8a7be393b600 + (xl / 3);
+                    yl = 0x2a9f8a7be393b600 + (yl / 3);
+                    zl = 0x2a9f8a7be393b600 + (zl / 3);
+
+                    Vector4 y4;
+                    y4.X = (float)*(double*)&xl;
+                    y4.Y = (float)*(double*)&yl;
+                    y4.Z = (float)*(double*)&zl;
+                    y4.W = 0;
+
+                    y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
+                    y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
+                    y4.W = a;
+
+                    vectorsRef = y4;
+                    vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
+                }
             }
         }
     }
diff --git a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// Copyright (c) Six Labors.`
	`1`	`+// Copyright (c) Six Labors.`
`2`	`2`	`// Licensed under the Apache License, Version 2.0.`
`3`	`3`
`4`	`4`	`using System;`
`@@ -25,12 +25,14 @@ public static class SRgbCompanding`
`25`	`25`	`[MethodImpl(InliningOptions.ShortMethod)]`
`26`	`26`	`public static void Expand(Span<Vector4> vectors)`
`27`	`27`	`{`
`28`		`- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);`
	`28`	`+ ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);`
	`29`	`+ ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);`
`29`	`30`
`30`		`- for (int i = 0; i < vectors.Length; i++)`
	`31`	`+ while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))`
`31`	`32`	`{`
`32`		`- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);`
`33`		`- Expand(ref v);`
	`33`	`+ Expand(ref vectorsStart);`
	`34`	`+`
	`35`	`+ vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);`
`34`	`36`	`}`
`35`	`37`	`}`
`36`	`38`
`@@ -41,12 +43,14 @@ public static void Expand(Span<Vector4> vectors)`
`41`	`43`	`[MethodImpl(InliningOptions.ShortMethod)]`
`42`	`44`	`public static void Compress(Span<Vector4> vectors)`
`43`	`45`	`{`
`44`		`- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);`
	`46`	`+ ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);`
	`47`	`+ ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);`
`45`	`48`
`46`		`- for (int i = 0; i < vectors.Length; i++)`
	`49`	`+ while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))`
`47`	`50`	`{`
`48`		`- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);`
`49`		`- Compress(ref v);`
	`51`	`+ Compress(ref vectorsStart);`
	`52`	`+`
	`53`	`+ vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);`
`50`	`54`	`}`
`51`	`55`	`}`
`52`	`56`
`@@ -90,4 +94,4 @@ public static void Compress(ref Vector4 vector)`
`90`	`94`	`[MethodImpl(InliningOptions.ShortMethod)]`
`91`	`95`	`public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;`
`92`	`96`	`}`
`93`		`-}`
	`97`	`+}`