Skip to content

Commit 1a3e1e7

Browse files
committed
More codegen improvements to shared methods
1 parent 979baf7 commit 1a3e1e7

File tree

3 files changed

+138
-107
lines changed

3 files changed

+138
-107
lines changed

src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
@@ -25,12 +25,14 @@ public static class SRgbCompanding
2525
[MethodImpl(InliningOptions.ShortMethod)]
2626
public static void Expand(Span<Vector4> vectors)
2727
{
28-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
28+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
29+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
2930

30-
for (int i = 0; i < vectors.Length; i++)
31+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
3132
{
32-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
33-
Expand(ref v);
33+
Expand(ref vectorsStart);
34+
35+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
3436
}
3537
}
3638

@@ -41,12 +43,14 @@ public static void Expand(Span<Vector4> vectors)
4143
[MethodImpl(InliningOptions.ShortMethod)]
4244
public static void Compress(Span<Vector4> vectors)
4345
{
44-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
46+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
47+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
4548

46-
for (int i = 0; i < vectors.Length; i++)
49+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
4750
{
48-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
49-
Compress(ref v);
51+
Compress(ref vectorsStart);
52+
53+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
5054
}
5155
}
5256

@@ -90,4 +94,4 @@ public static void Compress(ref Vector4 vector)
9094
[MethodImpl(InliningOptions.ShortMethod)]
9195
public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
9296
}
93-
}
97+
}

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 95 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,11 @@ public static int GreatestCommonDivisor(int a, int b)
4141

4242
/// <summary>
4343
/// Determine the Least Common Multiple (LCM) of two numbers.
44+
/// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
4445
/// </summary>
4546
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4647
public static int LeastCommonMultiple(int a, int b)
47-
{
48-
// https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
49-
return (a / GreatestCommonDivisor(a, b)) * b;
50-
}
48+
=> a / GreatestCommonDivisor(a, b) * b;
5149

5250
/// <summary>
5351
/// Calculates <paramref name="x"/> % 2
@@ -290,10 +288,14 @@ public static void Clamp(Span<byte> span, byte min, byte max)
290288

291289
if (remainder.Length > 0)
292290
{
293-
for (int i = 0; i < remainder.Length; i++)
291+
ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
292+
ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
293+
294+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
294295
{
295-
ref byte v = ref remainder[i];
296-
v = Clamp(v, min, max);
296+
remainderStart = Clamp(remainderStart, min, max);
297+
298+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
297299
}
298300
}
299301
}
@@ -311,10 +313,14 @@ public static void Clamp(Span<uint> span, uint min, uint max)
311313

312314
if (remainder.Length > 0)
313315
{
314-
for (int i = 0; i < remainder.Length; i++)
316+
ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
317+
ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
318+
319+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
315320
{
316-
ref uint v = ref remainder[i];
317-
v = Clamp(v, min, max);
321+
remainderStart = Clamp(remainderStart, min, max);
322+
323+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
318324
}
319325
}
320326
}
@@ -332,10 +338,14 @@ public static void Clamp(Span<int> span, int min, int max)
332338

333339
if (remainder.Length > 0)
334340
{
335-
for (int i = 0; i < remainder.Length; i++)
341+
ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
342+
ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
343+
344+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
336345
{
337-
ref int v = ref remainder[i];
338-
v = Clamp(v, min, max);
346+
remainderStart = Clamp(remainderStart, min, max);
347+
348+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
339349
}
340350
}
341351
}
@@ -353,10 +363,14 @@ public static void Clamp(Span<float> span, float min, float max)
353363

354364
if (remainder.Length > 0)
355365
{
356-
for (int i = 0; i < remainder.Length; i++)
366+
ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
367+
ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
368+
369+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
357370
{
358-
ref float v = ref remainder[i];
359-
v = Clamp(v, min, max);
371+
remainderStart = Clamp(remainderStart, min, max);
372+
373+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
360374
}
361375
}
362376
}
@@ -374,10 +388,14 @@ public static void Clamp(Span<double> span, double min, double max)
374388

375389
if (remainder.Length > 0)
376390
{
377-
for (int i = 0; i < remainder.Length; i++)
391+
ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
392+
ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
393+
394+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
378395
{
379-
ref double v = ref remainder[i];
380-
v = Clamp(v, min, max);
396+
remainderStart = Clamp(remainderStart, min, max);
397+
398+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
381399
}
382400
}
383401
}
@@ -472,10 +490,8 @@ public static void Premultiply(Span<Vector4> vectors)
472490
#if SUPPORTS_RUNTIME_INTRINSICS
473491
if (Avx2.IsSupported && vectors.Length >= 2)
474492
{
475-
ref Vector256<float> vectorsBase =
476-
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
477-
478493
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
494+
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
479495
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
480496

481497
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -495,12 +511,14 @@ public static void Premultiply(Span<Vector4> vectors)
495511
else
496512
#endif
497513
{
498-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
514+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
515+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
499516

500-
for (int i = 0; i < vectors.Length; i++)
517+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
501518
{
502-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
503-
Premultiply(ref v);
519+
Premultiply(ref vectorsStart);
520+
521+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
504522
}
505523
}
506524
}
@@ -515,10 +533,8 @@ public static void UnPremultiply(Span<Vector4> vectors)
515533
#if SUPPORTS_RUNTIME_INTRINSICS
516534
if (Avx2.IsSupported && vectors.Length >= 2)
517535
{
518-
ref Vector256<float> vectorsBase =
519-
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
520-
521536
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
537+
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
522538
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
523539

524540
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -538,12 +554,14 @@ public static void UnPremultiply(Span<Vector4> vectors)
538554
else
539555
#endif
540556
{
541-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
557+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
558+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
542559

543-
for (int i = 0; i < vectors.Length; i++)
560+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
544561
{
545-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
546-
UnPremultiply(ref v);
562+
UnPremultiply(ref vectorsStart);
563+
564+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
547565
}
548566
}
549567
}
@@ -633,53 +651,54 @@ public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
633651
vectors128Ref = y4;
634652
vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
635653
}
636-
637-
return;
638654
}
655+
else
639656
#endif
640-
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
641-
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
642-
643-
// Fallback with scalar preprocessing and vectorized approximation steps
644-
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
645657
{
646-
Vector4 v = vectorsRef;
647-
648-
double
649-
x64 = v.X,
650-
y64 = v.Y,
651-
z64 = v.Z;
652-
float a = v.W;
658+
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
659+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
653660

654-
ulong
655-
xl = *(ulong*)&x64,
656-
yl = *(ulong*)&y64,
657-
zl = *(ulong*)&z64;
658-
659-
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
660-
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case,
661-
// this means what we actually want is to find the cube root of our clamped values.
662-
// For more info on the constant below, see:
663-
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
664-
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and
665-
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
666-
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
667-
xl = 0x2a9f8a7be393b600 + (xl / 3);
668-
yl = 0x2a9f8a7be393b600 + (yl / 3);
669-
zl = 0x2a9f8a7be393b600 + (zl / 3);
670-
671-
Vector4 y4;
672-
y4.X = (float)*(double*)&xl;
673-
y4.Y = (float)*(double*)&yl;
674-
y4.Z = (float)*(double*)&zl;
675-
y4.W = 0;
676-
677-
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
678-
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
679-
y4.W = a;
680-
681-
vectorsRef = y4;
682-
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
661+
// Fallback with scalar preprocessing and vectorized approximation steps
662+
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
663+
{
664+
Vector4 v = vectorsRef;
665+
666+
double
667+
x64 = v.X,
668+
y64 = v.Y,
669+
z64 = v.Z;
670+
float a = v.W;
671+
672+
ulong
673+
xl = *(ulong*)&x64,
674+
yl = *(ulong*)&y64,
675+
zl = *(ulong*)&z64;
676+
677+
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
678+
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
679+
// this means what we actually want is to find the cube root of our clamped values.
680+
// For more info on the constant below, see:
681+
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
682+
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
683+
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
684+
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
685+
xl = 0x2a9f8a7be393b600 + (xl / 3);
686+
yl = 0x2a9f8a7be393b600 + (yl / 3);
687+
zl = 0x2a9f8a7be393b600 + (zl / 3);
688+
689+
Vector4 y4;
690+
y4.X = (float)*(double*)&xl;
691+
y4.Y = (float)*(double*)&yl;
692+
y4.Z = (float)*(double*)&zl;
693+
y4.W = 0;
694+
695+
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
696+
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
697+
y4.W = a;
698+
699+
vectorsRef = y4;
700+
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
701+
}
683702
}
684703
}
685704
}

0 commit comments

Comments
 (0)