Skip to content

Commit b37044f

Browse files
Merge pull request #1411 from tkp1n/tkp1n/avx2-color-converter
Vectorize (AVX2) JPEG Color Converter
2 parents 9f51a92 + 0aa3ba5 commit b37044f

29 files changed

+1643
-455
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,25 @@ internal static partial class SimdUtils
2525
public static bool HasVector8 { get; } =
2626
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
2727

28+
/// <summary>
29+
/// Gets a value indicating whether <see cref="Vector{T}"/> code is being JIT-ed to SSE instructions
30+
/// where float and integer registers are 128 bits wide.
31+
/// </summary>
32+
public static bool HasVector4 { get; } =
33+
Vector.IsHardwareAccelerated && Vector<float>.Count == 4;
34+
35+
public static bool HasAvx2
36+
{
37+
get
38+
{
39+
#if SUPPORTS_RUNTIME_INTRINSICS
40+
return Avx2.IsSupported;
41+
#else
42+
return false;
43+
#endif
44+
}
45+
}
46+
2847
/// <summary>
2948
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
3049
/// </summary>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
5+
{
6+
internal abstract partial class JpegColorConverter
7+
{
8+
internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
9+
{
10+
protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
11+
: base(colorSpace, precision, 8)
12+
{
13+
}
14+
15+
protected sealed override bool IsAvailable => SimdUtils.HasAvx2;
16+
}
17+
}
18+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
5+
{
6+
internal abstract partial class JpegColorConverter
7+
{
8+
internal abstract class BasicJpegColorConverter : JpegColorConverter
9+
{
10+
protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
11+
: base(colorSpace, precision)
12+
{
13+
}
14+
15+
protected override bool IsAvailable => true;
16+
}
17+
}
18+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Numerics;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
#if SUPPORTS_RUNTIME_INTRINSICS
9+
using System.Runtime.Intrinsics;
10+
using System.Runtime.Intrinsics.X86;
11+
using static SixLabors.ImageSharp.SimdUtils;
12+
#endif
13+
14+
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
15+
{
16+
internal abstract partial class JpegColorConverter
17+
{
18+
internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
19+
{
20+
public FromCmykAvx2(int precision)
21+
: base(JpegColorSpace.Cmyk, precision)
22+
{
23+
}
24+
25+
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
26+
{
27+
#if SUPPORTS_RUNTIME_INTRINSICS
28+
ref Vector256<float> cBase =
29+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
30+
ref Vector256<float> mBase =
31+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
32+
ref Vector256<float> yBase =
33+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
34+
ref Vector256<float> kBase =
35+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
36+
37+
ref Vector256<float> resultBase =
38+
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
39+
40+
// Used for the color conversion
41+
var scale = Vector256.Create(1 / this.MaximumValue);
42+
var one = Vector256.Create(1F);
43+
44+
// Used for packing
45+
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
46+
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
47+
48+
int n = result.Length / 8;
49+
for (int i = 0; i < n; i++)
50+
{
51+
Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
52+
Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
53+
Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
54+
Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
55+
56+
k = Avx.Multiply(k, scale);
57+
58+
c = Avx.Multiply(Avx.Multiply(c, k), scale);
59+
m = Avx.Multiply(Avx.Multiply(m, k), scale);
60+
y = Avx.Multiply(Avx.Multiply(y, k), scale);
61+
62+
Vector256<float> cmLo = Avx.UnpackLow(c, m);
63+
Vector256<float> yoLo = Avx.UnpackLow(y, one);
64+
Vector256<float> cmHi = Avx.UnpackHigh(c, m);
65+
Vector256<float> yoHi = Avx.UnpackHigh(y, one);
66+
67+
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
68+
69+
destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
70+
Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
71+
Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
72+
Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
73+
}
74+
#endif
75+
}
76+
77+
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
78+
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
79+
}
80+
}
81+
}
Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
88
{
99
internal abstract partial class JpegColorConverter
1010
{
11-
internal sealed class FromCmyk : JpegColorConverter
11+
internal sealed class FromCmykBasic : BasicJpegColorConverter
1212
{
13-
public FromCmyk(int precision)
13+
public FromCmykBasic(int precision)
1414
: base(JpegColorSpace.Cmyk, precision)
1515
{
1616
}
1717

1818
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
1919
{
20-
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
20+
ConvertCore(values, result, this.MaximumValue);
21+
}
22+
23+
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
24+
{
2125
ReadOnlySpan<float> cVals = values.Component0;
2226
ReadOnlySpan<float> mVals = values.Component1;
2327
ReadOnlySpan<float> yVals = values.Component2;
2428
ReadOnlySpan<float> kVals = values.Component3;
2529

2630
var v = new Vector4(0, 0, 0, 1F);
2731

28-
var maximum = 1 / this.MaximumValue;
32+
var maximum = 1 / maxValue;
2933
var scale = new Vector4(maximum, maximum, maximum, 1F);
3034

3135
for (int i = 0; i < result.Length; i++)
3236
{
3337
float c = cVals[i];
3438
float m = mVals[i];
3539
float y = yVals[i];
36-
float k = kVals[i] / this.MaximumValue;
40+
float k = kVals[i] / maxValue;
3741

3842
v.X = c * k;
3943
v.Y = m * k;
@@ -47,4 +51,4 @@ public override void ConvertToRgba(in ComponentValues values, Span<Vector4> resu
4751
}
4852
}
4953
}
50-
}
54+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Numerics;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
using SixLabors.ImageSharp.Tuples;
9+
10+
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
11+
{
12+
internal abstract partial class JpegColorConverter
13+
{
14+
internal sealed class FromCmykVector8 : Vector8JpegColorConverter
15+
{
16+
public FromCmykVector8(int precision)
17+
: base(JpegColorSpace.Cmyk, precision)
18+
{
19+
}
20+
21+
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
22+
{
23+
ref Vector<float> cBase =
24+
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
25+
ref Vector<float> mBase =
26+
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
27+
ref Vector<float> yBase =
28+
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
29+
ref Vector<float> kBase =
30+
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
31+
32+
ref Vector4Octet resultBase =
33+
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
34+
35+
Vector4Pair cc = default;
36+
Vector4Pair mm = default;
37+
Vector4Pair yy = default;
38+
ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
39+
ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
40+
ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);
41+
42+
var scale = new Vector<float>(1 / this.MaximumValue);
43+
44+
// Walking 8 elements at one step:
45+
int n = result.Length / 8;
46+
for (int i = 0; i < n; i++)
47+
{
48+
Vector<float> c = Unsafe.Add(ref cBase, i);
49+
Vector<float> m = Unsafe.Add(ref mBase, i);
50+
Vector<float> y = Unsafe.Add(ref yBase, i);
51+
Vector<float> k = Unsafe.Add(ref kBase, i) * scale;
52+
53+
c = (c * k) * scale;
54+
m = (m * k) * scale;
55+
y = (y * k) * scale;
56+
57+
ccRefAsVector = c;
58+
mmRefAsVector = m;
59+
yyRefAsVector = y;
60+
61+
// Collect (c0,c1...c7) (m0,m1...m7) (y0,y1...y7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
62+
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
63+
destination.Pack(ref cc, ref mm, ref yy);
64+
}
65+
}
66+
67+
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
68+
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
69+
}
70+
}
71+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Numerics;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
#if SUPPORTS_RUNTIME_INTRINSICS
9+
using System.Runtime.Intrinsics;
10+
using System.Runtime.Intrinsics.X86;
11+
using static SixLabors.ImageSharp.SimdUtils;
12+
#endif
13+
14+
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
15+
{
16+
internal abstract partial class JpegColorConverter
17+
{
18+
internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
19+
{
20+
public FromGrayscaleAvx2(int precision)
21+
: base(JpegColorSpace.Grayscale, precision)
22+
{
23+
}
24+
25+
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
26+
{
27+
#if SUPPORTS_RUNTIME_INTRINSICS
28+
ref Vector256<float> gBase =
29+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
30+
31+
ref Vector256<float> resultBase =
32+
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
33+
34+
// Used for the color conversion
35+
var scale = Vector256.Create(1 / this.MaximumValue);
36+
var one = Vector256.Create(1F);
37+
38+
// Used for packing
39+
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
40+
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
41+
42+
int n = result.Length / 8;
43+
for (int i = 0; i < n; i++)
44+
{
45+
Vector256<float> g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);
46+
47+
g = Avx2.PermuteVar8x32(g, vcontrol);
48+
49+
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
50+
51+
destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
52+
Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
53+
Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
54+
Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
55+
}
56+
#endif
57+
}
58+
59+
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
60+
FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
61+
}
62+
}
63+
}
Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
1010
{
1111
internal abstract partial class JpegColorConverter
1212
{
13-
internal sealed class FromGrayscale : JpegColorConverter
13+
internal sealed class FromGrayscaleBasic : BasicJpegColorConverter
1414
{
15-
public FromGrayscale(int precision)
15+
public FromGrayscaleBasic(int precision)
1616
: base(JpegColorSpace.Grayscale, precision)
1717
{
1818
}
1919

2020
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
2121
{
22-
var maximum = 1 / this.MaximumValue;
22+
ConvertCore(values, result, this.MaximumValue);
23+
}
24+
25+
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
26+
{
27+
var maximum = 1 / maxValue;
2328
var scale = new Vector4(maximum, maximum, maximum, 1F);
2429

2530
ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
@@ -35,4 +40,4 @@ public override void ConvertToRgba(in ComponentValues values, Span<Vector4> resu
3540
}
3641
}
3742
}
38-
}
43+
}

0 commit comments

Comments
 (0)