|
1 | | -// Copyright (c) Six Labors. |
| 1 | +// Copyright (c) Six Labors. |
2 | 2 | // Licensed under the Apache License, Version 2.0. |
3 | 3 |
|
4 | 4 | using System; |
5 | 5 | using System.Numerics; |
6 | 6 | using System.Runtime.CompilerServices; |
7 | 7 | using System.Runtime.InteropServices; |
8 | | - |
| 8 | +#if SUPPORTS_RUNTIME_INTRINSICS |
| 9 | +using System.Runtime.Intrinsics; |
| 10 | +using System.Runtime.Intrinsics.X86; |
| 11 | +using static SixLabors.ImageSharp.SimdUtils; |
| 12 | +#endif |
9 | 13 | using SixLabors.ImageSharp.Tuples; |
10 | 14 |
|
11 | 15 | // ReSharper disable ImpureMethodCallOnReadonlyValueField |
@@ -47,6 +51,73 @@ internal static void ConvertCore(in ComponentValues values, Span<Vector4> result |
47 | 51 | "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); |
48 | 52 | } |
49 | 53 |
|
| 54 | +#if SUPPORTS_RUNTIME_INTRINSICS |
| 55 | + ref Vector256<float> yBase = |
| 56 | + ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0)); |
| 57 | + ref Vector256<float> cbBase = |
| 58 | + ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1)); |
| 59 | + ref Vector256<float> crBase = |
| 60 | + ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2)); |
| 61 | + |
| 62 | + ref Vector256<float> resultBase = |
| 63 | + ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result)); |
| 64 | + |
| 65 | + // Used for the color conversion |
| 66 | + var chromaOffset = Vector256.Create(-halfValue); |
| 67 | + var scale = Vector256.Create(1 / maxValue); |
| 68 | + var rCrMult = Vector256.Create(1.402F); |
| 69 | + var gCbMult = Vector256.Create(-0.344136F); |
| 70 | + var gCrMult = Vector256.Create(-0.714136F); |
| 71 | + var bCbMult = Vector256.Create(1.772F); |
| 72 | + |
| 73 | + // Used for packing. |
| 74 | + var va = Vector256.Create(1F); |
| 75 | + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); |
| 76 | + Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control); |
| 77 | + |
| 78 | + // Walking 8 elements at one step: |
| 79 | + int n = result.Length / 8; |
| 80 | + for (int i = 0; i < n; i++) |
| 81 | + { |
| 82 | + // y = yVals[i]; |
| 83 | + // cb = cbVals[i] - 128F; |
| 84 | + // cr = crVals[i] - 128F; |
| 85 | + Vector256<float> y = Unsafe.Add(ref yBase, i); |
| 86 | + Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); |
| 87 | + Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); |
| 88 | + |
| 89 | + y = Avx2.PermuteVar8x32(y, vcontrol); |
| 90 | + cb = Avx2.PermuteVar8x32(cb, vcontrol); |
| 91 | + cr = Avx2.PermuteVar8x32(cr, vcontrol); |
| 92 | + |
| 93 | + // r = y + (1.402F * cr); |
| 94 | + // g = y - (0.344136F * cb) - (0.714136F * cr); |
| 95 | + // b = y + (1.772F * cb); |
| 96 | + // Adding & multiplying 8 elements at one time: |
| 97 | + Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); |
| 98 | + Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); |
| 99 | + Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); |
| 100 | + |
| 101 | + // TODO: We should be saving to RGBA not Vector4 |
| 102 | + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); |
| 103 | + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); |
| 104 | + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); |
| 105 | + |
| 106 | + Vector256<float> vte = Avx.UnpackLow(r, b); |
| 107 | + Vector256<float> vto = Avx.UnpackLow(g, va); |
| 108 | + |
| 109 | + ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4); |
| 110 | + |
| 111 | + destination = Avx.UnpackLow(vte, vto); |
| 112 | + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); |
| 113 | + |
| 114 | + vte = Avx.UnpackHigh(r, b); |
| 115 | + vto = Avx.UnpackHigh(g, va); |
| 116 | + |
| 117 | + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); |
| 118 | + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); |
| 119 | + } |
| 120 | +#else |
50 | 121 | ref Vector<float> yBase = |
51 | 122 | ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0)); |
52 | 123 | ref Vector<float> cbBase = |
@@ -104,6 +175,7 @@ internal static void ConvertCore(in ComponentValues values, Span<Vector4> result |
104 | 175 | ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); |
105 | 176 | destination.Pack(ref rr, ref gg, ref bb); |
106 | 177 | } |
| 178 | +#endif |
107 | 179 | } |
108 | 180 | } |
109 | 181 | } |
|
0 commit comments