Skip to content

Commit 8806d6b

Browse files
committed
Add Avx version of CollectColorRedTransforms
1 parent c15e62c commit 8806d6b

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ internal static unsafe class PredictorEncoder
3939

4040
// 128-bit mask in which every 16-bit lane holds 0x00ff; AND-ing with it keeps the low byte of each 16-bit lane and clears the high byte.
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
4141

42+
// 256-bit mask in which every 32-bit element holds 0x0000ff00 (only bits 8-15 set).
// The AVX2 path of CollectColorRedTransforms ANDs each loaded pixel with it to isolate the green channel before packing.
private static readonly Vector256<byte> CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte();
43+
44+
// 256-bit mask in which every 16-bit lane holds 0x00ff.
// The AVX2 path of CollectColorRedTransforms ANDs the subtraction result with it so that only the low byte
// (the transformed red value) of each 16-bit lane remains and can be used as a histogram index.
private static readonly Vector256<byte> CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte();
45+
4246
// 128-bit byte mask with the repeating pattern (0, 255): every odd-indexed byte is selected, every even-indexed byte is cleared.
// NOTE(review): presumably selects the green byte of packed pixel data in the blue-transform path - the use site is not visible here, confirm.
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
4347

4448
// 128-bit byte mask with the repeating pattern (255, 255, 0, 0): the two low bytes of every 4-byte group are selected, the two high bytes are cleared.
// NOTE(review): presumably keeps the blue and green bytes of each pixel in the blue-transform path - the use site is not visible here, confirm.
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
@@ -1071,7 +1075,48 @@ private static double GetPredictionCostCrossColorBlue(
10711075
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
10721076
{
10731077
#if SUPPORTS_RUNTIME_INTRINSICS
1074-
if (Sse41.IsSupported)
1078+
if (Avx2.IsSupported && tileWidth > 16)
1079+
{
1080+
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed));
1081+
const int span = 16;
1082+
Span<ushort> values = stackalloc ushort[span];
1083+
for (int y = 0; y < tileHeight; y++)
1084+
{
1085+
Span<uint> srcSpan = bgra.Slice(y * stride);
1086+
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
1087+
for (int x = 0; x + span <= tileWidth; x += span)
1088+
{
1089+
int input0Idx = x;
1090+
int input1Idx = x + (span / 2);
1091+
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
1092+
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
1093+
Vector256<byte> g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
1094+
Vector256<byte> g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
1095+
Vector256<ushort> g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
1096+
Vector256<int> a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
1097+
Vector256<int> a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
1098+
Vector256<ushort> a = Avx2.PackUnsignedSaturate(a0, a1); // x r
1099+
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
1100+
Vector256<byte> c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
1101+
Vector256<byte> d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'
1102+
1103+
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
1104+
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = d.AsUInt16();
1105+
1106+
for (int i = 0; i < span; i++)
1107+
{
1108+
++histo[values[i]];
1109+
}
1110+
}
1111+
}
1112+
1113+
int leftOver = tileWidth & (span - 1);
1114+
if (leftOver > 0)
1115+
{
1116+
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
1117+
}
1118+
}
1119+
else if (Sse41.IsSupported)
10751120
{
10761121
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
10771122
const int span = 8;

0 commit comments

Comments
 (0)