Skip to content

Commit af4e4ff

Browse files
committed
Fix vectorized subsampling
1 parent 2d88f2c, commit af4e4ff

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 22 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -494,32 +494,35 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl
494494
var f2 = Vector256.Create(2f);
495495
var f025 = Vector256.Create(0.25f);
496496
Vector256<int> switchInnerDoubleWords = Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));
497-
498-
ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref MemoryMarshal.GetReference(source));
499-
ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 1));
500497
ref Vector256<float> destRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);
501498

502-
for (int i = 0; i < 8; i++)
499+
for (int i = 0; i < 2; i++)
503500
{
504-
Vector256<float> a = in1;
505-
Vector256<float> b = Unsafe.Add(ref in1, 1);
506-
Vector256<float> c = in2;
507-
Vector256<float> d = Unsafe.Add(ref in2, 1);
501+
ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i));
502+
ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1));
503+
504+
for (int j = 0; j < 4; j++)
505+
{
506+
Vector256<float> a = in1;
507+
Vector256<float> b = Unsafe.Add(ref in1, 1);
508+
Vector256<float> c = in2;
509+
Vector256<float> d = Unsafe.Add(ref in2, 1);
508510

509-
Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
510-
Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
511-
Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
512-
Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);
511+
Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
512+
Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
513+
Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
514+
Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);
513515

514-
Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
515-
Vector256<float> add = Avx.Add(sum, f2);
516-
Vector256<float> res = Avx.Multiply(add, f025);
516+
Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
517+
Vector256<float> add = Avx.Add(sum, f2);
518+
Vector256<float> res = Avx.Multiply(add, f025);
517519

518-
destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
519-
destRef = ref Unsafe.Add(ref destRef, 1);
520+
destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
521+
destRef = ref Unsafe.Add(ref destRef, 1);
520522

521-
in1 = ref Unsafe.Add(ref in1, 2);
522-
in2 = ref Unsafe.Add(ref in2, 2);
523+
in1 = ref Unsafe.Add(ref in1, 2);
524+
in2 = ref Unsafe.Add(ref in2, 2);
525+
}
523526
}
524527
#endif
525528
}

0 commit comments

Comments
 (0)