Skip to content

Commit 954d233

Browse files
Merge pull request #1517 from tkp1n/feature/vectorize-scale-to-dst-block
Vectorize Scale16X16To8X8
2 parents 7eb5cc0 + 4fb1859 commit 954d233

File tree

3 files changed

+88
-0
lines changed

3 files changed

+88
-0
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ public static class HwIntrinsics
1919

2020
/// <summary>
/// 32 mask bytes forming eight 4-byte groups whose leading bytes are 0, 2, 4, 6, 1, 3, 5, 7 (all other bytes are zero).
/// NOTE(review): presumably reinterpreted as eight little-endian 32-bit permute indices (even elements first, then odd) - confirm at the use site.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
2121

22+
/// <summary>
/// 32 mask bytes forming eight 4-byte groups whose leading bytes are 0, 1, 4, 5, 2, 3, 6, 7 (all other bytes are zero).
/// When reinterpreted as a <c>Vector256&lt;int&gt;</c> and used as the control of <c>Avx2.PermuteVar8x32</c>,
/// elements 2, 3 and elements 4, 5 trade places while elements 0, 1, 6 and 7 stay where they are.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };
23+
2224
/// <summary>
/// 16 mask bytes: the indices 0..11 in groups of three, each group followed by a 0x80 entry.
/// NOTE(review): looks like a byte-shuffle control that widens 3-byte groups to 4 bytes, with 0x80 yielding a zero pad byte - confirm at the use site.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
2325

2426
/// <summary>
/// 16 mask bytes: the first three indices of each 4-byte group (0-2, 4-6, 8-10, 12-14) followed by four 0x80 entries.
/// NOTE(review): looks like a byte-shuffle control that drops every fourth byte and zero-fills the tail - confirm at the use site.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,57 @@ public static unsafe void Quantize(
485485
/// <param name="source">The source block.</param>
486486
public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Take the AVX2 code path whenever the executing hardware offers it.
    if (Avx2.IsSupported)
    {
        Scale16X16To8X8Vectorized(ref destination, source);
    }
    else
#endif
    {
        // Portable implementation: used when intrinsics are compiled out or AVX2 is unavailable.
        Scale16X16To8X8Scalar(ref destination, source);
    }
}
498+
499+
/// <summary>
/// AVX2 implementation of the 16x16 to 8x8 downscale. Every destination value is computed from a
/// 2x2 group of source values as <c>(a + b + c + d + 2) * 0.25</c>.
/// </summary>
/// <param name="destination">The destination block.</param>
/// <param name="source">
/// The source blocks. Four blocks are read: <c>source[0]</c> and <c>source[1]</c> produce the left and right
/// halves of destination rows 0-3, <c>source[2]</c> and <c>source[3]</c> those of destination rows 4-7.
/// </param>
private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method");
    Debug.Assert(source.Length >= 4, "Four source blocks are required");

    var f2 = Vector256.Create(2f);
    var f025 = Vector256.Create(0.25f);
    Vector256<int> switchInnerDoubleWords = Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));

    ref Block8x8F sourceBase = ref MemoryMarshal.GetReference(source);
    ref Vector256<float> destRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);

    // The four source blocks are separate 8x8 tiles, so the row references must be re-based for the
    // bottom half: simply walking on from blocks 0/1 would read blocks 1/2 instead of blocks 2/3.
    for (int half = 0; half < 2; half++)
    {
        ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceBase, 2 * half));
        ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceBase, (2 * half) + 1));

        for (int row = 0; row < 4; row++)
        {
            // Two vertically adjacent rows of the left block (a, b) and of the right block (c, d).
            Vector256<float> a = Unsafe.Add(ref in1, 2 * row);
            Vector256<float> b = Unsafe.Add(ref in1, (2 * row) + 1);
            Vector256<float> c = Unsafe.Add(ref in2, 2 * row);
            Vector256<float> d = Unsafe.Add(ref in2, (2 * row) + 1);

            // Per 128-bit lane: even-indexed (calc1, calc3) and odd-indexed (calc2, calc4) elements,
            // taken from the left block in the low two slots and the right block in the high two.
            Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
            Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
            Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
            Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);

            Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
            Vector256<float> add = Avx.Add(sum, f2);
            Vector256<float> res = Avx.Multiply(add, f025);

            // The lane-wise shuffle leaves the result as [L0 L1 R0 R1 | L2 L3 R2 R3];
            // swapping the inner element pairs restores [L0 L1 L2 L3 R0 R1 R2 R3].
            Unsafe.Add(ref destRef, (4 * half) + row) = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
        }
    }
#endif
}
536+
537+
private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
538+
{
488539
for (int i = 0; i < 4; i++)
489540
{
490541
int dstOff = ((i & 2) << 4) | ((i & 1) << 2);
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
using System;
2+
using BenchmarkDotNet.Attributes;
3+
using SixLabors.ImageSharp.Formats.Jpeg.Components;
4+
5+
namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components
6+
{
7+
/// <summary>
/// Benchmarks <c>Block8x8F.Scale16X16To8X8</c>, which reduces four source blocks into one destination block.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_Scale16X16To8X8
{
    // Fixed seed so that every benchmark job and process measures identical input data.
    private const int Seed = 42;

    // The four blocks that are scaled down.
    private readonly Block8x8F[] source = new Block8x8F[4];

    // The block that receives the scaled result.
    private Block8x8F destination;

    /// <summary>
    /// Fills the source blocks and the destination block with the same pseudo-random values.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        var random = new Random(Seed);

        float[] values = new float[8 * 8];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = (float)random.NextDouble();
        }

        for (int i = 0; i < this.source.Length; i++)
        {
            this.source[i] = Block8x8F.Load(values);
        }

        this.destination = Block8x8F.Load(values);
    }

    [Benchmark]
    public void Scale16X16To8X8() => Block8x8F.Scale16X16To8X8(ref this.destination, this.source);
}
35+
}

0 commit comments

Comments
 (0)