Skip to content

Commit 5bb01ac

Browse files
Merge pull request #1385 from SixLabors/js/feature-testing
Advanced Feature Testing
2 parents 4e61d87 + b3ce02c commit 5bb01ac

File tree

11 files changed

+804
-308
lines changed

11 files changed

+804
-308
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

Lines changed: 0 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -10,86 +10,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
1010
{
1111
internal partial struct Block8x8F
1212
{
13-
/// <summary>
14-
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
15-
/// </summary>
16-
/// <param name="d">The destination block</param>
17-
[MethodImpl(InliningOptions.ShortMethod)]
18-
public void TransposeIntoFallback(ref Block8x8F d)
19-
{
20-
d.V0L.X = V0L.X;
21-
d.V1L.X = V0L.Y;
22-
d.V2L.X = V0L.Z;
23-
d.V3L.X = V0L.W;
24-
d.V4L.X = V0R.X;
25-
d.V5L.X = V0R.Y;
26-
d.V6L.X = V0R.Z;
27-
d.V7L.X = V0R.W;
28-
29-
d.V0L.Y = V1L.X;
30-
d.V1L.Y = V1L.Y;
31-
d.V2L.Y = V1L.Z;
32-
d.V3L.Y = V1L.W;
33-
d.V4L.Y = V1R.X;
34-
d.V5L.Y = V1R.Y;
35-
d.V6L.Y = V1R.Z;
36-
d.V7L.Y = V1R.W;
37-
38-
d.V0L.Z = V2L.X;
39-
d.V1L.Z = V2L.Y;
40-
d.V2L.Z = V2L.Z;
41-
d.V3L.Z = V2L.W;
42-
d.V4L.Z = V2R.X;
43-
d.V5L.Z = V2R.Y;
44-
d.V6L.Z = V2R.Z;
45-
d.V7L.Z = V2R.W;
46-
47-
d.V0L.W = V3L.X;
48-
d.V1L.W = V3L.Y;
49-
d.V2L.W = V3L.Z;
50-
d.V3L.W = V3L.W;
51-
d.V4L.W = V3R.X;
52-
d.V5L.W = V3R.Y;
53-
d.V6L.W = V3R.Z;
54-
d.V7L.W = V3R.W;
55-
56-
d.V0R.X = V4L.X;
57-
d.V1R.X = V4L.Y;
58-
d.V2R.X = V4L.Z;
59-
d.V3R.X = V4L.W;
60-
d.V4R.X = V4R.X;
61-
d.V5R.X = V4R.Y;
62-
d.V6R.X = V4R.Z;
63-
d.V7R.X = V4R.W;
64-
65-
d.V0R.Y = V5L.X;
66-
d.V1R.Y = V5L.Y;
67-
d.V2R.Y = V5L.Z;
68-
d.V3R.Y = V5L.W;
69-
d.V4R.Y = V5R.X;
70-
d.V5R.Y = V5R.Y;
71-
d.V6R.Y = V5R.Z;
72-
d.V7R.Y = V5R.W;
73-
74-
d.V0R.Z = V6L.X;
75-
d.V1R.Z = V6L.Y;
76-
d.V2R.Z = V6L.Z;
77-
d.V3R.Z = V6L.W;
78-
d.V4R.Z = V6R.X;
79-
d.V5R.Z = V6R.Y;
80-
d.V6R.Z = V6R.Z;
81-
d.V7R.Z = V6R.W;
82-
83-
d.V0R.W = V7L.X;
84-
d.V1R.W = V7L.Y;
85-
d.V2R.W = V7L.Z;
86-
d.V3R.W = V7L.W;
87-
d.V4R.W = V7R.X;
88-
d.V5R.W = V7R.Y;
89-
d.V6R.W = V7R.Z;
90-
d.V7R.W = V7R.W;
91-
}
92-
9313
/// <summary>
9414
/// Level shift by +maximum/2, clip to [0, maximum]
9515
/// </summary>

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
2323
{
2424
internal partial struct Block8x8F
2525
{
26-
/// <summary>
27-
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
28-
/// </summary>
29-
/// <param name="d">The destination block</param>
30-
[MethodImpl(InliningOptions.ShortMethod)]
31-
public void TransposeIntoFallback(ref Block8x8F d)
32-
{
33-
<#
34-
PushIndent(" ");
35-
36-
for (int i = 0; i < 8; i++)
37-
{
38-
char destCoord = coordz[i % 4];
39-
char destSide = (i / 4) % 2 == 0 ? 'L' : 'R';
40-
41-
for (int j = 0; j < 8; j++)
42-
{
43-
if(i > 0 && j == 0){
44-
WriteLine("");
45-
}
46-
47-
char srcCoord = coordz[j % 4];
48-
char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R';
49-
50-
var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n";
51-
Write(expression);
52-
}
53-
}
54-
PopIndent();
55-
#>
56-
}
57-
5826
/// <summary>
5927
/// Level shift by +maximum/2, clip to [0, maximum]
6028
/// </summary>

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 135 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -611,87 +611,146 @@ public void TransposeInto(ref Block8x8F d)
611611
#if SUPPORTS_RUNTIME_INTRINSICS
612612
if (Avx.IsSupported)
613613
{
614-
this.TransposeIntoAvx(ref d);
614+
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
615+
Vector256<float> r0 = Avx.InsertVector128(
616+
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
617+
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
618+
1);
619+
620+
Vector256<float> r1 = Avx.InsertVector128(
621+
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
622+
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
623+
1);
624+
625+
Vector256<float> r2 = Avx.InsertVector128(
626+
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
627+
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
628+
1);
629+
630+
Vector256<float> r3 = Avx.InsertVector128(
631+
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
632+
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
633+
1);
634+
635+
Vector256<float> r4 = Avx.InsertVector128(
636+
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
637+
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
638+
1);
639+
640+
Vector256<float> r5 = Avx.InsertVector128(
641+
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
642+
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
643+
1);
644+
645+
Vector256<float> r6 = Avx.InsertVector128(
646+
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
647+
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
648+
1);
649+
650+
Vector256<float> r7 = Avx.InsertVector128(
651+
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
652+
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
653+
1);
654+
655+
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
656+
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
657+
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
658+
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
659+
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
660+
661+
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
662+
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
663+
v = Avx.Shuffle(t4, t6, 0x4E);
664+
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
665+
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
666+
667+
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
668+
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
669+
v = Avx.Shuffle(t1, t3, 0x4E);
670+
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
671+
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
672+
673+
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
674+
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
675+
v = Avx.Shuffle(t5, t7, 0x4E);
676+
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
677+
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
615678
}
616679
else
617680
#endif
618681
{
619-
this.TransposeIntoFallback(ref d);
682+
d.V0L.X = this.V0L.X;
683+
d.V1L.X = this.V0L.Y;
684+
d.V2L.X = this.V0L.Z;
685+
d.V3L.X = this.V0L.W;
686+
d.V4L.X = this.V0R.X;
687+
d.V5L.X = this.V0R.Y;
688+
d.V6L.X = this.V0R.Z;
689+
d.V7L.X = this.V0R.W;
690+
691+
d.V0L.Y = this.V1L.X;
692+
d.V1L.Y = this.V1L.Y;
693+
d.V2L.Y = this.V1L.Z;
694+
d.V3L.Y = this.V1L.W;
695+
d.V4L.Y = this.V1R.X;
696+
d.V5L.Y = this.V1R.Y;
697+
d.V6L.Y = this.V1R.Z;
698+
d.V7L.Y = this.V1R.W;
699+
700+
d.V0L.Z = this.V2L.X;
701+
d.V1L.Z = this.V2L.Y;
702+
d.V2L.Z = this.V2L.Z;
703+
d.V3L.Z = this.V2L.W;
704+
d.V4L.Z = this.V2R.X;
705+
d.V5L.Z = this.V2R.Y;
706+
d.V6L.Z = this.V2R.Z;
707+
d.V7L.Z = this.V2R.W;
708+
709+
d.V0L.W = this.V3L.X;
710+
d.V1L.W = this.V3L.Y;
711+
d.V2L.W = this.V3L.Z;
712+
d.V3L.W = this.V3L.W;
713+
d.V4L.W = this.V3R.X;
714+
d.V5L.W = this.V3R.Y;
715+
d.V6L.W = this.V3R.Z;
716+
d.V7L.W = this.V3R.W;
717+
718+
d.V0R.X = this.V4L.X;
719+
d.V1R.X = this.V4L.Y;
720+
d.V2R.X = this.V4L.Z;
721+
d.V3R.X = this.V4L.W;
722+
d.V4R.X = this.V4R.X;
723+
d.V5R.X = this.V4R.Y;
724+
d.V6R.X = this.V4R.Z;
725+
d.V7R.X = this.V4R.W;
726+
727+
d.V0R.Y = this.V5L.X;
728+
d.V1R.Y = this.V5L.Y;
729+
d.V2R.Y = this.V5L.Z;
730+
d.V3R.Y = this.V5L.W;
731+
d.V4R.Y = this.V5R.X;
732+
d.V5R.Y = this.V5R.Y;
733+
d.V6R.Y = this.V5R.Z;
734+
d.V7R.Y = this.V5R.W;
735+
736+
d.V0R.Z = this.V6L.X;
737+
d.V1R.Z = this.V6L.Y;
738+
d.V2R.Z = this.V6L.Z;
739+
d.V3R.Z = this.V6L.W;
740+
d.V4R.Z = this.V6R.X;
741+
d.V5R.Z = this.V6R.Y;
742+
d.V6R.Z = this.V6R.Z;
743+
d.V7R.Z = this.V6R.W;
744+
745+
d.V0R.W = this.V7L.X;
746+
d.V1R.W = this.V7L.Y;
747+
d.V2R.W = this.V7L.Z;
748+
d.V3R.W = this.V7L.W;
749+
d.V4R.W = this.V7R.X;
750+
d.V5R.W = this.V7R.Y;
751+
d.V6R.W = this.V7R.Z;
752+
d.V7R.W = this.V7R.W;
620753
}
621754
}
622-
623-
#if SUPPORTS_RUNTIME_INTRINSICS
624-
/// <summary>
625-
/// AVX-only variant for executing <see cref="TransposeInto(ref Block8x8F)"/>.
626-
/// <see href="https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536"/>
627-
/// </summary>
628-
[MethodImpl(InliningOptions.ShortMethod)]
629-
public void TransposeIntoAvx(ref Block8x8F d)
630-
{
631-
Vector256<float> r0 = Avx.InsertVector128(
632-
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
633-
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
634-
1);
635-
636-
Vector256<float> r1 = Avx.InsertVector128(
637-
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
638-
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
639-
1);
640-
641-
Vector256<float> r2 = Avx.InsertVector128(
642-
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
643-
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
644-
1);
645-
646-
Vector256<float> r3 = Avx.InsertVector128(
647-
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
648-
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
649-
1);
650-
651-
Vector256<float> r4 = Avx.InsertVector128(
652-
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
653-
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
654-
1);
655-
656-
Vector256<float> r5 = Avx.InsertVector128(
657-
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
658-
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
659-
1);
660-
661-
Vector256<float> r6 = Avx.InsertVector128(
662-
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
663-
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
664-
1);
665-
666-
Vector256<float> r7 = Avx.InsertVector128(
667-
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
668-
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
669-
1);
670-
671-
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
672-
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
673-
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
674-
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
675-
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
676-
677-
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
678-
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
679-
v = Avx.Shuffle(t4, t6, 0x4E);
680-
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
681-
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
682-
683-
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
684-
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
685-
v = Avx.Shuffle(t1, t3, 0x4E);
686-
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
687-
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
688-
689-
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
690-
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
691-
v = Avx.Shuffle(t5, t7, 0x4E);
692-
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
693-
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
694-
}
695-
#endif
696755
}
697756
}

tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,17 @@
66

77
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
88
{
9+
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
910
public class Block8x8F_Transpose
1011
{
1112
private static readonly Block8x8F Source = Create8x8FloatData();
1213

13-
[Benchmark(Baseline=true)]
14-
public void TransposeIntoVector4()
15-
{
16-
var dest = default(Block8x8F);
17-
Source.TransposeIntoFallback(ref dest);
18-
}
19-
20-
#if SUPPORTS_RUNTIME_INTRINSICS
2114
[Benchmark]
22-
public void TransposeIntoAvx()
15+
public void TransposeInto()
2316
{
2417
var dest = default(Block8x8F);
25-
Source.TransposeIntoAvx(ref dest);
18+
Source.TransposeInto(ref dest);
2619
}
27-
#endif
2820

2921
private static Block8x8F Create8x8FloatData()
3022
{

0 commit comments

Comments
 (0)