Skip to content

Commit 42f015e

Browse files
Merge pull request #1404 from SixLabors/js/shuffle-intrinsics
Add 4 Channel Shuffle Intrinsics
2 parents 120080b + dabc237 commit 42f015e

20 files changed

+1077
-307
lines changed
Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,165 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Buffers.Binary;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
9+
namespace SixLabors.ImageSharp
10+
{
11+
/// <summary>
/// Contract for types that can reorder (shuffle) the components of pixel data.
/// Implementations provide the code path used on platforms that do not
/// support Hardware Intrinsics.
/// </summary>
internal interface IComponentShuffle
{
    /// <summary>
    /// Gets the control byte that describes the shuffle.
    /// </summary>
    byte Control { get; }

    /// <summary>
    /// Shuffles the 8-bit integers in <paramref name="source"/> according to
    /// <see cref="Control"/> and stores the results in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The span of bytes to read from.</param>
    /// <param name="dest">The span of bytes that receives the shuffled result.</param>
    void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
30+
31+
/// <summary>
/// General-purpose 4-component shuffle whose ordering is given by an arbitrary control byte.
/// </summary>
internal readonly struct DefaultShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from four component selectors, which are packed into a control byte by
    /// <c>SimdUtils.Shuffle.MmShuffle</c>.
    /// </summary>
    /// <param name="p3">The selector for destination component 3.</param>
    /// <param name="p2">The selector for destination component 2.</param>
    /// <param name="p1">The selector for destination component 1.</param>
    /// <param name="p0">The selector for destination component 0.</param>
    public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
        : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from an already packed control byte.
    /// </summary>
    /// <param name="control">The shuffle control byte.</param>
    public DefaultShuffle4(byte control) => this.Control = control;

    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Shuffles <paramref name="source"/> in groups of four bytes and writes the
    /// result to <paramref name="dest"/>. For each group, destination byte <c>k</c>
    /// receives the source byte at offset <c>pk</c> within the same group, where
    /// <c>p0..p3</c> are the values decoded from <see cref="Control"/>.
    /// </summary>
    /// <remarks>
    /// Only complete 4-byte groups are processed; a trailing partial group in
    /// <paramref name="source"/> is ignored, matching the shuffles in this file
    /// that reinterpret the data as 32-bit values.
    /// NOTE(review): <paramref name="dest"/> is not length-checked here and is
    /// presumably validated by the caller to be at least as long as the
    /// processed part of <paramref name="source"/> - confirm.
    /// </remarks>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        // Presumably the inverse of MmShuffle: unpacks the control byte into
        // the four per-component source offsets.
        SimdUtils.Shuffle.InverseMmShuffle(
            this.Control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        // Unsafe.Add performs no bounds checks, so stop at the last complete
        // group instead of touching memory beyond the span when the length is
        // not a multiple of four.
        int end = source.Length - (source.Length % 4);

        for (int i = 0; i < end; i += 4)
        {
            Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
            Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
            Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
        }
    }
}
63+
64+
/// <summary>
/// Fixed 4-component shuffle that rotates every 32-bit group left by 8 bits.
/// </summary>
internal readonly struct WXYZShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control, built from the selectors (2, 1, 0, 3).
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);

    /// <summary>
    /// Reinterprets both spans as 32-bit unsigned integers and writes each
    /// source value, rotated left by 8 bits, to the same index of the destination.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> input = MemoryMarshal.Cast<byte, uint>(source);
        ref uint inputRef = ref MemoryMarshal.GetReference(input);
        ref uint outputRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<byte, uint>(dest));
        int count = input.Length;

        // The shift/or pair below is the rotation idiom that the JIT can
        // recognize and turn into a single rotate instruction:
        // https://github.com/dotnet/coreclr/pull/1830
        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inputRef, n);

            // Most significant byte first:
            // value          = [W Z Y X]
            // ROTL(8, value) = [Z Y X W]
            // On a little-endian machine that is W, X, Y, Z in memory order.
            uint rotated = (value << 8) | (value >> 24);
            Unsafe.Add(ref outputRef, n) = rotated;
        }
    }
}
89+
90+
/// <summary>
/// Fixed 4-component shuffle that reverses the byte order of every 32-bit group.
/// </summary>
internal readonly struct WZYXShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control, built from the selectors (0, 1, 2, 3).
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);

    /// <summary>
    /// Reinterprets both spans as 32-bit unsigned integers and writes each
    /// source value, with its four bytes reversed, to the same index of the destination.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> input = MemoryMarshal.Cast<byte, uint>(source);
        ref uint inputRef = ref MemoryMarshal.GetReference(input);
        ref uint outputRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<byte, uint>(dest));
        int count = input.Length;

        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inputRef, n);

            // Most significant byte first:
            // value          = [W Z Y X]
            // REVERSE(value) = [X Y Z W]
            uint reversed = BinaryPrimitives.ReverseEndianness(value);
            Unsafe.Add(ref outputRef, n) = reversed;
        }
    }
}
112+
113+
/// <summary>
/// Fixed 4-component shuffle that rotates every 32-bit group right by 8 bits.
/// </summary>
internal readonly struct YZWXShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control, built from the selectors (0, 3, 2, 1).
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);

    /// <summary>
    /// Reinterprets both spans as 32-bit unsigned integers and writes each
    /// source value, rotated right by 8 bits, to the same index of the destination.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> input = MemoryMarshal.Cast<byte, uint>(source);
        ref uint inputRef = ref MemoryMarshal.GetReference(input);
        ref uint outputRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<byte, uint>(dest));
        int count = input.Length;

        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inputRef, n);

            // Most significant byte first:
            // value          = [W Z Y X]
            // ROTR(8, value) = [X W Z Y]
            // On a little-endian machine that is Y, Z, W, X in memory order.
            uint rotated = (value >> 8) | (value << 24);
            Unsafe.Add(ref outputRef, n) = rotated;
        }
    }
}
135+
136+
/// <summary>
/// Fixed 4-component shuffle that, within every 32-bit group, exchanges the
/// lowest byte with the third byte and leaves the other two in place.
/// </summary>
internal readonly struct ZYXWShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control, built from the selectors (3, 0, 1, 2).
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);

    /// <summary>
    /// Reinterprets both spans as 32-bit unsigned integers and writes each
    /// source value, with the bytes under mask <c>0x00FF00FF</c> swapped, to
    /// the same index of the destination.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> input = MemoryMarshal.Cast<byte, uint>(source);
        ref uint inputRef = ref MemoryMarshal.GetReference(input);
        ref uint outputRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<byte, uint>(dest));
        int count = input.Length;

        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inputRef, n);

            // Most significant byte first:
            // value                   = [W Z Y X]
            // kept                    = [W 0 Y 0]
            // moved                   = [0 Z 0 X]
            // swapped=ROTL(16, moved) = [0 X 0 Z]
            // kept | swapped          = [W X Y Z]
            // On a little-endian machine that is Z, Y, X, W in memory order.
            uint kept = value & 0xFF00FF00;
            uint moved = value & 0x00FF00FF;
            uint swapped = (moved << 16) | (moved >> 16);

            // The two halves occupy disjoint bytes, so OR-ing them yields the
            // same value as adding them.
            Unsafe.Add(ref outputRef, n) = kept | swapped;
        }
    }
}
165+
}

0 commit comments

Comments
 (0)