Skip to content

Commit 345e7c6

Browse files
committed
Move color space transform methods into own class
1 parent f585870 commit 345e7c6

File tree

2 files changed

+270
-260
lines changed

2 files changed

+270
-260
lines changed
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
#if SUPPORTS_RUNTIME_INTRINSICS
8+
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.X86;
10+
#endif
11+
12+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
13+
{
14+
/// <summary>
/// Histogram collection routines used when evaluating color space transform candidates.
/// Each public method walks a tile of pixels, computes one transformed channel value per pixel
/// and increments the matching bin of the supplied histogram.
/// Vectorized (AVX2 / SSE4.1) paths are used when available; the remaining columns of a tile
/// and all other configurations are handled by the scalar helpers.
/// </summary>
internal static class ColorSpaceTransformUtils
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Keeps bits 8..15 (byte 1) of every 32-bit pixel; the lane comments in CollectColorRedTransforms label it "g".
    private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();

    // Keeps the low byte of every 16-bit lane.
    private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();

    // 256-bit variant of CollectColorRedTransformsGreenMask.
    private static readonly Vector256<byte> CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte();

    // 256-bit variant of CollectColorRedTransformsAndMask.
    private static readonly Vector256<byte> CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte();

    // Keeps the high byte of every 16-bit lane.
    private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    // Keeps the two low bytes (bytes 0 and 1) of every 32-bit pixel.
    private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);

    // Keeps the low byte of every 16-bit lane.
    private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);

    // Shuffle control: moves byte 2 of each of the four pixels into the high byte of the first four 16-bit lanes.
    // Index 255 zeroes the destination byte, so the low bytes and the upper eight bytes become zero.
    private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);

    // Shuffle control: same as the low mask, but the results land in the last four 16-bit lanes.
    private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);

    // 256-bit variant of CollectColorBlueTransformsShuffleLowMask (pattern repeated for the upper 16 bytes).
    private static readonly Vector256<byte> CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255);

    // 256-bit variant of CollectColorBlueTransformsShuffleHighMask (pattern repeated for the upper 16 bytes).
    private static readonly Vector256<byte> CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30);

    // 256-bit variant of CollectColorBlueTransformsGreenBlueMask.
    private static readonly Vector256<byte> CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);

    // 256-bit variant of CollectColorBlueTransformsBlueMask.
    private static readonly Vector256<byte> CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);

    // 256-bit variant of CollectColorBlueTransformsGreenMask.
    private static readonly Vector256<byte> CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
#endif

    /// <summary>
    /// Accumulates, for every pixel of a tile, the blue-transform result into <paramref name="histo"/>.
    /// </summary>
    /// <param name="bgra">The pixel data; the tile starts at index 0.</param>
    /// <param name="stride">The number of pixels between the start of two consecutive tile rows.</param>
    /// <param name="tileWidth">The number of pixels processed per row.</param>
    /// <param name="tileHeight">The number of rows processed.</param>
    /// <param name="greenToBlue">The green to blue multiplier; converted with LosslessUtils.Cst5b (vectorized) or cast to sbyte (scalar).</param>
    /// <param name="redToBlue">The red to blue multiplier; converted the same way as <paramref name="greenToBlue"/>.</param>
    /// <param name="histo">The histogram to increment. The vectorized paths only produce indexes in the range 0..255.</param>
    public static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        if (Avx2.IsSupported && tileWidth >= 16)
        {
            // 16 pixels per iteration: two 256-bit loads of 8 pixels each.
            const int span = 16;
            Span<ushort> values = stackalloc ushort[span];
            ref ushort outputRef = ref MemoryMarshal.GetReference(values);
            var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
            var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
            for (int y = 0; y < tileHeight; y++)
            {
                Span<uint> srcSpan = bgra.Slice(y * stride);
                for (int x = 0; x + span <= tileWidth; x += span)
                {
                    // Slice(x, span) validates the whole load window, so the unchecked vector reads below
                    // can never touch memory outside of the span when the buffer is too small.
                    ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan.Slice(x, span));
                    Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref inputRef).AsByte();
                    Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, span / 2)).AsByte();
                    Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
                    Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
                    Vector256<byte> r = Avx2.Or(r0, r1); // r 0 (byte 2 of each pixel in the high byte of a 16-bit lane)
                    Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
                    Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
                    Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); // g b
                    Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256); // g 0
                    Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
                    Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
                    Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
                    Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
                    Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256); // 0 b'

                    Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = e.AsUInt16();

                    // The values are only counted, so the order of the lanes does not matter.
                    for (int i = 0; i < span; i++)
                    {
                        ++histo[values[i]];
                    }
                }
            }

            // Columns which did not fill a complete vector iteration are processed by the scalar version.
            int leftOver = tileWidth & (span - 1);
            if (leftOver > 0)
            {
                CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
            }
        }
        else if (Sse41.IsSupported)
        {
            // 8 pixels per iteration: two 128-bit loads of 4 pixels each.
            const int span = 8;
            Span<ushort> values = stackalloc ushort[span];
            ref ushort outputRef = ref MemoryMarshal.GetReference(values);
            var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
            var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
            for (int y = 0; y < tileHeight; y++)
            {
                Span<uint> srcSpan = bgra.Slice(y * stride);
                for (int x = 0; x + span <= tileWidth; x += span)
                {
                    // Slice(x, span) validates the whole load window before the unchecked vector reads.
                    ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan.Slice(x, span));
                    Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref inputRef).AsByte();
                    Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, span / 2)).AsByte();
                    Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
                    Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
                    Vector128<byte> r = Sse2.Or(r0, r1); // r 0 (byte 2 of each pixel in the high byte of a 16-bit lane)
                    Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
                    Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
                    Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); // g b
                    Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); // g 0
                    Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
                    Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
                    Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
                    Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
                    Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask); // 0 b'

                    Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = e.AsUInt16();

                    for (int i = 0; i < span; i++)
                    {
                        ++histo[values[i]];
                    }
                }
            }

            // Columns which did not fill a complete vector iteration are processed by the scalar version.
            int leftOver = tileWidth & (span - 1);
            if (leftOver > 0)
            {
                CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
            }
        }
        else
#endif
        {
            CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
        }
    }

    /// <summary>
    /// Scalar version of <see cref="CollectColorBlueTransforms"/>: calls LosslessUtils.TransformColorBlue
    /// for every pixel of the tile and increments the histogram bin it returns.
    /// </summary>
    private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
    {
        // Index of the first pixel of the current row.
        int pos = 0;
        while (tileHeight-- > 0)
        {
            for (int x = 0; x < tileWidth; x++)
            {
                int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
                ++histo[idx];
            }

            pos += stride;
        }
    }

    /// <summary>
    /// Accumulates, for every pixel of a tile, the red-transform result into <paramref name="histo"/>.
    /// </summary>
    /// <param name="bgra">The pixel data; the tile starts at index 0.</param>
    /// <param name="stride">The number of pixels between the start of two consecutive tile rows.</param>
    /// <param name="tileWidth">The number of pixels processed per row.</param>
    /// <param name="tileHeight">The number of rows processed.</param>
    /// <param name="greenToRed">The green to red multiplier; converted with LosslessUtils.Cst5b (vectorized) or cast to sbyte (scalar).</param>
    /// <param name="histo">The histogram to increment. The vectorized paths only produce indexes in the range 0..255.</param>
    public static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        if (Avx2.IsSupported && tileWidth >= 16)
        {
            // 16 pixels per iteration: two 256-bit loads of 8 pixels each.
            var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed));
            const int span = 16;
            Span<ushort> values = stackalloc ushort[span];
            ref ushort outputRef = ref MemoryMarshal.GetReference(values);
            for (int y = 0; y < tileHeight; y++)
            {
                Span<uint> srcSpan = bgra.Slice(y * stride);
                for (int x = 0; x + span <= tileWidth; x += span)
                {
                    // Slice(x, span) validates the whole load window, so the unchecked vector reads below
                    // can never touch memory outside of the span when the buffer is too small.
                    ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan.Slice(x, span));
                    Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref inputRef).AsByte();
                    Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, span / 2)).AsByte();
                    Vector256<byte> g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
                    Vector256<byte> g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
                    Vector256<ushort> g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
                    Vector256<int> a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
                    Vector256<int> a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
                    Vector256<ushort> a = Avx2.PackUnsignedSaturate(a0, a1); // x r
                    Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
                    Vector256<byte> c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
                    Vector256<byte> d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'

                    Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = d.AsUInt16();

                    // The values are only counted, so the order of the lanes does not matter.
                    for (int i = 0; i < span; i++)
                    {
                        ++histo[values[i]];
                    }
                }
            }

            // Columns which did not fill a complete vector iteration are processed by the scalar version.
            int leftOver = tileWidth & (span - 1);
            if (leftOver > 0)
            {
                CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
            }
        }
        else if (Sse41.IsSupported)
        {
            // 8 pixels per iteration: two 128-bit loads of 4 pixels each.
            var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
            const int span = 8;
            Span<ushort> values = stackalloc ushort[span];
            ref ushort outputRef = ref MemoryMarshal.GetReference(values);
            for (int y = 0; y < tileHeight; y++)
            {
                Span<uint> srcSpan = bgra.Slice(y * stride);
                for (int x = 0; x + span <= tileWidth; x += span)
                {
                    // Slice(x, span) validates the whole load window before the unchecked vector reads.
                    ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan.Slice(x, span));
                    Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref inputRef).AsByte();
                    Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, span / 2)).AsByte();
                    Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
                    Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
                    Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
                    Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
                    Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
                    Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
                    Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
                    Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
                    Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'

                    Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = d.AsUInt16();

                    for (int i = 0; i < span; i++)
                    {
                        ++histo[values[i]];
                    }
                }
            }

            // Columns which did not fill a complete vector iteration are processed by the scalar version.
            int leftOver = tileWidth & (span - 1);
            if (leftOver > 0)
            {
                CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
            }
        }
        else
#endif
        {
            CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
        }
    }

    /// <summary>
    /// Scalar version of <see cref="CollectColorRedTransforms"/>: calls LosslessUtils.TransformColorRed
    /// for every pixel of the tile and increments the histogram bin it returns.
    /// </summary>
    private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
    {
        // Index of the first pixel of the current row.
        int pos = 0;
        while (tileHeight-- > 0)
        {
            for (int x = 0; x < tileWidth; x++)
            {
                int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
                ++histo[idx];
            }

            pos += stride;
        }
    }
}
268+
}

0 commit comments

Comments
 (0)