Skip to content

Commit b14a78c

Browse files
authored
Merge pull request #1882 from SixLabors/bp/collecthistogramavx
Add AVX2 version of CollectHistogram
2 parents 93e4faa + 5cc83fb commit b14a78c

File tree

2 files changed

+151
-4
lines changed

2 files changed

+151
-4
lines changed

src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33

44
using System;
55
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
#if SUPPORTS_RUNTIME_INTRINSICS
8+
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.X86;
10+
#endif
611

712
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
813
{
@@ -19,6 +24,10 @@ internal sealed class Vp8Histogram
1924
/// </summary>
2025
private const int MaxCoeffThresh = 31;
2126

27+
#if SUPPORTS_RUNTIME_INTRINSICS
28+
private static readonly Vector256<short> MaxCoeffThreshVec = Vector256.Create((short)MaxCoeffThresh);
29+
#endif
30+
2231
private int maxValue;
2332

2433
private int lastNonZero;
@@ -52,11 +61,38 @@ public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlo
5261
Vp8Encoding.FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output, this.scratch);
5362

5463
// Convert coefficients to bin.
55-
for (int k = 0; k < 16; ++k)
64+
#if SUPPORTS_RUNTIME_INTRINSICS
65+
if (Avx2.IsSupported)
5666
{
57-
int v = Math.Abs(this.output[k]) >> 3;
58-
int clippedValue = ClipMax(v, MaxCoeffThresh);
59-
++this.distribution[clippedValue];
67+
// Load.
68+
ref short outputRef = ref MemoryMarshal.GetReference<short>(this.output);
69+
Vector256<byte> out0 = Unsafe.As<short, Vector256<byte>>(ref outputRef);
70+
71+
// v = abs(out) >> 3
72+
Vector256<ushort> abs0 = Avx2.Abs(out0.AsInt16());
73+
Vector256<short> v0 = Avx2.ShiftRightArithmetic(abs0.AsInt16(), 3);
74+
75+
// bin = min(v, MAX_COEFF_THRESH)
76+
Vector256<short> min0 = Avx2.Min(v0, MaxCoeffThreshVec);
77+
78+
// Store.
79+
Unsafe.As<short, Vector256<short>>(ref outputRef) = min0;
80+
81+
// Convert coefficients to bin.
82+
for (int k = 0; k < 16; ++k)
83+
{
84+
++this.distribution[this.output[k]];
85+
}
86+
}
87+
else
88+
#endif
89+
{
90+
for (int k = 0; k < 16; ++k)
91+
{
92+
int v = Math.Abs(this.output[k]) >> 3;
93+
int clippedValue = ClipMax(v, MaxCoeffThresh);
94+
++this.distribution[clippedValue];
95+
}
6096
}
6197
}
6298

tests/ImageSharp.Tests/Formats/WebP/Vp8HistogramTests.cs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System.Collections.Generic;
55
using SixLabors.ImageSharp.Formats.Webp.Lossy;
6+
using SixLabors.ImageSharp.Tests.TestUtilities;
67
using Xunit;
78

89
namespace SixLabors.ImageSharp.Tests.Formats.Webp
@@ -67,6 +68,108 @@ public static IEnumerable<object[]> Data
6768
}
6869
}
6970

71+
private static void RunCollectHistogramTest()
72+
{
73+
// arrange
74+
var histogram = new Vp8Histogram();
75+
76+
byte[] reference =
77+
{
78+
154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103,
79+
101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159,
80+
164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168,
81+
170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107,
82+
104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151,
83+
148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117,
84+
170, 170, 169, 171, 171, 179, 173, 175, 149, 151, 152, 151, 148, 154, 162, 157, 154, 154, 151, 132,
85+
92, 89, 101, 108, 104, 102, 101, 101, 103, 103, 123, 118, 171, 168, 177, 173, 171, 178, 172, 176,
86+
152, 152, 152, 151, 154, 162, 161, 155, 149, 157, 156, 129, 92, 87, 101, 107, 102, 100, 107, 100,
87+
101, 102, 123, 118, 170, 175, 182, 172, 171, 179, 173, 175, 152, 151, 154, 155, 160, 162, 161, 153,
88+
150, 156, 153, 129, 92, 91, 102, 106, 100, 109, 115, 99, 101, 102, 124, 120, 171, 179, 178, 172,
89+
171, 181, 171, 173, 154, 154, 154, 162, 160, 158, 156, 152, 153, 157, 151, 128, 86, 86, 102, 105,
90+
102, 122, 114, 99, 101, 102, 125, 120, 178, 173, 177, 172, 171, 180, 172, 173, 154, 152, 158, 163,
91+
150, 148, 148, 156, 151, 158, 152, 129, 87, 87, 101, 105, 204, 204, 204, 204, 204, 204, 204, 204,
92+
204, 204, 204, 204, 204, 204, 204, 204, 154, 151, 165, 156, 141, 137, 146, 158, 152, 159, 152, 133,
93+
90, 88, 99, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
94+
154, 160, 164, 150, 126, 127, 149, 159, 155, 161, 153, 131, 84, 86, 97, 103, 204, 204, 204, 204,
95+
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 157, 167, 157, 137, 102, 128, 155, 161,
96+
157, 159, 154, 134, 84, 82, 97, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
97+
204, 204, 204, 204, 163, 163, 150, 113, 78, 132, 156, 162, 159, 160, 154, 132, 83, 78, 91, 97, 204,
98+
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 163, 157, 137, 80, 78,
99+
131, 154, 163, 157, 159, 149, 131, 82, 77, 94, 100, 204, 204, 204, 204, 204, 204, 204, 204, 204,
100+
204, 204, 204, 204, 204, 204, 204, 159, 151, 108, 72, 88, 132, 156, 162, 159, 157, 151, 130, 79, 78,
101+
95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 151, 130,
102+
82, 82, 89, 134, 154, 161, 161, 157, 152, 129, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204,
103+
204, 204, 204, 204, 204, 204, 204, 204, 204
104+
};
105+
byte[] pred =
106+
{
107+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
108+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
109+
128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
110+
129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
111+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
112+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
113+
129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
114+
128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
115+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
116+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
117+
128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
118+
129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
119+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
120+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
121+
129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
122+
128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
123+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
124+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
125+
128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
126+
129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
127+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
128+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
129+
129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
130+
128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
131+
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
132+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127,
133+
127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
134+
129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
135+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127,
136+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129,
137+
129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
138+
127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
139+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129,
140+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127,
141+
127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
142+
129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
143+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127,
144+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129,
145+
129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
146+
127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
147+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129,
148+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127,
149+
127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
150+
129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
151+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127,
152+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129,
153+
129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
154+
127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
155+
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129,
156+
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 127, 127, 127, 127, 127, 127, 127, 127,
157+
127, 127, 127, 127, 127, 127, 127, 127, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
158+
129, 129, 129, 129
159+
};
160+
int expectedAlpha = 146;
161+
162+
// act
163+
histogram.CollectHistogram(reference, pred, 0, 10);
164+
int actualAlpha = histogram.GetAlpha();
165+
166+
// assert
167+
Assert.Equal(expectedAlpha, actualAlpha);
168+
}
169+
170+
[Fact]
171+
public void RunCollectHistogramTest_Works() => RunCollectHistogramTest();
172+
70173
[Fact]
71174
public void GetAlpha_WithEmptyHistogram_Works()
72175
{
@@ -111,5 +214,13 @@ public void Merge_Works(byte[] reference, byte[] pred)
111214
// assert
112215
Assert.Equal(1054, alpha);
113216
}
217+
218+
#if SUPPORTS_RUNTIME_INTRINSICS
219+
[Fact]
220+
public void CollectHistogramTest_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectHistogramTest, HwIntrinsics.AllowAll);
221+
222+
[Fact]
223+
public void CollectHistogramTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectHistogramTest, HwIntrinsics.DisableHWIntrinsic);
224+
#endif
114225
}
115226
}

0 commit comments

Comments
 (0)