Skip to content

Commit f27c678

Browse files
Merge pull request #1390 from SixLabors/js/block8x8f-optimizations
Optimize Block8x8F low hanging fruit and fix naming
2 parents fddb32b + b136b71 commit f27c678

File tree

10 files changed

+332
-135
lines changed

10 files changed

+332
-135
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ internal partial struct Block8x8F
1313
/// <summary>
1414
/// Level shift by +maximum/2, clip to [0, maximum]
1515
/// </summary>
16-
public void NormalizeColorsInplace(float maximum)
16+
public void NormalizeColorsInPlace(float maximum)
1717
{
1818
var CMin4 = new Vector4(0F);
1919
var CMax4 = new Vector4(maximum);
@@ -38,10 +38,10 @@ public void NormalizeColorsInplace(float maximum)
3838
}
3939

4040
/// <summary>
41-
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
41+
/// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
4242
/// </summary>
4343
[MethodImpl(InliningOptions.ShortMethod)]
44-
public void NormalizeColorsAndRoundInplaceVector8(float maximum)
44+
public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
4545
{
4646
var off = new Vector<float>(MathF.Ceiling(maximum / 2));
4747
var max = new Vector<float>(maximum);

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
2626
/// <summary>
2727
/// Level shift by +maximum/2, clip to [0, maximum]
2828
/// </summary>
29-
public void NormalizeColorsInplace(float maximum)
29+
public void NormalizeColorsInPlace(float maximum)
3030
{
3131
var CMin4 = new Vector4(0F);
3232
var CMax4 = new Vector4(maximum);
@@ -49,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
4949
}
5050

5151
/// <summary>
52-
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
52+
/// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
5353
/// </summary>
5454
[MethodImpl(InliningOptions.ShortMethod)]
55-
public void NormalizeColorsAndRoundInplaceVector8(float maximum)
55+
public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
5656
{
5757
var off = new Vector<float>(MathF.Ceiling(maximum / 2));
5858
var max = new Vector<float>(maximum);

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 143 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -281,73 +281,156 @@ public float[] ToArray()
281281
/// </summary>
282282
/// <param name="value">The value to multiply by.</param>
283283
[MethodImpl(InliningOptions.ShortMethod)]
284-
public void MultiplyInplace(float value)
285-
{
286-
this.V0L *= value;
287-
this.V0R *= value;
288-
this.V1L *= value;
289-
this.V1R *= value;
290-
this.V2L *= value;
291-
this.V2R *= value;
292-
this.V3L *= value;
293-
this.V3R *= value;
294-
this.V4L *= value;
295-
this.V4R *= value;
296-
this.V5L *= value;
297-
this.V5R *= value;
298-
this.V6L *= value;
299-
this.V6R *= value;
300-
this.V7L *= value;
301-
this.V7R *= value;
284+
public void MultiplyInPlace(float value)
285+
{
286+
#if SUPPORTS_RUNTIME_INTRINSICS
287+
if (Avx.IsSupported)
288+
{
289+
var valueVec = Vector256.Create(value);
290+
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
291+
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
292+
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
293+
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
294+
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
295+
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
296+
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
297+
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
298+
}
299+
else
300+
#endif
301+
{
302+
var valueVec = new Vector4(value);
303+
this.V0L *= valueVec;
304+
this.V0R *= valueVec;
305+
this.V1L *= valueVec;
306+
this.V1R *= valueVec;
307+
this.V2L *= valueVec;
308+
this.V2R *= valueVec;
309+
this.V3L *= valueVec;
310+
this.V3R *= valueVec;
311+
this.V4L *= valueVec;
312+
this.V4R *= valueVec;
313+
this.V5L *= valueVec;
314+
this.V5R *= valueVec;
315+
this.V6L *= valueVec;
316+
this.V6R *= valueVec;
317+
this.V7L *= valueVec;
318+
this.V7R *= valueVec;
319+
}
302320
}
303321

304322
/// <summary>
305323
/// Multiply all elements of the block by the corresponding elements of 'other'.
306324
/// </summary>
307325
[MethodImpl(InliningOptions.ShortMethod)]
308-
public void MultiplyInplace(ref Block8x8F other)
309-
{
310-
this.V0L *= other.V0L;
311-
this.V0R *= other.V0R;
312-
this.V1L *= other.V1L;
313-
this.V1R *= other.V1R;
314-
this.V2L *= other.V2L;
315-
this.V2R *= other.V2R;
316-
this.V3L *= other.V3L;
317-
this.V3R *= other.V3R;
318-
this.V4L *= other.V4L;
319-
this.V4R *= other.V4R;
320-
this.V5L *= other.V5L;
321-
this.V5R *= other.V5R;
322-
this.V6L *= other.V6L;
323-
this.V6R *= other.V6R;
324-
this.V7L *= other.V7L;
325-
this.V7R *= other.V7R;
326+
public unsafe void MultiplyInPlace(ref Block8x8F other)
327+
{
328+
#if SUPPORTS_RUNTIME_INTRINSICS
329+
if (Avx.IsSupported)
330+
{
331+
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L)
332+
= Avx.Multiply(
333+
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L),
334+
Unsafe.As<Vector4, Vector256<float>>(ref other.V0L));
335+
336+
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L)
337+
= Avx.Multiply(
338+
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L),
339+
Unsafe.As<Vector4, Vector256<float>>(ref other.V1L));
340+
341+
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L)
342+
= Avx.Multiply(
343+
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L),
344+
Unsafe.As<Vector4, Vector256<float>>(ref other.V2L));
345+
346+
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L)
347+
= Avx.Multiply(
348+
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L),
349+
Unsafe.As<Vector4, Vector256<float>>(ref other.V3L));
350+
351+
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L)
352+
= Avx.Multiply(
353+
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L),
354+
Unsafe.As<Vector4, Vector256<float>>(ref other.V4L));
355+
356+
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L)
357+
= Avx.Multiply(
358+
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L),
359+
Unsafe.As<Vector4, Vector256<float>>(ref other.V5L));
360+
361+
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L)
362+
= Avx.Multiply(
363+
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L),
364+
Unsafe.As<Vector4, Vector256<float>>(ref other.V6L));
365+
366+
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L)
367+
= Avx.Multiply(
368+
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L),
369+
Unsafe.As<Vector4, Vector256<float>>(ref other.V7L));
370+
}
371+
else
372+
#endif
373+
{
374+
this.V0L *= other.V0L;
375+
this.V0R *= other.V0R;
376+
this.V1L *= other.V1L;
377+
this.V1R *= other.V1R;
378+
this.V2L *= other.V2L;
379+
this.V2R *= other.V2R;
380+
this.V3L *= other.V3L;
381+
this.V3R *= other.V3R;
382+
this.V4L *= other.V4L;
383+
this.V4R *= other.V4R;
384+
this.V5L *= other.V5L;
385+
this.V5R *= other.V5R;
386+
this.V6L *= other.V6L;
387+
this.V6R *= other.V6R;
388+
this.V7L *= other.V7L;
389+
this.V7R *= other.V7R;
390+
}
326391
}
327392

328393
/// <summary>
329394
/// Adds a value to all elements of the block.
330395
/// </summary>
331-
/// <param name="diff">The added vector</param>
396+
/// <param name="value">The value to add to each element.</param>
332397
[MethodImpl(InliningOptions.ShortMethod)]
333-
public void AddToAllInplace(Vector4 diff)
334-
{
335-
this.V0L += diff;
336-
this.V0R += diff;
337-
this.V1L += diff;
338-
this.V1R += diff;
339-
this.V2L += diff;
340-
this.V2R += diff;
341-
this.V3L += diff;
342-
this.V3R += diff;
343-
this.V4L += diff;
344-
this.V4R += diff;
345-
this.V5L += diff;
346-
this.V5R += diff;
347-
this.V6L += diff;
348-
this.V6R += diff;
349-
this.V7L += diff;
350-
this.V7R += diff;
398+
public void AddInPlace(float value)
399+
{
400+
#if SUPPORTS_RUNTIME_INTRINSICS
401+
if (Avx.IsSupported)
402+
{
403+
var valueVec = Vector256.Create(value);
404+
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
405+
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
406+
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
407+
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
408+
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
409+
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
410+
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
411+
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
412+
}
413+
else
414+
#endif
415+
{
416+
var valueVec = new Vector4(value);
417+
this.V0L += valueVec;
418+
this.V0R += valueVec;
419+
this.V1L += valueVec;
420+
this.V1R += valueVec;
421+
this.V2L += valueVec;
422+
this.V2R += valueVec;
423+
this.V3L += valueVec;
424+
this.V3R += valueVec;
425+
this.V4L += valueVec;
426+
this.V4R += valueVec;
427+
this.V5L += valueVec;
428+
this.V5R += valueVec;
429+
this.V6L += valueVec;
430+
this.V6R += valueVec;
431+
this.V7L += valueVec;
432+
this.V7R += valueVec;
433+
}
351434
}
352435

353436
/// <summary>
@@ -468,23 +551,23 @@ public Block8x8 RoundAsInt16Block()
468551
/// <summary>
469552
/// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block.
470553
/// </summary>
471-
public void NormalizeColorsAndRoundInplace(float maximum)
554+
public void NormalizeColorsAndRoundInPlace(float maximum)
472555
{
473556
if (SimdUtils.HasVector8)
474557
{
475-
this.NormalizeColorsAndRoundInplaceVector8(maximum);
558+
this.NormalizeColorsAndRoundInPlaceVector8(maximum);
476559
}
477560
else
478561
{
479-
this.NormalizeColorsInplace(maximum);
480-
this.RoundInplace();
562+
this.NormalizeColorsInPlace(maximum);
563+
this.RoundInPlace();
481564
}
482565
}
483566

484567
/// <summary>
485568
/// Rounds all values in the block.
486569
/// </summary>
487-
public void RoundInplace()
570+
public void RoundInPlace()
488571
{
489572
for (int i = 0; i < Size; i++)
490573
{

src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,14 @@ public void ProcessBlockColorsInto(
8181
b.LoadFrom(ref sourceBlock);
8282

8383
// Dequantize:
84-
b.MultiplyInplace(ref this.DequantiazationTable);
84+
b.MultiplyInPlace(ref this.DequantiazationTable);
8585

8686
FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
8787

8888
// To conform better to libjpeg we actually NEED TO lose precision here.
8989
// This is because they store blocks as Int16 between all the operations.
9090
// To be "more accurate", we need to emulate this by rounding!
91-
this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue);
91+
this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue);
9292

9393
this.WorkspaceBlock1.ScaledCopyTo(
9494
ref destAreaOrigin,

src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Bloc
6161
IDCT8x4_RightPart(ref temp, ref dest);
6262

6363
// TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
64-
dest.MultiplyInplace(C_0_125);
64+
dest.MultiplyInPlace(C_0_125);
6565
}
6666

6767
/// <summary>
@@ -324,7 +324,7 @@ public static void TransformFDCT(
324324
src.TransposeInto(ref temp);
325325
if (offsetSourceByNeg128)
326326
{
327-
temp.AddToAllInplace(new Vector4(-128));
327+
temp.AddInPlace(-128F);
328328
}
329329

330330
FDCT8x4_LeftPart(ref temp, ref dest);
@@ -335,7 +335,7 @@ public static void TransformFDCT(
335335
FDCT8x4_LeftPart(ref temp, ref dest);
336336
FDCT8x4_RightPart(ref temp, ref dest);
337337

338-
dest.MultiplyInplace(C_0_125);
338+
dest.MultiplyInPlace(C_0_125);
339339
}
340340
}
341341
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using BenchmarkDotNet.Attributes;
5+
using SixLabors.ImageSharp.Formats.Jpeg.Components;
6+
7+
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
8+
{
9+
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
10+
public class Block8x8F_AddInPlace
11+
{
12+
[Benchmark]
13+
public float AddInplace()
14+
{
15+
float f = 42F;
16+
Block8x8F b = default;
17+
b.AddInPlace(f);
18+
return f;
19+
}
20+
}
21+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using BenchmarkDotNet.Attributes;
5+
using SixLabors.ImageSharp.Formats.Jpeg.Components;
6+
7+
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
8+
{
9+
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
10+
public class Block8x8F_MultiplyInPlaceBlock
11+
{
12+
private static readonly Block8x8F Source = Create8x8FloatData();
13+
14+
[Benchmark]
15+
public void MultiplyInPlaceBlock()
16+
{
17+
Block8x8F dest = default;
18+
Source.MultiplyInPlace(ref dest);
19+
}
20+
21+
private static Block8x8F Create8x8FloatData()
22+
{
23+
var result = new float[64];
24+
for (int i = 0; i < 8; i++)
25+
{
26+
for (int j = 0; j < 8; j++)
27+
{
28+
result[(i * 8) + j] = (i * 10) + j;
29+
}
30+
}
31+
32+
var source = default(Block8x8F);
33+
source.LoadFrom(result);
34+
return source;
35+
}
36+
}
37+
}

0 commit comments

Comments
 (0)