Skip to content

Commit f585870

Browse files
Merge branch 'master' into bp/ColorBlueTransformAvx
2 parents 55f04f6 + 69c30f8 commit f585870

File tree

4 files changed

+432
-56
lines changed

4 files changed

+432
-56
lines changed

src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -704,28 +704,7 @@ public static int TTransformSse41(Span<byte> inputA, Span<byte> inputB, Span<ush
704704
// a20 a21 a22 a23 b20 b21 b22 b23
705705
// a30 a31 a32 a33 b30 b31 b32 b33
706706
// Transpose the two 4x4.
707-
Vector128<short> transpose00 = Sse2.UnpackLow(b0, b1);
708-
Vector128<short> transpose01 = Sse2.UnpackLow(b2, b3);
709-
Vector128<short> transpose02 = Sse2.UnpackHigh(b0, b1);
710-
Vector128<short> transpose03 = Sse2.UnpackHigh(b2, b3);
711-
712-
// a00 a10 a01 a11 a02 a12 a03 a13
713-
// a20 a30 a21 a31 a22 a32 a23 a33
714-
// b00 b10 b01 b11 b02 b12 b03 b13
715-
// b20 b30 b21 b31 b22 b32 b23 b33
716-
Vector128<int> transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32());
717-
Vector128<int> transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32());
718-
Vector128<int> transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32());
719-
Vector128<int> transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32());
720-
721-
// a00 a10 a20 a30 a01 a11 a21 a31
722-
// b00 b10 b20 b30 b01 b11 b21 b31
723-
// a02 a12 a22 a32 a03 a13 a23 a33
724-
// b02 b12 b22 b32 b03 b13 b23 b33
725-
Vector128<long> output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64());
726-
Vector128<long> output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64());
727-
Vector128<long> output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64());
728-
Vector128<long> output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64());
707+
Vp8Transpose_2_4x4_16b(b0, b1, b2, b3, out Vector128<long> output0, out Vector128<long> output1, out Vector128<long> output2, out Vector128<long> output3);
729708

730709
// a00 a10 a20 a30 b00 b10 b20 b30
731710
// a01 a11 a21 a31 b01 b11 b21 b31
@@ -769,6 +748,44 @@ public static int TTransformSse41(Span<byte> inputA, Span<byte> inputB, Span<ush
769748

770749
return Numerics.ReduceSum(result);
771750
}
751+
752+
// Transpose two 4x4 matrices of 16-bit values stored side by side in registers (each input row holds one row of A followed by one row of B).
753+
[MethodImpl(InliningOptions.ShortMethod)]
754+
public static void Vp8Transpose_2_4x4_16b(Vector128<short> b0, Vector128<short> b1, Vector128<short> b2, Vector128<short> b3, out Vector128<long> output0, out Vector128<long> output1, out Vector128<long> output2, out Vector128<long> output3)
755+
{
756+
// Transpose the two 4x4.
757+
// a00 a01 a02 a03 b00 b01 b02 b03
758+
// a10 a11 a12 a13 b10 b11 b12 b13
759+
// a20 a21 a22 a23 b20 b21 b22 b23
760+
// a30 a31 a32 a33 b30 b31 b32 b33
761+
Vector128<short> transpose00 = Sse2.UnpackLow(b0, b1);
762+
Vector128<short> transpose01 = Sse2.UnpackLow(b2, b3);
763+
Vector128<short> transpose02 = Sse2.UnpackHigh(b0, b1);
764+
Vector128<short> transpose03 = Sse2.UnpackHigh(b2, b3);
765+
766+
// a00 a10 a01 a11 a02 a12 a03 a13
767+
// a20 a30 a21 a31 a22 a32 a23 a33
768+
// b00 b10 b01 b11 b02 b12 b03 b13
769+
// b20 b30 b21 b31 b22 b32 b23 b33
770+
Vector128<int> transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32());
771+
Vector128<int> transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32());
772+
Vector128<int> transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32());
773+
Vector128<int> transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32());
774+
775+
// a00 a10 a20 a30 a01 a11 a21 a31
776+
// b00 b10 b20 b30 b01 b11 b21 b31
777+
// a02 a12 a22 a32 a03 a13 a23 a33
778+
// b02 b12 b22 b32 b03 b13 b23 b33
779+
output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64());
780+
output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64());
781+
output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64());
782+
output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64());
783+
784+
// a00 a10 a20 a30 b00 b10 b20 b30
785+
// a01 a11 a21 a31 b01 b11 b21 b31
786+
// a02 a12 a22 a32 b02 b12 b22 b32
787+
// a03 a13 a23 a33 b03 b13 b23 b33
788+
}
772789
#endif
773790

774791
public static void TransformTwo(Span<short> src, Span<byte> dst, Span<int> scratch)

src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8M
329329
LossyUtils.TransformWht(dcTmp, tmp, scratch);
330330
for (n = 0; n < 16; n += 2)
331331
{
332-
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch);
332+
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), scratch);
333333
}
334334

335335
return nz;
@@ -342,7 +342,7 @@ public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span<
342342
Span<int> scratch = it.Scratch3.AsSpan(0, 16);
343343
Vp8Encoding.FTransform(src, reference, tmp, scratch);
344344
int nz = QuantizeBlock(tmp, levels, ref dqm.Y1);
345-
Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch);
345+
Vp8Encoding.ITransformOne(reference, tmp, yuvOut, scratch);
346346

347347
return nz;
348348
}
@@ -375,7 +375,7 @@ public static int ReconstructUv(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeSc
375375

376376
for (n = 0; n < 8; n += 2)
377377
{
378-
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch);
378+
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), scratch);
379379
}
380380

381381
return nz << 16;

0 commit comments

Comments
 (0)