1313// ReSharper disable InconsistentNaming
1414namespace SixLabors . ImageSharp . Formats . Webp . Lossy
1515{
16- internal static unsafe class LossyUtils
16+ internal static class LossyUtils
1717 {
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Byte mask with 0x00ff replicated in every 16-bit lane; and-ing with it keeps only
/// the low byte of each 16-bit lane. Used by the vectorized path of <see cref="Mean16x4"/>.
/// </summary>
private static readonly Vector128<byte> Mean16x4Mask =
    Vector128.Create((short)0x00ff).AsByte();
#endif
21+
/// <summary>
/// Convenience wrapper that forwards both buffers to <c>GetSse</c>,
/// passing 16 for each of its two size arguments.
/// </summary>
/// <param name="a">The first buffer.</param>
/// <param name="b">The second buffer.</param>
/// <returns>The value returned by <c>GetSse</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Sse16X16(Span<byte> a, Span<byte> b)
{
    const int blockSize = 16;
    return GetSse(a, b, blockSize, blockSize);
}
2024
@@ -938,26 +942,55 @@ public static void HFilter8i(Span<byte> u, Span<byte> v, int offset, int stride,
938942 FilterLoop24 ( v , offsetPlus4 , 1 , stride , 8 , thresh , ithresh , hevThresh ) ;
939943 }
940944
941- [ MethodImpl ( InliningOptions . ShortMethod ) ]
942- public static uint LoadUv ( byte u , byte v ) =>
943- ( uint ) ( u | ( v << 16 ) ) ; // We process u and v together stashed into 32bit(16bit each).
944-
945- [ MethodImpl ( InliningOptions . ShortMethod ) ]
946- public static void YuvToBgr ( int y , int u , int v , Span < byte > bgr )
/// <summary>
/// Computes the pixel sum of each of the four horizontally adjacent 4x4 blocks
/// contained in a 16x4 strip. Despite the name, the sums are not divided:
/// each output is the raw total of 16 pixels.
/// </summary>
/// <param name="input">
/// Source pixels. Four rows are read, each <see cref="WebpConstants.Bps"/> bytes apart,
/// 16 bytes per row.
/// </param>
/// <param name="dc">Receives the four block sums, left to right. Must hold at least 4 elements.</param>
public static void Mean16x4(Span<byte> input, Span<uint> dc)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Ssse3.IsSupported)
    {
        // Validate the spans BEFORE any reinterpreting access: Unsafe.As bypasses bounds checks,
        // so a short span would otherwise be read/written past its end instead of throwing.
        ref uint outputRef = ref MemoryMarshal.GetReference(dc.Slice(0, 4));
        Span<byte> row0 = input.Slice(0, 16);
        Span<byte> row1 = input.Slice(WebpConstants.Bps, 16);
        Span<byte> row2 = input.Slice(WebpConstants.Bps * 2, 16);
        Span<byte> row3 = input.Slice(WebpConstants.Bps * 3, 16);

        Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(row0));
        Vector128<byte> a1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(row1));
        Vector128<byte> a2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(row2));
        Vector128<byte> a3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(row3));

        // High byte of every 16-bit lane (the odd-indexed pixels), widened to 16 bits.
        Vector128<short> b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8);
        Vector128<short> b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
        Vector128<short> b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
        Vector128<short> b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);

        // Low byte of every 16-bit lane (the even-indexed pixels).
        Vector128<byte> c0 = Sse2.And(a0, Mean16x4Mask);
        Vector128<byte> c1 = Sse2.And(a1, Mean16x4Mask);
        Vector128<byte> c2 = Sse2.And(a2, Mean16x4Mask);
        Vector128<byte> c3 = Sse2.And(a3, Mean16x4Mask);

        // Per-row sums of neighbouring pixel pairs. The adds are 32-bit wide, but every 16-bit half
        // stays at or below 8 * 255, so no carry ever crosses into the upper half.
        Vector128<int> d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
        Vector128<int> d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
        Vector128<int> d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
        Vector128<int> d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32());

        // Accumulate the four rows.
        Vector128<int> e0 = Sse2.Add(d0, d1);
        Vector128<int> e1 = Sse2.Add(d2, d3);
        Vector128<int> f0 = Sse2.Add(e0, e1);

        // Adding adjacent 16-bit lanes yields one total per 4-pixel-wide block;
        // interleaving with zero widens the low four totals to 32 bits.
        Vector128<short> hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16());
        Vector128<uint> wide = Sse2.UnpackLow(hadd, Vector128<short>.Zero).AsUInt32();

        Unsafe.As<uint, Vector128<uint>>(ref outputRef) = wide;
    }
    else
#endif
    {
        for (int k = 0; k < 4; k++)
        {
            uint avg = 0;
            for (int y = 0; y < 4; y++)
            {
                for (int x = 0; x < 4; x++)
                {
                    avg += input[x + (y * WebpConstants.Bps)];
                }
            }

            dc[k] = avg;
            input = input.Slice(4); // go to next 4x4 block.
        }
    }
}
961994
/// <summary>
/// Returns the average of two bytes, rounded up when the sum is odd.
/// </summary>
/// <param name="a">The first value.</param>
/// <param name="b">The second value.</param>
/// <returns>(a + b + 1) shifted right by one, as a byte.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Avg2(byte a, byte b)
{
    // The operands are promoted to int, so the sum cannot wrap before the shift.
    int roundedSum = a + b + 1;
    return (byte)(roundedSum >> 1);
}
@@ -1163,9 +1196,6 @@ private static bool Hev(Span<byte> p, int offset, int step, int thresh)
11631196 return WebpLookupTables . Abs0 ( p1 - p0 ) > thresh || WebpLookupTables . Abs0 ( q1 - q0 ) > thresh ;
11641197 }
11651198
1166- [ MethodImpl ( InliningOptions . ShortMethod ) ]
1167- private static int MultHi ( int v , int coeff ) => ( v * coeff ) >> 8 ;
1168-
11691199 [ MethodImpl ( InliningOptions . ShortMethod ) ]
11701200 private static void Store ( Span < byte > dst , int x , int y , int v )
11711201 {
@@ -1188,13 +1218,6 @@ private static void Store2(Span<byte> dst, int y, int dc, int d, int c)
/// <summary>
/// Fixed-point multiply: scales <paramref name="a"/> by 35468 and drops the low 16 bits
/// with an arithmetic right shift.
/// </summary>
/// <param name="a">The value to scale.</param>
/// <returns>(a * 35468) shifted right by 16.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int Mul2(int a)
{
    int scaled = a * 35468;
    return scaled >> 16;
}
11901220
1191- [ MethodImpl ( InliningOptions . ShortMethod ) ]
1192- private static byte Clip8 ( int v )
1193- {
1194- int yuvMask = ( 256 << 6 ) - 1 ;
1195- return ( byte ) ( ( v & ~ yuvMask ) == 0 ? v >> 6 : v < 0 ? 0 : 255 ) ;
1196- }
1197-
11981221 [ MethodImpl ( InliningOptions . ShortMethod ) ]
11991222 private static void Put8x8uv ( byte value , Span < byte > dst )
12001223 {
0 commit comments