@@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
99{
1010 internal static partial class FastFloatingPointDCT
1111 {
12- #pragma warning disable SA1310 , SA1311 , IDE1006 // naming rule violation warnings
13- private static readonly Vector256 < float > mm256_F_0_7071 = Vector256 . Create ( 0.707106781f ) ;
14- private static readonly Vector256 < float > mm256_F_0_3826 = Vector256 . Create ( 0.382683433f ) ;
15- private static readonly Vector256 < float > mm256_F_0_5411 = Vector256 . Create ( 0.541196100f ) ;
16- private static readonly Vector256 < float > mm256_F_1_3065 = Vector256 . Create ( 1.306562965f ) ;
17-
18- private static readonly Vector256 < float > mm256_F_1_4142 = Vector256 . Create ( 1.414213562f ) ;
19- private static readonly Vector256 < float > mm256_F_1_8477 = Vector256 . Create ( 1.847759065f ) ;
20- private static readonly Vector256 < float > mm256_F_n1_0823 = Vector256 . Create ( - 1.082392200f ) ;
21- private static readonly Vector256 < float > mm256_F_n2_6131 = Vector256 . Create ( - 2.613125930f ) ;
22- #pragma warning restore SA1310 , SA1311 , IDE1006
23-
2412 /// <summary>
2513 /// Apply floating point FDCT in place using SIMD operations.
2614 /// </summary>
@@ -48,6 +36,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
4836 Vector256 < float > tmp3 = Avx . Add ( block . V3 , block . V4 ) ;
4937 Vector256 < float > tmp4 = Avx . Subtract ( block . V3 , block . V4 ) ;
5038
39+
5140 // Even part
5241 Vector256 < float > tmp10 = Avx . Add ( tmp0 , tmp3 ) ;
5342 Vector256 < float > tmp13 = Avx . Subtract ( tmp0 , tmp3 ) ;
@@ -57,6 +46,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
5746 block . V0 = Avx . Add ( tmp10 , tmp11 ) ;
5847 block . V4 = Avx . Subtract ( tmp10 , tmp11 ) ;
5948
49+ Vector256 < float > mm256_F_0_7071 = Vector256 . Create ( 0.707106781f ) ;
6050 Vector256 < float > z1 = Avx . Multiply ( Avx . Add ( tmp12 , tmp13 ) , mm256_F_0_7071 ) ;
6151 block . V2 = Avx . Add ( tmp13 , z1 ) ;
6252 block . V6 = Avx . Subtract ( tmp13 , z1 ) ;
@@ -66,9 +56,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
6656 tmp11 = Avx . Add ( tmp5 , tmp6 ) ;
6757 tmp12 = Avx . Add ( tmp6 , tmp7 ) ;
6858
69- Vector256 < float > z5 = Avx . Multiply ( Avx . Subtract ( tmp10 , tmp12 ) , mm256_F_0_3826 ) ;
70- Vector256 < float > z2 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , mm256_F_0_5411 , tmp10 ) ;
71- Vector256 < float > z4 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , mm256_F_1_3065 , tmp12 ) ;
59+ Vector256 < float > z5 = Avx . Multiply ( Avx . Subtract ( tmp10 , tmp12 ) , Vector256 . Create ( 0.382683433f ) ) ; // mm256_F_0_3826
60+ Vector256 < float > z2 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , Vector256 . Create ( 0.541196100f ) , tmp10 ) ; // mm256_F_0_5411
61+ Vector256 < float > z4 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , Vector256 . Create ( 1.306562965f ) , tmp12 ) ; // mm256_F_1_3065
7262 Vector256 < float > z3 = Avx . Multiply ( tmp11 , mm256_F_0_7071 ) ;
7363
7464 Vector256 < float > z11 = Avx . Add ( tmp7 , z3 ) ;
@@ -109,6 +99,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
10999 Vector256 < float > tmp10 = Avx . Add ( z5 , tmp2 ) ;
110100 Vector256 < float > tmp11 = Avx . Subtract ( z5 , tmp2 ) ;
111101
102+ Vector256 < float > mm256_F_1_4142 = Vector256 . Create ( 1.414213562f ) ;
112103 Vector256 < float > tmp13 = Avx . Add ( tmp1 , tmp3 ) ;
113104 Vector256 < float > tmp12 = SimdUtils . HwIntrinsics . MultiplySubstract ( tmp13 , Avx . Subtract ( tmp1 , tmp3 ) , mm256_F_1_4142 ) ;
114105
@@ -131,10 +122,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
131122 tmp7 = Avx . Add ( z11 , z13 ) ;
132123 tmp11 = Avx . Multiply ( Avx . Subtract ( z11 , z13 ) , mm256_F_1_4142 ) ;
133124
134- z5 = Avx . Multiply ( Avx . Add ( z10 , z12 ) , mm256_F_1_8477 ) ;
125+ z5 = Avx . Multiply ( Avx . Add ( z10 , z12 ) , Vector256 . Create ( 1.847759065f ) ) ; // mm256_F_1_8477
135126
136- tmp10 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z12 , mm256_F_n1_0823 ) ;
137- tmp12 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z10 , mm256_F_n2_6131 ) ;
127+ tmp10 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z12 , Vector256 . Create ( - 1.082392200f ) ) ; // mm256_F_n1_0823
128+ tmp12 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z10 , Vector256 . Create ( - 2.613125930f ) ) ; // mm256_F_n2_6131
138129
139130 tmp6 = Avx . Subtract ( tmp12 , tmp7 ) ;
140131 tmp5 = Avx . Subtract ( tmp11 , tmp6 ) ;
0 commit comments