@@ -494,32 +494,32 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl
494494 var f2 = Vector256 . Create ( 2f ) ;
495495 var f025 = Vector256 . Create ( 0.25f ) ;
496496 Vector256 < int > switchInnerDoubleWords = Unsafe . As < byte , Vector256 < int > > ( ref MemoryMarshal . GetReference ( SimdUtils . HwIntrinsics . PermuteMaskSwitchInnerDWords8x32 ) ) ;
497-
498- ref Vector256 < float > in1 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref MemoryMarshal . GetReference ( source ) ) ;
499- ref Vector256 < float > in2 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , 1 ) ) ;
500497 ref Vector256 < float > destRef = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref destination ) ;
501498
502- for ( int i = 0 ; i < 8 ; i ++ )
499+ for ( int i = 0 ; i < 2 ; i ++ )
503500 {
504- Vector256 < float > a = in1 ;
505- Vector256 < float > b = Unsafe . Add ( ref in1 , 1 ) ;
506- Vector256 < float > c = in2 ;
507- Vector256 < float > d = Unsafe . Add ( ref in2 , 1 ) ;
508-
509- Vector256 < float > calc1 = Avx . Shuffle ( a , c , 0b10_00_10_00 ) ;
510- Vector256 < float > calc2 = Avx . Shuffle ( a , c , 0b11_01_11_01 ) ;
511- Vector256 < float > calc3 = Avx . Shuffle ( b , d , 0b10_00_10_00 ) ;
512- Vector256 < float > calc4 = Avx . Shuffle ( b , d , 0b11_01_11_01 ) ;
513-
514- Vector256 < float > sum = Avx . Add ( Avx . Add ( calc1 , calc2 ) , Avx . Add ( calc3 , calc4 ) ) ;
515- Vector256 < float > add = Avx . Add ( sum , f2 ) ;
516- Vector256 < float > res = Avx . Multiply ( add , f025 ) ;
501+ ref Vector256 < float > in1 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , 2 * i ) ) ;
502+ ref Vector256 < float > in2 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , ( 2 * i ) + 1 ) ) ;
517503
518- destRef = Avx2 . PermuteVar8x32 ( res , switchInnerDoubleWords ) ;
519- destRef = ref Unsafe . Add ( ref destRef , 1 ) ;
520-
521- in1 = ref Unsafe . Add ( ref in1 , 2 ) ;
522- in2 = ref Unsafe . Add ( ref in2 , 2 ) ;
504+ for ( int j = 0 ; j < 8 ; j += 2 )
505+ {
506+ Vector256 < float > a = Unsafe . Add ( ref in1 , j ) ;
507+ Vector256 < float > b = Unsafe . Add ( ref in1 , j + 1 ) ;
508+ Vector256 < float > c = Unsafe . Add ( ref in2 , j ) ;
509+ Vector256 < float > d = Unsafe . Add ( ref in2 , j + 1 ) ;
510+
511+ Vector256 < float > calc1 = Avx . Shuffle ( a , c , 0b10_00_10_00 ) ;
512+ Vector256 < float > calc2 = Avx . Shuffle ( a , c , 0b11_01_11_01 ) ;
513+ Vector256 < float > calc3 = Avx . Shuffle ( b , d , 0b10_00_10_00 ) ;
514+ Vector256 < float > calc4 = Avx . Shuffle ( b , d , 0b11_01_11_01 ) ;
515+
516+ Vector256 < float > sum = Avx . Add ( Avx . Add ( calc1 , calc2 ) , Avx . Add ( calc3 , calc4 ) ) ;
517+ Vector256 < float > add = Avx . Add ( sum , f2 ) ;
518+ Vector256 < float > res = Avx . Multiply ( add , f025 ) ;
519+
520+ destRef = Avx2 . PermuteVar8x32 ( res , switchInnerDoubleWords ) ;
521+ destRef = ref Unsafe . Add ( ref destRef , 1 ) ;
522+ }
523523 }
524524#endif
525525 }
0 commit comments