@@ -417,19 +417,40 @@ private void Get3DMoments(MemoryAllocator allocator)
417417
418418 for ( int r = 1 ; r < IndexCount ; r ++ )
419419 {
420+ // Currently, RyuJIT hoists the invariants of multi-level nested loops only to the
421+ // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420
422+ // To ensure the calculation isn't repeated on every inner iteration, manually hoist
423+ // the loop-invariant parts of the index calculation into the ind1* locals.
424+ int ind1R = ( r << ( ( IndexBits * 2 ) + IndexAlphaBits ) ) +
425+ ( r << ( IndexBits + IndexAlphaBits + 1 ) ) +
426+ ( r << ( IndexBits * 2 ) ) +
427+ ( r << ( IndexBits + 1 ) ) +
428+ r ;
429+
420430 volumeSpan . Clear ( ) ;
421431
422432 for ( int g = 1 ; g < IndexCount ; g ++ )
423433 {
434+ int ind1G = ind1R +
435+ ( g << ( IndexBits + IndexAlphaBits ) ) +
436+ ( g << IndexBits ) +
437+ g ;
438+ int r_g = r + g ;
439+
424440 areaSpan . Clear ( ) ;
425441
426442 for ( int b = 1 ; b < IndexCount ; b ++ )
427443 {
444+ int ind1B = ind1G +
445+ ( ( r_g + b ) << IndexAlphaBits ) +
446+ b ;
447+
428448 Moment line = default ;
429449
430450 for ( int a = 1 ; a < IndexAlphaCount ; a ++ )
431451 {
432- int ind1 = GetPaletteIndex ( r , g , b , a ) ;
452+ int ind1 = ind1B + a ;
453+
433454 line += momentSpan [ ind1 ] ;
434455
435456 areaSpan [ a ] += line ;
@@ -628,13 +649,35 @@ private void Mark(ref Box cube, byte label)
628649
629650 for ( int r = cube . RMin + 1 ; r <= cube . RMax ; r ++ )
630651 {
652+ // Currently, RyuJIT hoists the invariants of multi-level nested loops only to the
653+ // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420
654+ // To ensure the calculation isn't repeated on every inner iteration, manually hoist
655+ // the loop-invariant parts of the index calculation into the ind1* locals.
656+ int ind1R = ( r << ( ( IndexBits * 2 ) + IndexAlphaBits ) ) +
657+ ( r << ( IndexBits + IndexAlphaBits + 1 ) ) +
658+ ( r << ( IndexBits * 2 ) ) +
659+ ( r << ( IndexBits + 1 ) ) +
660+ r ;
661+
631662 for ( int g = cube . GMin + 1 ; g <= cube . GMax ; g ++ )
632663 {
664+ int ind1G = ind1R +
665+ ( g << ( IndexBits + IndexAlphaBits ) ) +
666+ ( g << IndexBits ) +
667+ g ;
668+ int r_g = r + g ;
669+
633670 for ( int b = cube . BMin + 1 ; b <= cube . BMax ; b ++ )
634671 {
672+ int ind1B = ind1G +
673+ ( ( r_g + b ) << IndexAlphaBits ) +
674+ b ;
675+
635676 for ( int a = cube . AMin + 1 ; a <= cube . AMax ; a ++ )
636677 {
637- tagSpan [ GetPaletteIndex ( r , g , b , a ) ] = label ;
678+ int index = ind1B + a ;
679+
680+ tagSpan [ index ] = label ;
638681 }
639682 }
640683 }
0 commit comments