@@ -437,30 +437,33 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
437437; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
438438; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
439439; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
440- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
441- ; GFX9-O0-NEXT: s_nop 0
442- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
443440; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
444441; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
445- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
442+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
443+ ; GFX9-O0-NEXT: s_nop 0
444+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
445+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
446+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
447+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
446448; GFX9-O0-NEXT: s_nop 0
447- ; GFX9-O0-NEXT: buffer_store_dword v8 , off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
449+ ; GFX9-O0-NEXT: buffer_store_dword v10 , off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
448450; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
449451; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
450- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
452+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
453+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
451454; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
452455; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
453456; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
454457; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
455458; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
456459; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
457460; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
458- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
459- ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
460- ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
461+ ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
462+ ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
461463; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
462464; GFX9-O0-NEXT: s_mov_b32 s14, s13
463465; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
466+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
464467; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
465468; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
466469; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -474,17 +477,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
474477; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
475478; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
476479; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
477- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
478480; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
479- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
480- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
481+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
481482; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
482- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13 ]
483+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9 ]
483484; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
484485; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
485- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
486486; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
487- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13 ]
487+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9 ]
488488; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
489489; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
490490; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -977,10 +977,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
977977; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
978978; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
979979; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
980- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
981- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
982- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
983- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
980+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
981+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
982+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
983+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
984984; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
985985; GFX9-O0-NEXT: s_mov_b32 s5, s6
986986; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2564,17 +2564,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
25642564; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
25652565; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
25662566; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2567- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2568- ; GFX9-O0-NEXT: s_nop 0
2569- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
25702567; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
25712568; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2572- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2569+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
25732570; GFX9-O0-NEXT: s_nop 0
2574- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2571+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2572+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2573+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2574+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2575+ ; GFX9-O0-NEXT: s_nop 0
2576+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
25752577; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
25762578; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2577- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2579+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2580+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
25782581; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
25792582; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
25802583; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
@@ -2587,6 +2590,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
25872590; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
25882591; GFX9-O0-NEXT: s_mov_b32 s14, s13
25892592; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2593+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
25902594; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
25912595; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
25922596; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3100,10 +3104,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
31003104; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
31013105; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
31023106; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3103- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3104- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3105- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3106- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3107+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3108+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3109+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3110+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
31073111; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
31083112; GFX9-O0-NEXT: s_mov_b32 s5, s6
31093113; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments