@@ -444,14 +444,6 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
444444; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen 
445445; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16 
446446; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32 
447- ; GISEL-GFX942-NEXT:    v_add_u32_e32 v63, s12, v1 
448- ; GISEL-GFX942-NEXT:    v_add_u32_e32 v1, 0x100, v1 
449- ; GISEL-GFX942-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v0 
450- ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
451- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a0, v13 ; Reload Reuse 
452- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a1, v12 ; Reload Reuse 
453- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a2, v11 ; Reload Reuse 
454- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a3, v10 ; Reload Reuse 
455447; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[14:17], v62, s[8:11], 0 offen offset:48 
456448; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[18:21], v62, s[8:11], 0 offen offset:64 
457449; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[22:25], v62, s[8:11], 0 offen offset:80 
@@ -464,20 +456,15 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
464456; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[50:53], v62, s[8:11], 0 offen offset:192 
465457; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208 
466458; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224 
467- ; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:240 
468- ; GISEL-GFX942-NEXT:    s_nop 0 
459+ ; GISEL-GFX942-NEXT:    buffer_load_dwordx4 a[0:3], v62, s[8:11], 0 offen offset:240 
460+ ; GISEL-GFX942-NEXT:    v_add_u32_e32 v63, s12, v1 
461+ ; GISEL-GFX942-NEXT:    v_add_u32_e32 v1, 0x100, v1 
462+ ; GISEL-GFX942-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v0 
463+ ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
464+ ; GISEL-GFX942-NEXT:    scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill 
469465; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen 
470466; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16 
471- ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(2) 
472- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a4, v13 ; Reload Reuse 
473- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v5, a0 ; Reload Reuse 
474- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v4, a1 ; Reload Reuse 
475- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v3, a2 ; Reload Reuse 
476- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v2, a3 ; Reload Reuse 
477- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a5, v12 ; Reload Reuse 
478- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a6, v11 ; Reload Reuse 
479- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a7, v10 ; Reload Reuse 
480- ; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:32 
467+ ; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[10:13], v63, s[4:7], 0 offen offset:32 
481468; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48 
482469; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64 
483470; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80 
@@ -490,10 +477,8 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
490477; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192 
491478; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208 
492479; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224 
493- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v5, a4 ; Reload Reuse 
494- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v4, a5 ; Reload Reuse 
495- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v3, a6 ; Reload Reuse 
496- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v2, a7 ; Reload Reuse 
480+ ; GISEL-GFX942-NEXT:    scratch_load_dwordx4 v[2:5], off, off ; 16-byte Folded Reload 
481+ ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
497482; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240 
498483; GISEL-GFX942-NEXT:    s_cbranch_vccnz .LBB0_1 
499484; GISEL-GFX942-NEXT:  ; %bb.2: ; %memcpy-split 
@@ -822,14 +807,6 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
822807; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[2:5], v1, s[4:7], 0 offen 
823808; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[6:9], v1, s[4:7], 0 offen offset:16 
824809; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v1, s[4:7], 0 offen offset:32 
825- ; SDAG-GFX942-NEXT:    v_add_u32_e32 v62, s8, v0 
826- ; SDAG-GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v0 
827- ; SDAG-GFX942-NEXT:    s_and_b64 vcc, exec, vcc 
828- ; SDAG-GFX942-NEXT:    s_waitcnt vmcnt(0) 
829- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a0, v13 ; Reload Reuse 
830- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a1, v12 ; Reload Reuse 
831- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a2, v11 ; Reload Reuse 
832- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a3, v10 ; Reload Reuse 
833810; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[14:17], v1, s[4:7], 0 offen offset:48 
834811; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[18:21], v1, s[4:7], 0 offen offset:64 
835812; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[22:25], v1, s[4:7], 0 offen offset:80 
@@ -842,20 +819,16 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
842819; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[50:53], v1, s[4:7], 0 offen offset:192 
843820; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[54:57], v1, s[4:7], 0 offen offset:208 
844821; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[58:61], v1, s[4:7], 0 offen offset:224 
845- ; SDAG-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v1, s[4:7], 0 offen offset:240 
846- ; SDAG-GFX942-NEXT:    s_nop 0 
822+ ; SDAG-GFX942-NEXT:    buffer_load_dwordx4 a[0:3], v1, s[4:7], 0 offen offset:240 
823+ ; SDAG-GFX942-NEXT:    v_add_u32_e32 v62, s8, v0 
824+ ; SDAG-GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v0 
825+ ; SDAG-GFX942-NEXT:    s_and_b64 vcc, exec, vcc 
826+ ; SDAG-GFX942-NEXT:    s_waitcnt vmcnt(0) 
827+ ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v63, a3 ; Reload Reuse 
828+ ; SDAG-GFX942-NEXT:    scratch_store_dwordx3 off, a[0:2], off ; 12-byte Folded Spill 
847829; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen 
848830; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[6:9], v62, s[12:15], 0 offen offset:16 
849- ; SDAG-GFX942-NEXT:    s_waitcnt vmcnt(2) 
850- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a4, v13 ; Reload Reuse 
851- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v5, a0 ; Reload Reuse 
852- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v4, a1 ; Reload Reuse 
853- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v3, a2 ; Reload Reuse 
854- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v2, a3 ; Reload Reuse 
855- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a5, v12 ; Reload Reuse 
856- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a6, v11 ; Reload Reuse 
857- ; SDAG-GFX942-NEXT:    v_accvgpr_write_b32 a7, v10 ; Reload Reuse 
858- ; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen offset:32 
831+ ; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[10:13], v62, s[12:15], 0 offen offset:32 
859832; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[14:17], v62, s[12:15], 0 offen offset:48 
860833; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[18:21], v62, s[12:15], 0 offen offset:64 
861834; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[22:25], v62, s[12:15], 0 offen offset:80 
@@ -868,10 +841,8 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
868841; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[50:53], v62, s[12:15], 0 offen offset:192 
869842; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[54:57], v62, s[12:15], 0 offen offset:208 
870843; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[58:61], v62, s[12:15], 0 offen offset:224 
871- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v5, a4 ; Reload Reuse 
872- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v4, a5 ; Reload Reuse 
873- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v3, a6 ; Reload Reuse 
874- ; SDAG-GFX942-NEXT:    v_accvgpr_read_b32 v2, a7 ; Reload Reuse 
844+ ; SDAG-GFX942-NEXT:    scratch_load_dwordx3 v[2:4], off, off ; 12-byte Folded Reload 
845+ ; SDAG-GFX942-NEXT:    s_waitcnt vmcnt(0) 
875846; SDAG-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen offset:240 
876847; SDAG-GFX942-NEXT:    s_cbranch_vccnz .LBB1_1 
877848; SDAG-GFX942-NEXT:  ; %bb.2: ; %memcpy-split 
@@ -993,16 +964,6 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
993964; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[2:5], v1, s[8:11], 0 offen 
994965; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[6:9], v1, s[8:11], 0 offen offset:16 
995966; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:32 
996- ; GISEL-GFX942-NEXT:    v_add_u32_e32 v62, s12, v0 
997- ; GISEL-GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v0 
998- ; GISEL-GFX942-NEXT:    s_xor_b64 s[2:3], vcc, -1 
999- ; GISEL-GFX942-NEXT:    s_xor_b64 s[2:3], s[2:3], -1 
1000- ; GISEL-GFX942-NEXT:    s_and_b64 vcc, s[2:3], exec 
1001- ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
1002- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a0, v13 ; Reload Reuse 
1003- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a1, v12 ; Reload Reuse 
1004- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a2, v11 ; Reload Reuse 
1005- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a3, v10 ; Reload Reuse 
1006967; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[14:17], v1, s[8:11], 0 offen offset:48 
1007968; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[18:21], v1, s[8:11], 0 offen offset:64 
1008969; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[22:25], v1, s[8:11], 0 offen offset:80 
@@ -1015,20 +976,18 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
1015976; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[50:53], v1, s[8:11], 0 offen offset:192 
1016977; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[54:57], v1, s[8:11], 0 offen offset:208 
1017978; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[58:61], v1, s[8:11], 0 offen offset:224 
1018- ; GISEL-GFX942-NEXT:    buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:240 
1019- ; GISEL-GFX942-NEXT:    s_nop 0 
979+ ; GISEL-GFX942-NEXT:    buffer_load_dwordx4 a[0:3], v1, s[8:11], 0 offen offset:240 
980+ ; GISEL-GFX942-NEXT:    v_add_u32_e32 v62, s12, v0 
981+ ; GISEL-GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v0 
982+ ; GISEL-GFX942-NEXT:    s_xor_b64 s[2:3], vcc, -1 
983+ ; GISEL-GFX942-NEXT:    s_xor_b64 s[2:3], s[2:3], -1 
984+ ; GISEL-GFX942-NEXT:    s_and_b64 vcc, s[2:3], exec 
985+ ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
986+ ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v63, a3 ; Reload Reuse 
987+ ; GISEL-GFX942-NEXT:    scratch_store_dwordx3 off, a[0:2], off ; 12-byte Folded Spill 
1020988; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen 
1021989; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[6:9], v62, s[4:7], 0 offen offset:16 
1022- ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(2) 
1023- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a4, v13 ; Reload Reuse 
1024- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v5, a0 ; Reload Reuse 
1025- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v4, a1 ; Reload Reuse 
1026- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v3, a2 ; Reload Reuse 
1027- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v2, a3 ; Reload Reuse 
1028- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a5, v12 ; Reload Reuse 
1029- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a6, v11 ; Reload Reuse 
1030- ; GISEL-GFX942-NEXT:    v_accvgpr_write_b32 a7, v10 ; Reload Reuse 
1031- ; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:32 
990+ ; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[10:13], v62, s[4:7], 0 offen offset:32 
1032991; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[14:17], v62, s[4:7], 0 offen offset:48 
1033992; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[18:21], v62, s[4:7], 0 offen offset:64 
1034993; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[22:25], v62, s[4:7], 0 offen offset:80 
@@ -1041,10 +1000,8 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
10411000; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[50:53], v62, s[4:7], 0 offen offset:192 
10421001; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[54:57], v62, s[4:7], 0 offen offset:208 
10431002; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[58:61], v62, s[4:7], 0 offen offset:224 
1044- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v5, a4 ; Reload Reuse 
1045- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v4, a5 ; Reload Reuse 
1046- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v3, a6 ; Reload Reuse 
1047- ; GISEL-GFX942-NEXT:    v_accvgpr_read_b32 v2, a7 ; Reload Reuse 
1003+ ; GISEL-GFX942-NEXT:    scratch_load_dwordx3 v[2:4], off, off ; 12-byte Folded Reload 
1004+ ; GISEL-GFX942-NEXT:    s_waitcnt vmcnt(0) 
10481005; GISEL-GFX942-NEXT:    buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:240 
10491006; GISEL-GFX942-NEXT:    s_cbranch_vccnz .LBB1_1 
10501007; GISEL-GFX942-NEXT:  ; %bb.2: ; %memcpy-split 
0 commit comments