Skip to content

Commit fde9cb1

Browse files
committed
For gfx10.
1 parent 3c3a22a commit fde9cb1

15 files changed

+350
-345
lines changed

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,15 +1279,20 @@ multiclass MIMG_Sampler_nortn_gfx10_m<mimgopc op, string opcode,
12791279
}
12801280

12811281
class MIMG_Sampler_nortn_nsa_gfx10<mimgopc op, string opcode,
1282-
int num_addrs,
1283-
string dns="">
1282+
int num_addrs, bit hasR128,
1283+
string dns>
12841284
: MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
1285-
let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16, false>.ret;
1285+
let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16, hasR128>.ret;
12861286
let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " off, "#AddrAsm, BaseOpcode.HasD16>.ret;
12871287
// Force vdata to VGPR0 as no result will be returned.
12881288
let vdata = 0;
12891289
}
12901290

1291+
multiclass MIMG_Sampler_nortn_nsa_gfx10_m<mimgopc op, string opcode, int num_addrs, string dns=""> {
1292+
def _R1_nortn_nsa_gfx10 : MIMG_Sampler_nortn_nsa_gfx10<op, opcode, num_addrs, false, dns>;
1293+
def _R2_nortn_nsa_gfx10 : MIMG_Sampler_nortn_nsa_gfx10<op, opcode, num_addrs, true, "">;
1294+
}
1295+
12911296
class MIMG_Sampler_gfx11<mimgopc op, string opcode,
12921297
RegisterClass DataRC, RegisterClass AddrRC,
12931298
string dns="">
@@ -1523,8 +1528,8 @@ multiclass MIMG_Sampler_NoReturn <mimgopc op, AMDGPUSampleVariant sample, bit wq
15231528
foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.NSAInstrs in {
15241529
let VAddrDwords = addr.NumWords in {
15251530
if op.HAS_GFX10M then {
1526-
def _V # addr.NumWords # _nsa_gfx10
1527-
: MIMG_Sampler_nortn_nsa_gfx10<op, asm, addr.NumWords>;
1531+
defm _V # addr.NumWords
1532+
: MIMG_Sampler_nortn_nsa_gfx10_m<op, asm, addr.NumWords>;
15281533
}
15291534
}
15301535
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ body: |
4141
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
4242
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
4343
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
44-
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
44+
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_R1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
4545
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
4646
; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
4747
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -105,7 +105,7 @@ body: |
105105
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
106106
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
107107
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
108-
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
108+
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_R1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
109109
; GFX10-NEXT: S_ENDPGM 0
110110
; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return
111111
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -166,7 +166,7 @@ body: |
166166
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
167167
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
168168
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
169-
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
169+
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_R1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
170170
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
171171
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
172172
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
@@ -230,7 +230,7 @@ body: |
230230
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
231231
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
232232
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
233-
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
233+
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_R1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
234234
; GFX10-NEXT: S_ENDPGM 0
235235
; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return
236236
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4

llvm/test/CodeGen/AMDGPU/adjust-writemask-cse.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define float @test() {
88
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3, [[S_MOV_B32_]], %subreg.sub4, [[S_MOV_B32_]], %subreg.sub5, [[S_MOV_B32_]], %subreg.sub6, [[S_MOV_B32_]], %subreg.sub7
99
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1010
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
11-
; GFX10-NEXT: [[IMAGE_LOAD_V2_V2_nsa_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_LOAD_V2_V2_nsa_gfx10 [[COPY]], [[COPY1]], killed [[REG_SEQUENCE]], 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
11+
; GFX10-NEXT: [[IMAGE_LOAD_V2_V2_nsa_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_LOAD_V2_V2_R1_nsa_gfx10 [[COPY]], [[COPY1]], killed [[REG_SEQUENCE]], 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
1212
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[IMAGE_LOAD_V2_V2_nsa_gfx10_]].sub1
1313
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[IMAGE_LOAD_V2_V2_nsa_gfx10_]].sub0
1414
; GFX10-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY2]], 0, killed [[COPY3]], 0, 0, implicit $mode, implicit $exec

llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ body: |
3232
%16.sub5:sgpr_256 = COPY %16.sub0
3333
%16.sub6:sgpr_256 = COPY %16.sub0
3434
%16.sub7:sgpr_256 = COPY %16.sub0
35-
IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)
35+
IMAGE_STORE_V4_V2_R1_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)
3636
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
3737
S_BRANCH %bb.1
3838
@@ -41,7 +41,7 @@ body: |
4141
%30.sub2:vreg_128 = COPY %30.sub1
4242
%30.sub3:vreg_128 = COPY %30.sub1
4343
%26.sub1:vreg_64 = COPY %30.sub1
44-
IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)
44+
IMAGE_STORE_V4_V2_R1_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)
4545
4646
bb.2:
4747
S_ENDPGM 0
@@ -97,7 +97,7 @@ body: |
9797
%25.sub5:sgpr_256 = COPY %25.sub0
9898
%25.sub6:sgpr_256 = COPY %25.sub0
9999
%25.sub7:sgpr_256 = COPY %25.sub0
100-
IMAGE_STORE_V4_V3_nsa_gfx10 %28, %3, %30, %4, %25, 0, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32))
100+
IMAGE_STORE_V4_V3_R1_nsa_gfx10 %28, %3, %30, %4, %25, 0, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32))
101101
102102
bb.3:
103103
S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx10.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ body: |
1111
; CHECK-LABEL: name: mimg_nsa
1212
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
1313
; CHECK-NEXT: {{ $}}
14-
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
15-
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
16-
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
17-
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
14+
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
15+
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
16+
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
17+
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
1818
...
1919

2020
---
@@ -26,10 +26,10 @@ body: |
2626
; CHECK-LABEL: name: mimg_nsa_mixed
2727
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
2828
; CHECK-NEXT: {{ $}}
29-
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
30-
; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
31-
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
32-
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
33-
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
34-
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
29+
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
30+
; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_R1_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
31+
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
32+
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
33+
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_R1_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
34+
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_R1_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
3535
...

llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
# IMAGE_SAMPLE_V3_V2_gfx10, even though it has just overwritten half of it.
77

88
# CHECK-LABEL: name: _amdgpu_ps_main
9-
# CHECK-NOT: IMAGE_SAMPLE_V3_V2_gfx10 {{.*}} $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
10-
# CHECK: IMAGE_SAMPLE_V3_V2_gfx10 {{.*}} $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
9+
# CHECK-NOT: IMAGE_SAMPLE_V3_V2_R1_gfx10 {{.*}} $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
10+
# CHECK: IMAGE_SAMPLE_V3_V2_R1_gfx10 {{.*}} $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
1111

1212
---
1313
name: _amdgpu_ps_main
@@ -18,7 +18,7 @@ body: |
1818
1919
renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
2020
renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
21-
renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16)
21+
renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_R1_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16)
2222
S_ENDPGM 0
2323
2424
...

0 commit comments

Comments
 (0)