[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

ritter-x2a · 2025-03-19T10:08:53Z

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds.

For SWDEV-516125.

ritter-x2a · 2025-03-19T10:09:15Z

Warning

This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
Learn more

[AMDGPU][SDAG] Only fold flat offsets if they are inbounds #132353
[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994 👈 (View in Graphite)
[SDAG] Introduce inbounds flag for pointer arithmetic #131862 : 1 other dependent PR (#131863 )
[SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking and NUW #130617
[SeparateConstOffsetFromGEP] Don't set unsound inbounds flag #130616
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-03-21T07:46:42Z

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds.

For SWDEV-516125.

Patch is 410.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131994.diff

20 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (+42-42)
(modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+34-34)
(modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+28-28)
(modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+28-28)
(modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+29-29)
(modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+143-143)
(modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+100-100)
(modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+13-13)
(modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll (+123-123)
(modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (+100-100)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (+34-34)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (+34-34)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+60-60)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+60-60)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+60-60)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+59-59)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+56-56)
(modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+54-54)
(modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1)

diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index e74fd21365c9d..90ef9a7a45863 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -25,7 +25,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
 ; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 -4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 -4
   %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   ret void
 }
@@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
 ; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 -4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 -4
   %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   ret void
 }
@@ -83,7 +83,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
 ; GFX12-GISEL-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 4
   %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   store i32 %val, ptr %use
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 3305cac0d7ea6..9b57bc2f74df0 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -12,8 +12,8 @@
 define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
@@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
 
@@ -167,12 +167,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
 ; OPT-GFX7-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX7-NEXT:    br label [[ENDIF]]
 ; OPT-GFX7:       endif:
@@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
 ; OPT-GFX9-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
 ; OPT-GFX10-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -325,12 +325,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT:       if:
 ; OPT-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
-; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
+; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28
 ; OPT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
 ; OPT-NEXT:    br label [[ENDIF]]
 ; OPT:       endif:
@@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -438,8 +438,8 @@ done:
 define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX7-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX8-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
 ; OPT-GFX9-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
 ; OPT-GFX9-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
@@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX10-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i32 1024
-  %in.gep = getelementptr i8, ptr %in, i64 4095
+  %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 4095
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
@@ -611,8 +611,8 @@ done:
 define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
-; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 99999
+; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4096
 ; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -711,8 +711,8 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 99999
-  %in.gep = getelementptr i8, ptr %in, i64 4096
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 99999
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 4096
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
@@ -734,8 +734,8 @@ done:
 define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; OPT-LABEL: @test_sinkable_flat_reg_offset(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
 ; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -834,8 +834,8 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i32 1024
-  %in.gep = getelementptr i8, ptr %in, i64 %reg
+  %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 %reg
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
index c713c48c92457..4a6b1843de3b6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
@@ -369,7 +369,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -563,7 +563,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 -512
+  %gep = getelementptr inbounds float, ptr %ptr, i64 -512
   %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -986,7 +986,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret void
 }
@@ -1208,7 +1208,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 -512
+  %gep = getelementptr inbounds float, ptr %ptr, i64 -512
   %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret void
 }
@@ -1397,7 +1397,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -1617,7 +1617,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ign...
[truncated]

This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. For SWDEV-516125.

This was referenced Mar 19, 2025

[SeparateConstOffsetFromGEP] Don't set unsound inbounds flag #130616

Merged

[SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking and NUW #130617

Merged

[SDAG] Introduce inbounds flag for pointer arithmetic #131862

Open

ritter-x2a mentioned this pull request Mar 19, 2025

[AMDGPU][SDAG] Only fold flat offsets if they are inbounds #131863

Closed

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4d8dda5 to efdf7eb Compare March 21, 2025 07:40

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from a57b595 to dab4dde Compare March 21, 2025 07:40

ritter-x2a mentioned this pull request Mar 21, 2025

[AMDGPU][SDAG] Only fold flat offsets if they are inbounds #132353

Open

ritter-x2a added the backend:AMDGPU label Mar 21, 2025 — with Graphite App

ritter-x2a requested review from arsenm, mariusz-sikora-at-amd and shiltian March 21, 2025 07:46

ritter-x2a marked this pull request as ready for review March 21, 2025 07:47

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from efdf7eb to bd22087 Compare March 21, 2025 13:35

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from dab4dde to 173036a Compare March 21, 2025 13:35

arsenm approved these changes Mar 22, 2025

View reviewed changes

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from bd22087 to 4248162 Compare March 27, 2025 09:02

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch 2 times, most recently from 5e4da87 to de34b4c Compare March 31, 2025 11:37

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4248162 to ee21919 Compare March 31, 2025 11:37

arsenm approved these changes Apr 1, 2025

View reviewed changes

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from ee21919 to 75e41ae Compare April 10, 2025 12:27

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from de34b4c to 39089ea Compare April 10, 2025 12:27

arsenm approved these changes Apr 10, 2025

View reviewed changes

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from 39089ea to 0614081 Compare April 10, 2025 14:08

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 75e41ae to 4b88628 Compare April 10, 2025 14:08

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from 0614081 to ab60c5a Compare April 14, 2025 09:39

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4b88628 to 139ed05 Compare April 14, 2025 09:39

ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 139ed05 to 04831c1 Compare April 23, 2025 10:40

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds

cb87bed

This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. For SWDEV-516125.

ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from ab60c5a to cb87bed Compare April 23, 2025 10:41

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

ritter-x2a commented Mar 19, 2025

Uh oh!

ritter-x2a commented Mar 19, 2025 •

edited

Loading

Uh oh!

llvmbot commented Mar 21, 2025

Uh oh!

Uh oh!

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

Are you sure you want to change the base?

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

Conversation

ritter-x2a commented Mar 19, 2025

Uh oh!

ritter-x2a commented Mar 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Mar 21, 2025

Uh oh!

Uh oh!

ritter-x2a commented Mar 19, 2025 •

edited

Loading