-
Notifications
You must be signed in to change notification settings - Fork 13.9k
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test #125711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu
Author: Fabian Ritter (ritter-x2a)
Changes: [AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test
gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986).
Patch is 31.19 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/125711.diff
276 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
index 424388a30e99b41..d1a303b41deefe5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -24,12 +24,12 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: local_atomic_fmax_ret_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: ds_max_rtn_f32 v0, v0, v1
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_ret_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ds_max_rtn_f32 v0, v0, v1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: local_atomic_fmax_ret_f32:
; GFX11: ; %bb.0:
@@ -96,12 +96,12 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: local_atomic_fmax_noret_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: ds_max_f32 v0, v1
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_noret_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ds_max_f32 v0, v1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: local_atomic_fmax_noret_f32:
; GFX11: ; %bb.0:
@@ -168,14 +168,14 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: local_atomic_fmax_ret_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v4, v1
-; GFX940-NEXT: v_mov_b32_e32 v5, v2
-; GFX940-NEXT: ds_max_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_ret_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: ds_max_rtn_f64 v[0:1], v0, v[4:5]
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: local_atomic_fmax_ret_f64:
; GFX11: ; %bb.0:
@@ -244,14 +244,14 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: local_atomic_fmax_noret_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v4, v1
-; GFX940-NEXT: v_mov_b32_e32 v5, v2
-; GFX940-NEXT: ds_max_f64 v0, v[4:5]
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_noret_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: ds_max_f64 v0, v[4:5]
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: local_atomic_fmax_noret_f64:
; GFX11: ; %bb.0:
@@ -320,30 +320,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: global_load_dword v3, v[0:1], off
-; GFX940-NEXT: s_mov_b64 s[0:1], 0
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v5, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_cbranch_execnz .LBB4_1
-; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v0, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: global_load_dword v3, v[0:1], off
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT: v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_cbranch_execnz .LBB4_1
+; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -466,29 +466,29 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: global_load_dword v3, v[0:1], off
-; GFX940-NEXT: s_mov_b64 s[0:1], 0
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v3, v2
-; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_cbranch_execnz .LBB5_1
-; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: global_load_dword v3, v[0:1], off
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_cbranch_execnz .LBB5_1
+; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -626,14 +626,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -781,14 +781,14 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -911,30 +911,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: flat_load_dword v3, v[0:1]
-; GFX940-NEXT: s_mov_b64 s[0:1], 0
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v5, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_cbranch_execnz .LBB8_1
-; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v0, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: flat_load_dword v3, v[0:1]
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT: v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_cbranch_execnz .LBB8_1
+; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -1053,29 +1053,29 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: flat_load_dword v3, v[0:1]
-; GFX940-NEXT: s_mov_b64 s[0:1], 0
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v3, v2
-; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_cbranch_execnz .LBB9_1
-; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: flat_load_dword v3, v[0:1]
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_cbranch_execnz .LBB9_1
+; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -1212,14 +1212,14 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -1365,14 +1365,14 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
-; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
@@ -1497,32 +1497,32 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v2, s16
-; GFX940-NEXT: v_mov_b32_e32 v1, v0
-; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
-; GFX940-NEXT: s_mov_b64 s[4:5], 0
-; GFX940-NEXT: v_max_f32_e32 v3, v1, v1
-; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v5, v0
-; GFX940-NEXT: v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT: v_max_f32_e32 v4, v0, v3
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT: buffer_wbl2 sc1
-; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: buffer_inv sc1
-; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT: s_cbranch_execnz .LBB12_1
-; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v2, s16
+; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX942-NEXT: s_mov_b64 s[4:5], 0
+; GFX942-NEXT: v_max_f32_e32 v3, v1, v1
+; GFX942-NEXT: .LBB12_1: ; %...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't check every single file because it took GitHub forever to load them. The 10+ files I did look into seem fine, so I'll stamp green on it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've got no stake here, but it seems logical to me for dumping tools to still be able to identify obsolete flavours of AMDGPU, so that if somebody needs to inspect one of these objects, they'd be able to see that it is the obsolete version.
Specifically, I'm thinking the changes in the Object/AMDGPU/elf-header-flags-mach.yaml and tools/llvm-readobj/ELF/AMDGPU/elf-headers.test tests should be dropped. I'd say the same for the llvm-objdump test, but I expect that's less viable, since it requires llc support to generate the input (could it be switched to yaml2obj instead?).
Makes sense to me, I'll bring it up for an internal discussion. Thanks for pointing that out, @jh7370!
Per further discussion, we found it best to also drop these targets from the ELF tooling.
ba30315
to
d50aa30
Compare
gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942).
Mainly removes the sc0 and sc1 flags from memory writes, since gfx942 does not force them, in contrast to gfx940 and gfx941.
d50aa30
to
9288ce5
Compare
…m#125711) [AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
…m#125711) [AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test
gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base.
This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942).
The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986).
For SWDEV-512631