Skip to content

Commit 4485b78

Browse files
committed
fixup! [AMDGPU] ISel for @llvm.amdgcn.cs.chain intrinsic
1 parent 95a0666 commit 4485b78

File tree

5 files changed

+50
-50
lines changed

5 files changed

+50
-50
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc --global-isel=1 -march=amdgcn -mcpu=gfx1100 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX11
3-
; RUN: llc --global-isel=1 -march=amdgcn -mcpu=gfx1030 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX10
2+
; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX11
3+
; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX10
44

55
declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
66
declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })

llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
66

77
declare amdgpu_gfx void @use(...)
88

9-
; FIXME: The values of the counters are undefined on entry to amdgpu_cs_chain functions, so these waits are unnecessary.
10-
119
define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
1210
; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
1311
; GISEL-GFX11: ; %bb.0:
@@ -398,11 +396,14 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
398396
;
399397
; GISEL-GFX10-LABEL: cs_to_chain:
400398
; GISEL-GFX10: ; %bb.0:
401-
; GISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
402-
; GISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
403-
; GISEL-GFX10-NEXT: s_mov_b32 s102, -1
404-
; GISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
399+
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
400+
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
405401
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
402+
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
403+
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
404+
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
405+
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
406+
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
406407
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
407408
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
408409
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
@@ -412,8 +413,6 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
412413
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
413414
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
414415
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
415-
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
416-
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
417416
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
418417
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
419418
; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
@@ -442,27 +441,28 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
442441
;
443442
; DAGISEL-GFX10-LABEL: cs_to_chain:
444443
; DAGISEL-GFX10: ; %bb.0:
445-
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
446-
; DAGISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
447-
; DAGISEL-GFX10-NEXT: s_mov_b32 s102, -1
448-
; DAGISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
444+
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
445+
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
446+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
447+
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
448+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
449+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
450+
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
451+
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
449452
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
450453
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
451454
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
452455
; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4
453456
; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12
454-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
455-
; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
456457
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
458+
; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
457459
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
458460
; DAGISEL-GFX10-NEXT: s_nop
459461
; DAGISEL-GFX10-NEXT: ;;#ASMEND
460462
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
461463
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
462464
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
463465
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
464-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
465-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
466466
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
467467
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
468468
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]

llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
6-
7-
; FIXME: The values of the counters are undefined on entry to amdgpu_cs_chain_preserve functions, so these waits are unnecessary.
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
86

97
define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
108
; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
@@ -50,11 +48,14 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
5048
;
5149
; GISEL-GFX10-LABEL: cs_to_chain_preserve:
5250
; GISEL-GFX10: ; %bb.0:
53-
; GISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
54-
; GISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
55-
; GISEL-GFX10-NEXT: s_mov_b32 s102, -1
56-
; GISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
51+
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
52+
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
5753
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
54+
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
55+
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
56+
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
57+
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
58+
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
5859
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
5960
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
6061
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
@@ -64,8 +65,6 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
6465
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
6566
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
6667
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
67-
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
68-
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
6968
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
7069
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
7170
; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
@@ -94,27 +93,28 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
9493
;
9594
; DAGISEL-GFX10-LABEL: cs_to_chain_preserve:
9695
; DAGISEL-GFX10: ; %bb.0:
97-
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
98-
; DAGISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
99-
; DAGISEL-GFX10-NEXT: s_mov_b32 s102, -1
100-
; DAGISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
96+
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
97+
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
98+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
99+
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
100+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
101+
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
102+
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
103+
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
101104
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
102105
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
103106
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
104107
; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
105108
; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
106-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
107-
; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
108109
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
110+
; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
109111
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
110112
; DAGISEL-GFX10-NEXT: s_nop
111113
; DAGISEL-GFX10-NEXT: ;;#ASMEND
112114
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
113115
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
114116
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
115117
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
116-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
117-
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
118118
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
119119
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
120120
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]

llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
66

77
declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
88
declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })

llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5-
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
66

77
declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
88
declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })

0 commit comments

Comments
 (0)