-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[NFC][AMDGPU] Auto generate check lines for three test cases #127352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
shiltian
merged 1 commit into
main
from
users/shiltian/autogen-tests-for-for-striped-sgrp-cc
Feb 17, 2025
Merged
[NFC][AMDGPU] Auto generate check lines for three test cases #127352
shiltian
merged 1 commit into
main
from
users/shiltian/autogen-tests-for-for-striped-sgrp-cc
Feb 17, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
- `CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll`
- `CodeGen/AMDGPU/call-preserved-registers.ll`
- `CodeGen/AMDGPU/stack-realign.ll`

This is in preparation for another PR.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes
This is in preparation for another PR. Patch is 92.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127352.diff 3 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index ff80e05197b0d..db9ce56ecc3cc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
@@ -5,110 +6,258 @@
declare hidden void @external_void_func_void() #3
-; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_getpc_b64 s[34:35]
-; GCN-NEXT: s_add_u32 s34, s34,
-; GCN-NEXT: s_addc_u32 s35, s35,
-; GCN: s_swappc_b64 s[30:31], s[34:35]
-
-; GCN-NEXT: #ASMSTART
-; GCN-NEXT: #ASMEND
-; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; FLATSCR-LABEL: test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_getpc_b64 s[34:35]
+; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: s_endpgm
call void @external_void_func_void()
call void asm sideeffect "", ""() #0
call void @external_void_func_void()
ret void
}
-; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; MUBUF: buffer_store_dword
-; FLATSCR: scratch_store_dword
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; GCN: v_writelane_b32 v40, s30, 0
-; GCN: v_writelane_b32 v40, s31, 1
-; GCN: v_writelane_b32 v40, s34, 2
-; GCN: v_writelane_b32 v40, s35, 3
-
-; GCN: s_swappc_b64
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_swappc_b64
-; GCN: v_readlane_b32 s35, v40, 3
-; GCN: v_readlane_b32 s34, v40, 2
-; MUBUF-DAG: v_readlane_b32 s31, v40, 1
-; MUBUF-DAG: v_readlane_b32 s30, v40, 0
-; FLATSCR-DAG: v_readlane_b32 s31, v40, 1
-; FLATSCR-DAG: v_readlane_b32 s30, v40, 0
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF: buffer_load_dword
-; FLATSCR: scratch_load_dword
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN: s_setpc_b64 s[30:31]
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; MUBUF-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT: s_getpc_b64 s[34:35]
+; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT: v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT: s_mov_b32 s32, s33
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT: s_getpc_b64 s[34:35]
+; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT: s_mov_b32 s32, s33
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_void()
call void asm sideeffect "", ""() #0
call void @external_void_func_void()
ret void
}
-; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN: s_mov_b32 s33, s32
-; MUBUF: buffer_store_dword v40
-; FLATSCR: scratch_store_dword off, v40
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; MUBUF: s_addk_i32 s32, 0x400
-; FLATSCR: s_add_i32 s32, s32, 16
-
-; GCN: s_swappc_b64
-; GCN-NEXT: s_swappc_b64
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF: buffer_load_dword v40
-; FLATSCR: scratch_load_dword v40
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
define void @test_func_call_external_void_funcx2() #0 {
+; MUBUF-LABEL: test_func_call_external_void_funcx2:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT: s_getpc_b64 s[34:35]
+; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT: v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT: s_mov_b32 s32, s33
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_funcx2:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT: s_getpc_b64 s[34:35]
+; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT: s_mov_b32 s32, s33
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_void()
call void @external_void_func_void()
ret void
}
-; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
-; GCN: s_waitcnt
-; GCN: v_writelane_b32 v0, s30, 0
-; GCN: v_writelane_b32 v0, s31, 1
-; GCN-NEXT: #ASMSTART
-; GCN: ; clobber
-; GCN-NEXT: #ASMEND
-; GCN: v_readlane_b32 s31, v0, 1
-; GCN: v_readlane_b32 s30, v0, 0
-; GCN: s_setpc_b64 s[30:31]
define void @void_func_void_clobber_s30_s31() #2 {
+; MUBUF-LABEL: void_func_void_clobber_s30_s31:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v0, s30, 0
+; MUBUF-NEXT: v_writelane_b32 v0, s31, 1
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; clobber
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: v_readlane_b32 s31, v0, 1
+; MUBUF-NEXT: v_readlane_b32 s30, v0, 0
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: void_func_void_clobber_s30_s31:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT: v_writelane_b32 v0, s30, 0
+; FLATSCR-NEXT: v_writelane_b32 v0, s31, 1
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; clobber
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1
+; FLATSCR-NEXT: v_readlane_b32 s30, v0, 0
+; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
ret void
}
-; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
-; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_setpc_b64 s[30:31]
define hidden void @void_func_void_clobber_vcc() #2 {
+; GCN-LABEL: void_func_void_clobber_vcc:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "", "~{vcc}"() #0
ret void
}
-; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
-; GCN: s_getpc_b64
-; GCN-NEXT: s_add_u32
-; GCN-NEXT: s_addc_u32
-; GCN: s_mov_b64 s[34:35], vcc
-; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b64 vcc, s[34:35]
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_add_u32 s8, s4, 8
+; FLATSCR-NEXT: s_addc_u32 s9, s5, 0
+; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; FLATSCR-NEXT: s_mov_b32 s14, s12
+; FLATSCR-NEXT: s_mov_b32 s13, s11
+; FLATSCR-NEXT: s_mov_b32 s12, s10
+; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
+; FLATSCR-NEXT: s_getpc_b64 s[16:17]
+; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
+; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
+; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
+; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; def vcc
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b64 s[34:35], vcc
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_mov_b64 vcc, s[34:35]
+; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; use vcc
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_endpgm
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
call void @void_func_void_clobber_vcc()
%val0 = load volatile i32, ptr addrspace(1) undef
@@ -117,22 +266,50 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1)
ret void
}
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
-; GCN: s_mov_b32 s33, s31
-; GCN: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s31, s33
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_s31:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; def s31
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b32 s33, s31
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: s_mov_b32 s31, s33
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; use s31
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_endpgm
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
ret void
}
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
-; GCN: v_mov_b32_e32 v40, v31
-; GCN: s_swappc_b64
-; GCN-NEXT: v_mov_b32_e32 v31, v40
define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_v31:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; def v31
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: v_mov_b32_e32 v40, v31
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: v_mov_b32_e32 v31, v40
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; use v31
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_endpgm
%v31 = call i32 asm sideeffect "; def $0", "={v31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
@@ -140,175 +317,294 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
}
; FIXME: What is the expected behavior for reserved registers here?
-
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
-; FLATSCR: s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF: s_getpc_b64 s[4:5]
-; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-
-; GCN: #ASMSTART
-; GCN-NEXT: ; def s33
-; GCN-NEXT: #ASMEND
-
-; GCN-NOT: s33
-
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-; MUBUF: s_swappc_b64 s[30:31], s[4:5]
-
-; GCN-NOT: s33
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; use s33
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s33:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; def s33
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; use s33
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_endpgm
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
ret void
}
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
-; GCN-NOT: s34
-
-; FLATSCR: s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF: s_getpc_b64 s[4:5]
-; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-; GCN: s_mov_b32 s32, 0
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; def s34
-; GCN-NEXT: ;;#ASMEND
-
-; GCN-NOT: s34
-
-; MUBUF: s_swappc_b64 s[30:31], s[4:5]
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-
-; GCN-NOT: s34
-
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s34
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s34:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: ;;#ASMSTART
+; FLATSCR-NEXT: ; def s34
+; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR...
[truncated]
|
arsenm
approved these changes
Feb 17, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
CodeGen/AMDGPU/call-preserved-registers.ll
CodeGen/AMDGPU/stack-realign.ll
This is in preparation for another PR.