Skip to content

[NFC][AMDGPU] Auto generate check lines for three test cases #127352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into main from users/shiltian/autogen-tests-for-for-striped-sgrp-cc
Feb 17, 2025

Conversation

shiltian
Copy link
Contributor

@shiltian shiltian commented Feb 15, 2025

  • CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
  • CodeGen/AMDGPU/call-preserved-registers.ll
  • CodeGen/AMDGPU/stack-realign.ll

This is in preparation for another PR.

- `CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll`
- `CodeGen/AMDGPU/call-preserved-registers.ll`

This is in preparation for another PR.
Copy link
Contributor Author

shiltian commented Feb 15, 2025

@shiltian shiltian changed the title [NFC][AMDGPU] Auto generate check lines for two test cases [NFC][AMDGPU] Auto generate check lines for three test cases Feb 15, 2025
@shiltian shiltian marked this pull request as ready for review February 15, 2025 23:11
@shiltian shiltian requested a review from arsenm February 15, 2025 23:11
@llvmbot
Copy link
Member

llvmbot commented Feb 15, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

Changes
  • CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
  • CodeGen/AMDGPU/call-preserved-registers.ll
  • CodeGen/AMDGPU/stack-realign.ll

This is in preparation for another PR.


Patch is 92.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127352.diff

3 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll (+605-218)
  • (modified) llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll (+306-13)
  • (modified) llvm/test/CodeGen/AMDGPU/stack-realign.ll (+658-155)
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index ff80e05197b0d..db9ce56ecc3cc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
@@ -5,110 +6,258 @@
 
 declare hidden void @external_void_func_void() #3
 
-; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_getpc_b64 s[34:35]
-; GCN-NEXT: s_add_u32 s34, s34,
-; GCN-NEXT: s_addc_u32 s35, s35,
-; GCN: s_swappc_b64 s[30:31], s[34:35]
-
-; GCN-NEXT: #ASMSTART
-; GCN-NEXT: #ASMEND
-; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
 define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; FLATSCR-LABEL: test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    s_endpgm
   call void @external_void_func_void()
   call void asm sideeffect "", ""() #0
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; MUBUF:   buffer_store_dword
-; FLATSCR: scratch_store_dword
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; GCN: v_writelane_b32 v40, s30, 0
-; GCN: v_writelane_b32 v40, s31, 1
-; GCN: v_writelane_b32 v40, s34, 2
-; GCN: v_writelane_b32 v40, s35, 3
-
-; GCN: s_swappc_b64
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_swappc_b64
-; GCN: v_readlane_b32 s35, v40, 3
-; GCN: v_readlane_b32 s34, v40, 2
-; MUBUF-DAG:   v_readlane_b32 s31, v40, 1
-; MUBUF-DAG:   v_readlane_b32 s30, v40, 0
-; FLATSCR-DAG: v_readlane_b32 s31, v40, 1
-; FLATSCR-DAG: v_readlane_b32 s30, v40, 0
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF:   buffer_load_dword
-; FLATSCR: scratch_load_dword
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN: s_setpc_b64 s[30:31]
 define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; MUBUF-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT:    v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT:    s_getpc_b64 s[34:35]
+; MUBUF-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT:    v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT:    v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT:    v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void @external_void_func_void()
   call void asm sideeffect "", ""() #0
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN: s_mov_b32 s33, s32
-; MUBUF:   buffer_store_dword v40
-; FLATSCR: scratch_store_dword off, v40
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; MUBUF:   s_addk_i32 s32, 0x400
-; FLATSCR: s_add_i32 s32, s32, 16
-
-; GCN: s_swappc_b64
-; GCN-NEXT: s_swappc_b64
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF:   buffer_load_dword v40
-; FLATSCR: scratch_load_dword v40
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
 define void @test_func_call_external_void_funcx2() #0 {
+; MUBUF-LABEL: test_func_call_external_void_funcx2:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT:    v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT:    s_getpc_b64 s[34:35]
+; MUBUF-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT:    v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_funcx2:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT:    v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT:    v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void @external_void_func_void()
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
-; GCN: s_waitcnt
-; GCN: v_writelane_b32 v0, s30, 0
-; GCN: v_writelane_b32 v0, s31, 1
-; GCN-NEXT: #ASMSTART
-; GCN: ; clobber
-; GCN-NEXT: #ASMEND
-; GCN: v_readlane_b32 s31, v0, 1
-; GCN: v_readlane_b32 s30, v0, 0
-; GCN: s_setpc_b64 s[30:31]
 define void @void_func_void_clobber_s30_s31() #2 {
+; MUBUF-LABEL: void_func_void_clobber_s30_s31:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT:    v_writelane_b32 v0, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v0, s31, 1
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; clobber
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_readlane_b32 s31, v0, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v0, 0
+; MUBUF-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: void_func_void_clobber_s30_s31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT:    v_writelane_b32 v0, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v0, s31, 1
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; clobber
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    v_readlane_b32 s31, v0, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v0, 0
+; FLATSCR-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
   ret void
 }
 
-; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
-; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_setpc_b64 s[30:31]
 define hidden void @void_func_void_clobber_vcc() #2 {
+; GCN-LABEL: void_func_void_clobber_vcc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "", "~{vcc}"() #0
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
-; GCN: s_getpc_b64
-; GCN-NEXT: s_add_u32
-; GCN-NEXT: s_addc_u32
-; GCN: s_mov_b64 s[34:35], vcc
-; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b64 vcc, s[34:35]
 define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_add_u32 s8, s4, 8
+; FLATSCR-NEXT:    s_addc_u32 s9, s5, 0
+; FLATSCR-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; FLATSCR-NEXT:    s_mov_b32 s14, s12
+; FLATSCR-NEXT:    s_mov_b32 s13, s11
+; FLATSCR-NEXT:    s_mov_b32 s12, s10
+; FLATSCR-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; FLATSCR-NEXT:    s_getpc_b64 s[16:17]
+; FLATSCR-NEXT:    s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
+; FLATSCR-NEXT:    v_or3_b32 v31, v0, v1, v2
+; FLATSCR-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; FLATSCR-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def vcc
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_mov_b64 s[34:35], vcc
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b64 vcc, s[34:35]
+; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use vcc
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
   call void @void_func_void_clobber_vcc()
   %val0 = load volatile i32, ptr addrspace(1) undef
@@ -117,22 +266,50 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
-; GCN: s_mov_b32 s33, s31
-; GCN: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s31, s33
 define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_s31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_mov_b32 s33, s31
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    s_mov_b32 s31, s33
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use s31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
-; GCN: v_mov_b32_e32 v40, v31
-; GCN: s_swappc_b64
-; GCN-NEXT: v_mov_b32_e32 v31, v40
 define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_v31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def v31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    v_mov_b32_e32 v40, v31
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    v_mov_b32_e32 v31, v40
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use v31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
@@ -140,175 +317,294 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
 }
 
 ; FIXME: What is the expected behavior for reserved registers here?
-
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
-; FLATSCR:      s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF:        s_getpc_b64 s[4:5]
-; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-
-; GCN: #ASMSTART
-; GCN-NEXT: ; def s33
-; GCN-NEXT: #ASMEND
-
-; GCN-NOT: s33
-
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
-
-; GCN-NOT: s33
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; use s33
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s33:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s33
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use s33
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
-; GCN-NOT: s34
-
-; FLATSCR:      s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF:        s_getpc_b64 s[4:5]
-; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-; GCN: s_mov_b32 s32, 0
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; def s34
-; GCN-NEXT: ;;#ASMEND
-
-; GCN-NOT: s34
-
-; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-
-; GCN-NOT: s34
-
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s34
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s34:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s34
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR...
[truncated]

@shiltian shiltian merged commit 8aff59d into main Feb 17, 2025
12 checks passed
@shiltian shiltian deleted the users/shiltian/autogen-tests-for-for-striped-sgrp-cc branch February 17, 2025 16:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants