Skip to content

AMDGPU: Add regression test for multiple frame index lowering #140784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
Original file line number Diff line number Diff line change
Expand Up @@ -424,4 +424,115 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
ret void
}

; Check that we do not produce a verifier error after prolog/epilog
; insertion. %alloca0 is 17 x i32 (68 bytes), so %alloca1 and %alloca2
; are expected at offsets 0x44 and 0x48 and will lower to literals.
; The final s_mov_b32 check is anchored with an end-of-line match so that
; it only accepts the immediate 0 (the offset of %alloca0) and not a hex
; literal that merely begins with the digit 0.

; GCN-LABEL: {{^}}s_multiple_frame_indexes_literal_offsets:
; GCN: s_load_dword [[ARG0:s[0-9]+]]
; GCN: s_movk_i32 [[ALLOCA1:s[0-9]+]], 0x44
; GCN: s_cmp_eq_u32 [[ARG0]], 0
; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x48
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %arg0) #0 {
%alloca0 = alloca [17 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; %alloca0 is 16 x i32 (64 bytes), so %alloca1 is expected at offset 64
; and %alloca2 at offset 0x44. One of the two will lower to an inline
; constant and the other to a literal, so we could fold both indexes
; into the instruction.
; The final s_mov_b32 check is anchored with an end-of-line match so that
; it only accepts the immediate 0 (the offset of %alloca0) and not a hex
; literal that merely begins with the digit 0.

; GCN-LABEL: {{^}}s_multiple_frame_indexes_one_imm_one_literal_offset:
; GCN: s_load_dword [[ARG0:s[0-9]+]]
; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 64
; GCN: s_cmp_eq_u32 [[ARG0]], 0
; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x44
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_one_imm_one_literal_offset(i32 inreg %arg0) #0 {
%alloca0 = alloca [16 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; %alloca0 is 4 x i32 (16 bytes), so %alloca1 and %alloca2 are expected
; at offsets 16 and 20. The checks below expect both offsets to appear as
; plain decimal immediates rather than hex literals.
; The final s_mov_b32 check is anchored with an end-of-line match so that
; it only accepts the immediate 0 (the offset of %alloca0) and not some
; longer immediate that merely begins with the digit 0.

; GCN-LABEL: {{^}}s_multiple_frame_indexes_imm_offsets:
; GCN: s_load_dword [[ARG0:s[0-9]+]]
; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 16
; GCN: s_cmp_eq_u32 [[ARG0]], 0
; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 20
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_imm_offsets(i32 inreg %arg0) #0 {
%alloca0 = alloca [4 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; Vector variant of the literal offset case. The select condition comes
; from the workitem id, and the checks expect the select to become a
; v_cndmask_b32. %alloca0 is 17 x i32 (68 bytes), so %alloca1 is expected
; at offset 0x44 and %alloca2 at offset 0x48.
; v_cndmask_b32 takes its second source when vcc is set, so the register
; holding the offset of %alloca1 (the value selected when %vgpr is 0) is
; the second source operand. The ALLOCA1 and ALLOCA2 captures are named
; after the IR value whose offset each register holds.

; GCN-LABEL: {{^}}v_multiple_frame_indexes_literal_offsets:
; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 0x48
; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 0x44
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_literal_offsets() #0 {
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [17 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %vgpr, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; Vector variant of the mixed case. %alloca0 is 16 x i32 (64 bytes), so
; %alloca1 is expected at offset 64 and %alloca2 at offset 0x44.
; v_cndmask_b32 takes its second source when vcc is set, so the register
; holding the offset of %alloca1 (the value selected when %vgpr is 0) is
; the second source operand. The ALLOCA1 and ALLOCA2 captures are named
; after the IR value whose offset each register holds.

; GCN-LABEL: {{^}}v_multiple_frame_indexes_one_imm_one_literal_offset:
; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 0x44
; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 64
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_one_imm_one_literal_offset() #0 {
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [16 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %vgpr, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; Vector variant of the small offset case. %alloca0 is 2 x i32 (8 bytes),
; so %alloca1 is expected at offset 8 and %alloca2 at offset 12.
; v_cndmask_b32 takes its second source when vcc is set, so the register
; holding the offset of %alloca1 (the value selected when %vgpr is 0) is
; the second source operand. The ALLOCA1 and ALLOCA2 captures are named
; after the IR value whose offset each register holds.

; GCN-LABEL: {{^}}v_multiple_frame_indexes_imm_offsets:
; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 12
; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 8
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA2]], [[ALLOCA1]], vcc
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_imm_offsets() #0 {
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [2 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
%cmp = icmp eq i32 %vgpr, 0
%select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
ret void
}

; Attribute group #0, referenced by the kernel definitions above. It
; contains only nounwind.
attributes #0 = { nounwind }
Loading