Skip to content

Commit

Permalink
[AMDGPU] Add llvm.amdgcn.wqm.helper intrinsic to complement demote
Browse files Browse the repository at this point in the history
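Originally, llvm.amdgcn.wqm.demote was designed to work together with
llvm.amdgcn.ps.live; however, this required changing the type signature
(the declared intrinsic attributes) of ps.live.
While the declaration of ps.live can be updated in LLPC, other users
such as Mesa should not be expected to change.
Instead, add a separate llvm.amdgcn.wqm.helper intrinsic for use with
demote, and declare ps.live as IntrNoMem.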

Change-Id: I7a18e7931bd7f2b4e0bfd4dc5718ff3718bb9d27
  • Loading branch information
perlfu authored and Tim Renouf committed Oct 10, 2019
1 parent 8e41591 commit 4055769
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 7 deletions.
8 changes: 7 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,13 @@ def int_amdgcn_interp_p2_f16 :

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <[llvm_i1_ty], [], []>;
def int_amdgcn_ps_live : Intrinsic <
[llvm_i1_ty],
[],
[IntrNoMem]>;

// Like ps.live, but declared without IntrNoMem so that it cannot be moved
// (hoisted) by LICM.
def int_amdgcn_wqm_helper : Intrinsic <[llvm_i1_ty], [], []>;

def int_amdgcn_mbcnt_lo :
GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_wqm_helper>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,19 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
let isBranch = 1;
}

let Uses = [EXEC] in {

def SI_PS_LIVE : PseudoInstSI <
(outs SReg_1:$dst), (ins),
[(set i1:$dst, (int_amdgcn_ps_live))]> {
let SALU = 1;
}

let Uses = [EXEC] in {
def SI_WQM_HELPER : PseudoInstSI <
(outs SReg_1:$dst), (ins),
[(set i1:$dst, (int_amdgcn_wqm_helper))]> {
let SALU = 1;
}

let Defs = [EXEC] in {
def SI_DEMOTE_I1 : SPseudoInstSI <(outs), (ins SCSrc_i1:$src, i1imm:$killvalue)> {
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
III.Disabled = StateWQM | StateWWM;
continue;
} else {
if (Opcode == AMDGPU::SI_PS_LIVE) {
if (Opcode == AMDGPU::SI_PS_LIVE || Opcode == AMDGPU::SI_WQM_HELPER) {
LiveMaskQueries.push_back(&MI);
} else if (Opcode == AMDGPU::SI_DEMOTE_I1) {
DemoteInstrs.push_back(&MI);
Expand Down Expand Up @@ -1036,6 +1036,7 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {

switch (MI.getOpcode()) {
case AMDGPU::SI_PS_LIVE:
case AMDGPU::SI_WQM_HELPER:
lowerLiveMaskQuery(MBB, MI, LiveMaskReg, State == StateWQM);
break;
case AMDGPU::SI_DEMOTE_I1: {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
br label %.continue0

.continue0:
%live = call i1 @llvm.amdgcn.ps.live()
%live = call i1 @llvm.amdgcn.wqm.helper()
%live.cond = select i1 %live, i32 0, i32 1065353216
%live.v0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %live.cond, i32 85, i32 15, i32 15, i1 true)
%live.v0f = bitcast i32 %live.v0 to float
Expand Down Expand Up @@ -228,7 +228,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index

.continue0:
%count = phi i32 [ 0, %.entry ], [ 0, %.demote0 ], [ %next, %.continue1 ]
%live = call i1 @llvm.amdgcn.ps.live()
%live = call i1 @llvm.amdgcn.wqm.helper()
%live.cond = select i1 %live, i32 0, i32 %count
%live.v0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %live.cond, i32 85, i32 15, i32 15, i1 true)
%live.v0f = bitcast i32 %live.v0 to float
Expand All @@ -255,7 +255,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
}

declare void @llvm.amdgcn.wqm.demote(i1) #0
declare i1 @llvm.amdgcn.ps.live() #1
declare i1 @llvm.amdgcn.wqm.helper() #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare float @llvm.amdgcn.wqm.f32(float) #1
Expand Down

0 comments on commit 4055769

Please sign in to comment.