Skip to content

Commit

Permalink
[AMDGPU] Fix whole wavefront mode
Browse files Browse the repository at this point in the history
We cannot move wwm over exec copies because the exec register needs an exact exec mask.

Differential Revision: https://reviews.llvm.org/D76232
  • Loading branch information
Flakebi committed Mar 17, 2020
1 parent 1f93b16 commit 6e29846
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 61 deletions.
34 changes: 1 addition & 33 deletions llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,6 @@ class SIWholeQuadMode : public MachineFunctionPass {
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);

bool requiresCorrectState(const MachineInstr &MI) const;

MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before);
MachineBasicBlock::iterator
Expand Down Expand Up @@ -526,36 +524,6 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
return GlobalFlags;
}

/// Whether \p MI really requires the exec state computed during analysis.
///
/// Scalar instructions must occasionally be marked WQM for correct propagation
/// (e.g. thread masks leading up to branches), but when it comes to actual
/// execution, they don't care about EXEC.
bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
if (MI.isTerminator())
return true;

// Skip instructions that are not affected by EXEC
if (TII->isScalarUnit(MI))
return false;

// Generic instructions such as COPY will either disappear by register
// coalescing or be lowered to SALU or VALU instructions.
if (MI.isTransient()) {
if (MI.getNumExplicitOperands() >= 1) {
const MachineOperand &Op = MI.getOperand(0);
if (Op.isReg()) {
if (TRI->isSGPRReg(*MRI, Op.getReg())) {
// SGPR instructions are not affected by EXEC
return false;
}
}
}
}

return true;
}

MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before) {
Expand Down Expand Up @@ -742,7 +710,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (II != IE) {
MachineInstr &MI = *II;

if (requiresCorrectState(MI)) {
if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
auto III = Instructions.find(&MI);
if (III != Instructions.end()) {
if (III->second.Needs & StateWWM)
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -375,9 +375,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -428,9 +428,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -480,9 +480,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -539,10 +539,10 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -614,9 +614,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying_gfx1032:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -667,9 +667,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying_gfx1032:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -719,9 +719,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying_gfx1032:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -778,10 +778,10 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying_gfx1032:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -853,9 +853,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying_gfx1064:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -906,9 +906,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying_gfx1064:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -958,9 +958,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying_gfx1064:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -1017,10 +1017,10 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying_gfx1064:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -1934,9 +1934,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: sub_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -1987,9 +1987,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: sub_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -2039,9 +2039,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: sub_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -2098,10 +2098,10 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: sub_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -2917,9 +2917,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: or_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -2970,9 +2970,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: or_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -3022,9 +3022,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: or_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -3081,10 +3081,10 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: or_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -3159,9 +3159,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: xor_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -3212,9 +3212,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: xor_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -3264,9 +3264,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: xor_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -3323,10 +3323,10 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: xor_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down Expand Up @@ -4265,9 +4265,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: umax_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -4318,9 +4318,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: umax_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -4370,9 +4370,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: umax_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
Expand Down Expand Up @@ -4429,10 +4429,10 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: umax_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
Expand Down
Loading

0 comments on commit 6e29846

Please sign in to comment.