Skip to content

release/19.x: [AMDGPU] Disable inline constants for pseudo scalar transcendentals (#104395) #105472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }

/// \returns true if inline constants are not supported for F16 pseudo
/// scalar transcendentals. This is reported only when the subtarget
/// generation is exactly GFX12; every other generation returns false.
bool hasNoF16PseudoScalarTransInlineConstants() const {
return getGeneration() == GFX12;
}

/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
isF16PseudoScalarTrans(MI.getOpcode()) &&
isInlineConstant(*MO, OpInfo)) {
return false;
}

if (MO->isReg()) {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::DS_GWS_BARRIER;
}

/// \returns true if \p Opcode is one of the F16 pseudo scalar transcendental
/// opcodes: V_S_EXP_F16_e64, V_S_LOG_F16_e64, V_S_RCP_F16_e64,
/// V_S_RSQ_F16_e64 or V_S_SQRT_F16_e64. Any other opcode yields false.
static bool isF16PseudoScalarTrans(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_S_EXP_F16_e64:
  case AMDGPU::V_S_LOG_F16_e64:
  case AMDGPU::V_S_RCP_F16_e64:
  case AMDGPU::V_S_RSQ_F16_e64:
  case AMDGPU::V_S_SQRT_F16_e64:
    return true;
  default:
    return false;
  }
}

/// \returns true if the SIInstrFlags::TiedSourceNotRead bit is set in the
/// TSFlags of \p MI's instruction descriptor.
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
Expand Down
120 changes: 120 additions & 0 deletions llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s

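# Check that si-fold-operands does not fold an inline constant (the *_imm
# tests, S_MOV_B32 15360) into an f16 pseudo scalar transcendental, so the
# S_MOV_B32 and the register use must remain.
# Literal constants (the *_literal tests, S_MOV_B32 16960) are still allowed
# and are expected to be folded into the instruction operand.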

---
name: exp_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: exp_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: exp_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: exp_f16_literal
; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: log_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: log_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: log_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: log_f16_literal
; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: rcp_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rcp_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: rcp_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rcp_f16_literal
; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: rsq_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rsq_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: rsq_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rsq_f16_literal
; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: sqrt_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: sqrt_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

---
name: sqrt_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: sqrt_f16_literal
; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...
Loading