-
Notifications
You must be signed in to change notification settings - Fork 13.5k
release/19.x: [AMDGPU] Disable inline constants for pseudo scalar transcendentals (#104395) #105472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@arsenm What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-amdgpu Author: None (llvmbot) Changes: Backport fc6300a. Requested by: @perlfu. Full diff: https://github.com/llvm/llvm-project/pull/105472.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index def89c785b8552..902f51ae358d59 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
+ /// \returns true if inline constants are not supported for F16 pseudo
+ /// scalar transcendentals.
+ bool hasNoF16PseudoScalarTransInlineConstants() const {
+ return getGeneration() == GFX12;
+ }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 463737f645d459..27b8c1b17422af 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
+ } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
+ isF16PseudoScalarTrans(MI.getOpcode()) &&
+ isInlineConstant(*MO, OpInfo)) {
+ return false;
}
if (MO->isReg()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1712dfe8d406cc..91855fb14f6f37 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::DS_GWS_BARRIER;
}
+ static bool isF16PseudoScalarTrans(unsigned Opcode) {
+ return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
+ Opcode == AMDGPU::V_S_LOG_F16_e64 ||
+ Opcode == AMDGPU::V_S_RCP_F16_e64 ||
+ Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
+ Opcode == AMDGPU::V_S_SQRT_F16_e64;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
new file mode 100644
index 00000000000000..17bed38bd046d7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+# Do not use inline constants for f16 pseudo scalar transcendentals.
+# But allow literal constants.
+
+---
+name: exp_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: exp_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: exp_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: exp_f16_literal
+ ; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: log_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: log_f16_literal
+ ; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rcp_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rcp_f16_literal
+ ; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rsq_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rsq_f16_literal
+ ; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: sqrt_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: sqrt_f16_literal
+ ; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
|
[AMDGPU] Disable inline constants for pseudo scalar transcendentals (llvm#104395) Prevent operand folding from inlining constants into pseudo scalar transcendental f16 instructions. However, still allow literal constants. (cherry picked from commit fc6300a)
@perlfu (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport fc6300a
Requested by: @perlfu