-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Add UniformBinFrag to SALU fminimum/fmaximum patterns. NFCI. #142169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
SALU patterns should have UniformBinFrag because they can only handle uniform inputs. VALU patterns do not need DivergentBinFrag because they work for both uniform and divergent inputs; instead we can use AddedComplexity to ensure that SALU patterns are preferred.
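@llvm/pr-subscribers-backend-amdgpu — Author: Jay Foad (jayfoad). Changes: SALU patterns should have UniformBinFrag because they can only handle uniform inputs. VALU patterns do not need DivergentBinFrag because they work for both uniform and divergent inputs; instead we can use AddedComplexity to ensure that SALU patterns are preferred. Full diff: https://github.com/llvm/llvm-project/pull/142169.diff — 2 Files Affected: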
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 40b3dfb94ce2f..e0a36758534d5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -922,11 +922,11 @@ let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
// On GFX12 MIN/MAX instructions do not read MODE register.
let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 1, isCommutable = 1,
- isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 17 in {
- def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", fminimum>;
- def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", fmaximum>;
- def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", fminimum>;
- def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", fmaximum>;
+ isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 25 in {
+ def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", UniformBinFrag<fminimum>>;
+ def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", UniformBinFrag<fmaximum>>;
+ def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", UniformBinFrag<fminimum>>;
+ def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", UniformBinFrag<fmaximum>>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 0252c4f1b0929..594b37bb6e21a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -168,10 +168,10 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
} // End SchedRW = [WriteIntMul]
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
-defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
-defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
-defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
-defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
+defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
+defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
+defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
+defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, fmaximum>;
let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/13619
Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/13981
Here is the relevant piece of the build log for reference:
|
SALU patterns should have UniformBinFrag because they can only handle
uniform inputs. VALU patterns do not need DivergentBinFrag because they
work for both uniform and divergent inputs; instead we can use
AddedComplexity to ensure that SALU patterns are preferred.