@@ -942,28 +942,90 @@ Value *VPInstruction::generate(VPTransformState &State) {
942
942
}
943
943
}
944
944
945
+ std::optional<InstructionCost> VPRecipeWithIRFlags::getCostForRecipeWithOpcode (
946
+ unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
947
+ Type *ScalarTy = Ctx.Types .inferScalarType (this );
948
+ Type *ResultTy = VF.isVector () ? toVectorTy (ScalarTy, VF) : ScalarTy;
949
+ switch (Opcode) {
950
+ case Instruction::FNeg:
951
+ return Ctx.TTI .getArithmeticInstrCost (Opcode, ResultTy, Ctx.CostKind );
952
+ case Instruction::UDiv:
953
+ case Instruction::SDiv:
954
+ case Instruction::SRem:
955
+ case Instruction::URem:
956
+ case Instruction::Add:
957
+ case Instruction::FAdd:
958
+ case Instruction::Sub:
959
+ case Instruction::FSub:
960
+ case Instruction::Mul:
961
+ case Instruction::FMul:
962
+ case Instruction::FDiv:
963
+ case Instruction::FRem:
964
+ case Instruction::Shl:
965
+ case Instruction::LShr:
966
+ case Instruction::AShr:
967
+ case Instruction::And:
968
+ case Instruction::Or:
969
+ case Instruction::Xor: {
970
+ TargetTransformInfo::OperandValueInfo RHSInfo = {
971
+ TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
972
+
973
+ if (VF.isVector ()) {
974
+ // Certain instructions can be cheaper to vectorize if they have a
975
+ // constant second vector operand. One example of this are shifts on x86.
976
+ VPValue *RHS = getOperand (1 );
977
+ RHSInfo = Ctx.getOperandInfo (RHS);
978
+
979
+ if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
980
+ getOperand (1 )->isDefinedOutsideLoopRegions ())
981
+ RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
982
+ }
983
+
984
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue ());
985
+ SmallVector<const Value *, 4 > Operands;
986
+ if (CtxI)
987
+ Operands.append (CtxI->value_op_begin (), CtxI->value_op_end ());
988
+ return Ctx.TTI .getArithmeticInstrCost (
989
+ Opcode, ResultTy, Ctx.CostKind ,
990
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
991
+ RHSInfo, Operands, CtxI, &Ctx.TLI );
992
+ }
993
+ case Instruction::Freeze:
994
+ // This opcode is unknown. Assume that it is the same as 'mul'.
995
+ return Ctx.TTI .getArithmeticInstrCost (Instruction::Mul, ResultTy,
996
+ Ctx.CostKind );
997
+ case Instruction::ExtractValue:
998
+ return Ctx.TTI .getInsertExtractValueCost (Instruction::ExtractValue,
999
+ Ctx.CostKind );
1000
+ case Instruction::ICmp:
1001
+ case Instruction::FCmp: {
1002
+ Type *ScalarOpTy = Ctx.Types .inferScalarType (getOperand (0 ));
1003
+ Type *OpTy = VF.isVector () ? toVectorTy (ScalarOpTy, VF) : ScalarOpTy;
1004
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue ());
1005
+ return Ctx.TTI .getCmpSelInstrCost (
1006
+ Opcode, OpTy, CmpInst::makeCmpResultType (OpTy), getPredicate (),
1007
+ Ctx.CostKind , {TTI::OK_AnyValue, TTI::OP_None},
1008
+ {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1009
+ }
1010
+ }
1011
+ return std::nullopt;
1012
+ }
1013
+
945
1014
InstructionCost VPInstruction::computeCost (ElementCount VF,
946
1015
VPCostContext &Ctx) const {
947
1016
if (Instruction::isBinaryOp (getOpcode ())) {
948
- Type *ResTy = Ctx.Types .inferScalarType (this );
949
- if (!vputils::onlyFirstLaneUsed (this ))
950
- ResTy = toVectorTy (ResTy, VF);
951
-
952
- if (!getUnderlyingValue ()) {
953
- switch (getOpcode ()) {
954
- case Instruction::FMul:
955
- return Ctx.TTI .getArithmeticInstrCost (getOpcode (), ResTy, Ctx.CostKind );
956
- default :
957
- // TODO: Compute cost for VPInstructions without underlying values once
958
- // the legacy cost model has been retired.
959
- return 0 ;
960
- }
1017
+ if (!getUnderlyingValue () && getOpcode () != Instruction::FMul) {
1018
+ // TODO: Compute cost for VPInstructions without underlying values once
1019
+ // the legacy cost model has been retired.
1020
+ return 0 ;
961
1021
}
962
1022
963
1023
assert (!doesGeneratePerAllLanes () &&
964
1024
" Should only generate a vector value or single scalar, not scalars "
965
1025
" for all lanes." );
966
- return Ctx.TTI .getArithmeticInstrCost (getOpcode (), ResTy, Ctx.CostKind );
1026
+ return *getCostForRecipeWithOpcode (
1027
+ getOpcode (),
1028
+ vputils::onlyFirstLaneUsed (this ) ? ElementCount::getFixed (1 ) : VF, Ctx);
967
1029
}
968
1030
969
1031
switch (getOpcode ()) {
@@ -2033,20 +2095,13 @@ void VPWidenRecipe::execute(VPTransformState &State) {
2033
2095
InstructionCost VPWidenRecipe::computeCost (ElementCount VF,
2034
2096
VPCostContext &Ctx) const {
2035
2097
switch (Opcode) {
2036
- case Instruction::FNeg: {
2037
- Type *VectorTy = toVectorTy (Ctx.Types .inferScalarType (this ), VF);
2038
- return Ctx.TTI .getArithmeticInstrCost (
2039
- Opcode, VectorTy, Ctx.CostKind ,
2040
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
2041
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
2042
- }
2043
-
2044
2098
case Instruction::UDiv:
2045
2099
case Instruction::SDiv:
2046
2100
case Instruction::SRem:
2047
2101
case Instruction::URem:
2048
2102
// More complex computation, let the legacy cost-model handle this for now.
2049
2103
return Ctx.getLegacyCost (cast<Instruction>(getUnderlyingValue ()), VF);
2104
+ case Instruction::FNeg:
2050
2105
case Instruction::Add:
2051
2106
case Instruction::FAdd:
2052
2107
case Instruction::Sub:
@@ -2060,45 +2115,12 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
2060
2115
case Instruction::AShr:
2061
2116
case Instruction::And:
2062
2117
case Instruction::Or:
2063
- case Instruction::Xor: {
2064
- VPValue *RHS = getOperand (1 );
2065
- // Certain instructions can be cheaper to vectorize if they have a constant
2066
- // second vector operand. One example of this are shifts on x86.
2067
- TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo (RHS);
2068
-
2069
- if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
2070
- getOperand (1 )->isDefinedOutsideLoopRegions ())
2071
- RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
2072
- Type *VectorTy = toVectorTy (Ctx.Types .inferScalarType (this ), VF);
2073
- Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue ());
2074
-
2075
- SmallVector<const Value *, 4 > Operands;
2076
- if (CtxI)
2077
- Operands.append (CtxI->value_op_begin (), CtxI->value_op_end ());
2078
- return Ctx.TTI .getArithmeticInstrCost (
2079
- Opcode, VectorTy, Ctx.CostKind ,
2080
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
2081
- RHSInfo, Operands, CtxI, &Ctx.TLI );
2082
- }
2083
- case Instruction::Freeze: {
2084
- // This opcode is unknown. Assume that it is the same as 'mul'.
2085
- Type *VectorTy = toVectorTy (Ctx.Types .inferScalarType (this ), VF);
2086
- return Ctx.TTI .getArithmeticInstrCost (Instruction::Mul, VectorTy,
2087
- Ctx.CostKind );
2088
- }
2089
- case Instruction::ExtractValue: {
2090
- return Ctx.TTI .getInsertExtractValueCost (Instruction::ExtractValue,
2091
- Ctx.CostKind );
2092
- }
2118
+ case Instruction::Xor:
2119
+ case Instruction::Freeze:
2120
+ case Instruction::ExtractValue:
2093
2121
case Instruction::ICmp:
2094
- case Instruction::FCmp: {
2095
- Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue ());
2096
- Type *VectorTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF);
2097
- return Ctx.TTI .getCmpSelInstrCost (
2098
- Opcode, VectorTy, CmpInst::makeCmpResultType (VectorTy), getPredicate (),
2099
- Ctx.CostKind , {TTI::OK_AnyValue, TTI::OP_None},
2100
- {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
2101
- }
2122
+ case Instruction::FCmp:
2123
+ return *getCostForRecipeWithOpcode (getOpcode (), VF, Ctx);
2102
2124
default :
2103
2125
llvm_unreachable (" Unsupported opcode for instruction" );
2104
2126
}
@@ -2972,7 +2994,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
2972
2994
// transform, avoid computing their cost multiple times for now.
2973
2995
Ctx.SkipCostComputation .insert (UI);
2974
2996
2975
- Type *ResultTy = Ctx.Types .inferScalarType (this );
2976
2997
switch (UI->getOpcode ()) {
2977
2998
case Instruction::GetElementPtr:
2978
2999
// We mark this instruction as zero-cost because the cost of GEPs in
@@ -2996,6 +3017,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
2996
3017
SmallVector<Type *, 4 > Tys;
2997
3018
for (VPValue *ArgOp : drop_end (operands ()))
2998
3019
Tys.push_back (Ctx.Types .inferScalarType (ArgOp));
3020
+ Type *ResultTy = Ctx.Types .inferScalarType (this );
2999
3021
return Ctx.TTI .getCallInstrCost (CalledFn, ResultTy, Tys, Ctx.CostKind );
3000
3022
}
3001
3023
case Instruction::Add:
@@ -3012,12 +3034,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3012
3034
case Instruction::And:
3013
3035
case Instruction::Or:
3014
3036
case Instruction::Xor: {
3015
- auto Op2Info = Ctx.getOperandInfo (getOperand (1 ));
3016
- SmallVector<const Value *, 4 > Operands (UI->operand_values ());
3017
- return Ctx.TTI .getArithmeticInstrCost (
3018
- UI->getOpcode (), ResultTy, Ctx.CostKind ,
3019
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
3020
- Op2Info, Operands, UI, &Ctx.TLI ) *
3037
+ return *getCostForRecipeWithOpcode (getOpcode (), ElementCount::getFixed (1 ),
3038
+ Ctx) *
3021
3039
(isSingleScalar () ? 1 : VF.getFixedValue ());
3022
3040
}
3023
3041
}
0 commit comments