@@ -3078,8 +3078,6 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
30783078//
30793079GenTree* Lowering::LowerHWIntrinsicCndSel (GenTreeHWIntrinsic* node)
30803080{
3081- assert (!comp->canUseEvexEncodingDebugOnly ());
3082-
30833081 var_types simdType = node->gtType ;
30843082 CorInfoType simdBaseJitType = node->GetSimdBaseJitType ();
30853083 var_types simdBaseType = node->GetSimdBaseType ();
@@ -3102,17 +3100,38 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
31023100 // we can optimize the entire conditional select to
31033101 // a single BlendVariable instruction (if supported by the architecture)
31043102
3105- // TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial.
31063103 // First, determine if the condition is a per-element mask
31073104 if (op1->OperIsHWIntrinsic () && HWIntrinsicInfo::ReturnsPerElementMask (op1->AsHWIntrinsic ()->GetHWIntrinsicId ()))
31083105 {
31093106 // Next, determine if the target architecture supports BlendVariable
31103107 NamedIntrinsic blendVariableId = NI_Illegal;
31113108
3112- // For Vector256 (simdSize == 32), BlendVariable for floats/doubles is available on AVX, whereas other types
3113- // require AVX2
3114- if (simdSize == 32 )
3109+ bool isOp1CvtMaskToVector = op1-> AsHWIntrinsic ()-> OperIsConvertMaskToVector ();
3110+
3111+ if (( simdSize == 64 ) || isOp1CvtMaskToVector )
31153112 {
3113+ GenTree* maskNode;
3114+
3115+ if (isOp1CvtMaskToVector)
3116+ {
3117+ maskNode = op1->AsHWIntrinsic ()->Op (1 );
3118+ BlockRange ().Remove (op1);
3119+ }
3120+ else
3121+ {
3122+ maskNode = comp->gtNewSimdCvtVectorToMaskNode (TYP_MASK, op1, simdBaseJitType, simdSize);
3123+ BlockRange ().InsertBefore (node, maskNode);
3124+ }
3125+
3126+ assert (maskNode->TypeGet () == TYP_MASK);
3127+ blendVariableId = NI_EVEX_BlendVariableMask;
3128+ op1 = maskNode;
3129+ }
3130+ else if (simdSize == 32 )
3131+ {
3132+ // For Vector256 (simdSize == 32), BlendVariable for floats/doubles
3133+ // is available on AVX, whereas other types (integrals) require AVX2
3134+
31163135 if (varTypeIsFloating (simdBaseType))
31173136 {
31183137 // This should have already been confirmed
@@ -3124,9 +3143,9 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
31243143 blendVariableId = NI_AVX2_BlendVariable;
31253144 }
31263145 }
3127- // For Vector128, BlendVariable is available on SSE41
31283146 else if (comp->compOpportunisticallyDependsOn (InstructionSet_SSE41))
31293147 {
3148+ // For Vector128, BlendVariable is available on SSE41
31303149 blendVariableId = NI_SSE41_BlendVariable;
31313150 }
31323151
0 commit comments