@@ -868,6 +868,11 @@ void Lowering::LowerCast(GenTree* tree)
     {
         // If we don't have AVX10v2 saturating conversion instructions for
         // floating->integral, we have to handle the saturation logic here.
+        //
+        // Since this implements ordinary casts, we bend the normal rules around ISA support
+        // for HWIntrinsics and assume the baseline ISA set (SSE2 and below) is available.
+        // For this reason, we eschew most gentree convenience methods (e.g. gtNewSimdBinOpNode)
+        // and create the HWIntrinsic nodes explicitly, as most helpers assert ISA support.
 
         JITDUMP("LowerCast before:\n");
         DISPTREERANGE(BlockRange(), tree);
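For context, a minimal scalar model of the saturating semantics this lowering emulates (a sketch, not code from this change; it assumes .NET's saturating-cast definition, where NaN converts to 0 and out-of-range values clamp to the destination type's limits; the function name is illustrative):

#include <cmath>
#include <cstdint>
#include <limits>

// Scalar reference for a saturating float->int32 cast: NaN -> 0,
// overflow clamps to int32 min/max, in-range values truncate toward zero.
static int32_t SaturatingCastToInt32(float value)
{
    if (std::isnan(value))
    {
        return 0;
    }
    if (value <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    {
        return std::numeric_limits<int32_t>::min();
    }
    if (value >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    {
        return std::numeric_limits<int32_t>::max();
    }
    return static_cast<int32_t>(value);
}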
@@ -904,8 +909,8 @@ void Lowering::LowerCast(GenTree* tree)
             GenTree* zero     = comp->gtNewZeroConNode(TYP_SIMD16);
             GenTree* fixupVal =
                 comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16);
-
-            GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16);
+            GenTree* toScalar =
+                comp->gtNewSimdHWIntrinsicNode(srcType, fixupVal, NI_Vector128_ToScalar, srcBaseType, 16);
 
             castRange.InsertAtEnd(zero);
             castRange.InsertAtEnd(fixupVal);
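The zero/fixupVal pair above clamps the source against zero before conversion, which folds both negative inputs and NaN to 0: maxss returns its second source operand when an operand is NaN, so the operand order matters. Expressed directly with SSE intrinsics (a sketch of the equivalent scalar computation, not the JIT's code; the function name is illustrative):

#include <xmmintrin.h>

// Clamp negatives and NaN in the low lane to 0.0f before conversion.
static float FixupAgainstZero(float src)
{
    __m128 srcVector = _mm_set_ss(src);
    __m128 zero      = _mm_setzero_ps();
    __m128 fixupVal  = _mm_max_ss(srcVector, zero); // NaN or negative -> 0.0f
    return _mm_cvtss_f32(fixupVal);                 // ToScalar()
}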
@@ -915,9 +920,6 @@ void Lowering::LowerCast(GenTree* tree)
         }
         else
         {
-            assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
-            assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64));
-
             // We need to fix up NaN as well as handle possible overflow. Signed conversions
             // return int/long.MinValue for any overflow, which is correct for saturation of
             // negative, but the result must be replaced with MaxValue for positive overflow.
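The fixups are needed because the SSE truncating conversions return the "integer indefinite" value (0x80000000, i.e. int.MinValue) for any input they cannot represent. A small standalone demonstration using compiler intrinsics (a sketch, not part of the change):

#include <cmath>
#include <cstdio>
#include <xmmintrin.h>

int main()
{
    // cvttss2si returns int.MinValue for NaN and for both overflow
    // directions, so only NaN and positive overflow need patching.
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(1e10f)));         // -2147483648
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(-1e10f)));        // -2147483648
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(std::nanf("")))); // -2147483648
    return 0;
}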
@@ -953,16 +955,14 @@ void Lowering::LowerCast(GenTree* tree)
                 if (srcType == TYP_FLOAT)
                 {
                     maxFloatSimdVal->f32[0] = 4294967296.0f;
-                    convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64)
-                                                  ? NI_SSE_X64_ConvertToInt64WithTruncation
-                                                  : NI_SSE2_ConvertToVector128Int32WithTruncation;
+                    convertIntrinsic        = TargetArchitecture::Is64Bit ? NI_SSE_X64_ConvertToInt64WithTruncation
+                                                                          : NI_SSE2_ConvertToVector128Int32WithTruncation;
                 }
                 else
                 {
                     maxFloatSimdVal->f64[0] = 4294967296.0;
-                    convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64)
-                                                  ? NI_SSE2_X64_ConvertToInt64WithTruncation
-                                                  : NI_SSE2_ConvertToVector128Int32WithTruncation;
+                    convertIntrinsic        = TargetArchitecture::Is64Bit ? NI_SSE2_X64_ConvertToInt64WithTruncation
+                                                                          : NI_SSE2_ConvertToVector128Int32WithTruncation;
                 }
                 break;
             }
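The 4294967296.0 bound here is exactly 2^32, one past uint.MaxValue, which suggests this case handles an unsigned 32-bit destination: on 64-bit targets the result is produced by a signed 64-bit truncating conversion, which is exact across the whole uint range, so anything at or above 2^32 is positive overflow that must saturate. A scalar sketch of that 64-bit path (assuming the NaN/negative fixup has already run; not code from this change):

#include <cstdint>

static uint32_t SaturateToUInt32(double fixupVal)
{
    const double maxFloatVal = 4294967296.0; // 2^32
    if (fixupVal >= maxFloatVal)
    {
        return UINT32_MAX; // positive overflow saturates to all-ones
    }
    // In-range values fit in int64, so the truncating conversion is exact.
    return static_cast<uint32_t>(static_cast<int64_t>(fixupVal));
}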
@@ -1023,6 +1023,7 @@ void Lowering::LowerCast(GenTree* tree)
             // var fixupVal = Sse.And(srcVec, nanMask);
             // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal);
 
+            NamedIntrinsic andIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_And : NI_SSE2_And;
             NamedIntrinsic compareNaNIntrinsic =
                 (srcType == TYP_FLOAT) ? NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered;
 
@@ -1033,8 +1034,9 @@ void Lowering::LowerCast(GenTree* tree)
             castRange.InsertAtEnd(srcClone);
             castRange.InsertAtEnd(nanMask);
 
-            srcClone          = comp->gtClone(srcVector);
-            GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16);
+            srcClone          = comp->gtClone(srcVector);
+            GenTree* fixupVal =
+                comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, nanMask, srcClone, andIntrinsic, srcBaseType, 16);
 
             castRange.InsertAtEnd(srcClone);
             castRange.InsertAtEnd(fixupVal);
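This is the NaN fixup from the C# comment earlier in the function: an ordered compare of the value against itself yields an all-ones mask exactly when it is not NaN, so ANDing the mask with the source zeroes out NaN and passes every other value through. A direct intrinsics sketch of the same idea (low lane only, mirroring the scalar compare; not the JIT's code):

#include <xmmintrin.h>

// Replace NaN in the low lane with +0.0f; other values are unchanged.
static __m128 FixupNaN(__m128 srcVec)
{
    __m128 nanMask = _mm_cmpord_ss(srcVec, srcVec); // all-ones unless NaN
    return _mm_and_ps(nanMask, srcVec);             // NaN lane -> 0.0f
}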
@@ -1120,15 +1122,16 @@ void Lowering::LowerCast(GenTree* tree)
                 // This creates the equivalent of the following C# code:
                 // floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble();
 
-                GenTree* twentyOne  = comp->gtNewIconNode(21);
-                GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne,
-                                                               CORINFO_TYPE_ULONG, 16);
+                GenTree* twentyOne  = comp->gtNewIconNode(21);
+                GenTree* rightShift =
+                    comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, twentyOne,
+                                                   NI_SSE2_ShiftRightLogical, CORINFO_TYPE_ULONG, 16);
                 castRange.InsertAtEnd(twentyOne);
                 castRange.InsertAtEnd(rightShift);
 
                 twentyOne = comp->gtClone(twentyOne);
-                floorVal  = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne,
-                                                     CORINFO_TYPE_ULONG, 16);
+                floorVal  = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, rightShift, twentyOne,
+                                                           NI_SSE2_ShiftLeftLogical, CORINFO_TYPE_ULONG, 16);
                 castRange.InsertAtEnd(twentyOne);
                 castRange.InsertAtEnd(floorVal);
             }
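The shift pair implements exactly the C# comment above: treating the double as raw bits and clearing its low 21 bits truncates the mantissa, rounding the value toward zero at a power-of-two granularity determined by its exponent. A plain scalar rendering of the same bit manipulation (a sketch, not the JIT's code):

#include <cstdint>
#include <cstring>

// floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble(), scalar form.
static double ClearLow21Bits(double srcVal)
{
    uint64_t bits;
    std::memcpy(&bits, &srcVal, sizeof(bits)); // AsUInt64()
    bits = (bits >> 21) << 21;                 // zero the low 21 mantissa bits
    std::memcpy(&srcVal, &bits, sizeof(bits)); // AsDouble()
    return srcVal;
}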
@@ -1191,21 +1194,23 @@ void Lowering::LowerCast(GenTree* tree)
 
                     GenTree* thirtyOne = comp->gtNewIconNode(31);
                     GenTree* mask =
-                        comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16);
+                        comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, thirtyOne,
+                                                       NI_SSE2_ShiftRightArithmetic, CORINFO_TYPE_INT, 16);
                     GenTree* andMask =
-                        comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16);
+                        comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, mask, negated, NI_SSE2_And, dstBaseType, 16);
 
                     castRange.InsertAtEnd(thirtyOne);
                     castRange.InsertAtEnd(mask);
                     castRange.InsertAtEnd(andMask);
 
-                    convertResult =
-                        comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16);
+                    convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, andMask, resultClone, NI_SSE2_Or,
+                                                                   dstBaseType, 16);
                 }
 
                 // Because the results are in a SIMD register, we need to ToScalar() them out.
                 castRange.InsertAtEnd(convertResult);
-                convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16);
+                convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_INT, convertResult, NI_Vector128_ToScalar,
+                                                               dstBaseType, 16);
             }
             else
             {
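The shift/and/or triple above is the classic branchless select-on-sign idiom: the arithmetic shift smears the sign bit of result into an all-ones or all-zero mask, and the OR merges in correction bits only when the mask is set. A scalar model of the node sequence (names follow the diff, but this scalar form is illustrative only; how 'negated' is computed lies outside these hunks):

#include <cstdint>

static int32_t BlendOnSign(int32_t result, int32_t negated)
{
    int32_t mask    = result >> 31; // arithmetic shift: all-ones if result < 0
    int32_t andMask = mask & negated;
    return andMask | result;        // correction bits OR'd in only when negative
}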