@@ -2330,40 +2330,132 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
23302330 // Vector256<long> div_i64 = Vector256.ConvertToInt64(div_f64);
23312331 // Vector128<int> div_i32 = Vector256.Narrow(div_i64.GetLower(), div_i64.GetUpper());
23322332 // return div_i32;
2333- regNumber op2Reg = op2->GetRegNum ();
2334- regNumber tmpReg1 = internalRegisters.Extract (node, RBM_ALLFLOAT);
2333+ regNumber op2Reg = op2->GetRegNum ();
2334+ regNumber tmpReg1 = REG_NA;
2335+ if (!compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
2336+ {
2337+ tmpReg1 = internalRegisters.Extract (node, compiler->compOpportunisticallyDependsOn (InstructionSet_AVX)
2338+ ? RBM_ALLFLOAT
2339+ : SRBM_XMM0);
2340+ }
23352341 regNumber tmpReg2 = internalRegisters.Extract (node, RBM_ALLFLOAT);
2336- emitAttr typeSize = emitTypeSize (node->TypeGet ());
2342+ regNumber tmpReg3 = internalRegisters.Extract (node, RBM_ALLFLOAT);
2343+ var_types nodeType = node->TypeGet ();
2344+ emitAttr typeSize = emitTypeSize (nodeType);
23372345 noway_assert (typeSize == EA_16BYTE || typeSize == EA_32BYTE);
2338- emitAttr divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE ;
2346+ emitAttr divTypeSize = typeSize;
23392347
2340- simd_t negOneIntVec = simd_t::AllBitsSet ();
2341- simd_t minValueInt{};
2342- int numElements = genTypeSize (node->TypeGet ()) / 4 ;
2343- for (int i = 0 ; i < numElements; i++)
2348+ if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
23442349 {
2345- minValueInt. i32 [i] = INT_MIN ;
2350+ divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE ;
23462351 }
2347- CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst (&minValueInt, typeSize);
2348- CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst (&negOneIntVec, typeSize);
2352+ else if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX) && typeSize == EA_16BYTE)
2353+ {
2354+ divTypeSize = EA_32BYTE;
2355+ }
2356+ simd_t negOneIntVec = simd_t::AllBitsSet ();
2357+ CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst (&negOneIntVec, typeSize);
23492358
23502359 // div-by-zero check
2351- emit->emitIns_SIMD_R_R_R (INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1 , instOptions);
2352- emit->emitIns_SIMD_R_R_R (INS_pcmpeqd, typeSize, tmpReg1, tmpReg1 , op2Reg, instOptions);
2353- emit->emitIns_R_R (INS_ptest, typeSize, tmpReg1, tmpReg1 , instOptions);
2360+ emit->emitIns_SIMD_R_R_R (INS_xorpd, typeSize, tmpReg2, tmpReg2, tmpReg2 , instOptions);
2361+ emit->emitIns_SIMD_R_R_R (INS_pcmpeqd, typeSize, tmpReg2, tmpReg2 , op2Reg, instOptions);
2362+ emit->emitIns_R_R (INS_ptest, typeSize, tmpReg2, tmpReg2 , instOptions);
23542363 genJumpToThrowHlpBlk (EJ_jne, SCK_DIV_BY_ZERO);
23552364
23562365 // overflow check
2357- emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0 , instOptions);
2358- emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0 , instOptions);
2359- emit->emitIns_SIMD_R_R_R (INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions);
2360- emit->emitIns_R_R (INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions);
2361- genJumpToThrowHlpBlk (EJ_jne, SCK_OVERFLOW);
2362-
2363- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
2364- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg2, op2Reg, instOptions);
2365- emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, targetReg, tmpReg1, tmpReg2, instOptions);
2366- emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, targetReg, targetReg, instOptions);
2366+ if (varTypeIsSigned (baseType))
2367+ {
2368+ simd_t minValueInt{};
2369+ int numElements = genTypeSize (nodeType) / 4 ;
2370+ for (int i = 0 ; i < numElements; i++)
2371+ {
2372+ minValueInt.i32 [i] = INT_MIN;
2373+ }
2374+ CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst (&minValueInt, typeSize);
2375+
2376+ emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg2, op1Reg, minValueFld, 0 , instOptions);
2377+ emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg3, op2Reg, negOneFld, 0 , instOptions);
2378+ emit->emitIns_SIMD_R_R_R (INS_pandd, typeSize, tmpReg2, tmpReg2, tmpReg3, instOptions);
2379+ emit->emitIns_R_R (INS_ptest, typeSize, tmpReg2, tmpReg2, instOptions);
2380+ genJumpToThrowHlpBlk (EJ_jne, SCK_OVERFLOW);
2381+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg2, op1Reg, instOptions);
2382+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg3, op2Reg, instOptions);
2383+ }
2384+ else if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
2385+ {
2386+ emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg2, op1Reg, instOptions);
2387+ emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg3, op2Reg, instOptions);
2388+ }
2389+ else
2390+ {
2391+ simd_t double2To32Const{};
2392+ int numElements = genTypeSize (nodeType) / 2 ;
2393+ for (int i = 0 ; i < numElements; i++)
2394+ {
2395+ double2To32Const.f64 [i] = 4294967296.0 ; // 2^32
2396+ }
2397+ CORINFO_FIELD_HANDLE double2To32ConstFld = emit->emitSimdConst (&double2To32Const, divTypeSize);
2398+
2399+ // Convert uint -> double
2400+ // tmpReg2 = double(op1Reg)
2401+ // tmpReg3 = double(op2Reg)
2402+ if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX))
2403+ {
2404+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
2405+ emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg2, tmpReg1, false , instOptions);
2406+ emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg2, double2To32ConstFld, instOptions);
2407+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg2, tmpReg1, tmpReg2, tmpReg1,
2408+ instOptions);
2409+
2410+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op2Reg, instOptions);
2411+ emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg3, tmpReg1, false , instOptions);
2412+ emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg3, double2To32ConstFld, instOptions);
2413+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg3, tmpReg1, tmpReg3, tmpReg1,
2414+ instOptions);
2415+ }
2416+ else
2417+ {
2418+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
2419+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, tmpReg1, false , instOptions);
2420+ emit->emitIns_R_C (INS_addpd, typeSize, tmpReg2, double2To32ConstFld, instOptions);
2421+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg2, instOptions);
2422+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, tmpReg1, instOptions);
2423+
2424+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op2Reg, instOptions);
2425+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, tmpReg1, false , instOptions);
2426+ emit->emitIns_R_C (INS_addpd, typeSize, tmpReg3, double2To32ConstFld, instOptions);
2427+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg3, instOptions);
2428+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, tmpReg1, instOptions);
2429+ }
2430+ }
2431+
2432+ if (varTypeIsSigned (baseType) || compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
2433+ {
2434+ emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, targetReg, tmpReg2, tmpReg3, instOptions);
2435+ emit->emitIns_R_R (varTypeIsSigned (baseType) ? INS_cvttpd2dq : INS_vcvttpd2udq, divTypeSize, targetReg,
2436+ targetReg, instOptions);
2437+ }
2438+ else
2439+ {
2440+ assert (varTypeIsUnsigned (baseType));
2441+ emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, tmpReg1, tmpReg2, tmpReg3, instOptions);
2442+
2443+ if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX))
2444+ {
2445+ emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg3, tmpReg1, instOptions);
2446+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg1, op1Reg, instOptions);
2447+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, typeSize, targetReg, tmpReg3, tmpReg1, tmpReg3,
2448+ instOptions);
2449+ }
2450+ else
2451+ {
2452+ emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg1, tmpReg1, instOptions);
2453+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, op1Reg, instOptions);
2454+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg2, instOptions);
2455+ emit->emitIns_Mov (INS_movups, typeSize, targetReg, tmpReg1, false );
2456+ }
2457+ }
2458+
23672459 break ;
23682460 }
23692461
0 commit comments