@@ -2360,10 +2360,16 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
23602360 // Vector256<long> div_i64 = Vector256.ConvertToInt64(div_f64);
23612361 // Vector128<int> div_i32 = Vector256.Narrow(div_i64.GetLower(), div_i64.GetUpper());
23622362 // return div_i32;
2363- regNumber op2Reg = op2->GetRegNum ();
2364- regNumber tmpReg1 = internalRegisters.Extract (node, RBM_ALLFLOAT);
2363+ regNumber op2Reg = op2->GetRegNum ();
2364+ regNumber tmpReg1 = REG_NA;
2365+ if (!compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
2366+ {
2367+ tmpReg1 = internalRegisters.Extract (node, compiler->compOpportunisticallyDependsOn (InstructionSet_AVX)
2368+ ? RBM_ALLFLOAT
2369+ : SRBM_XMM0);
2370+ }
23652371 regNumber tmpReg2 = internalRegisters.Extract (node, RBM_ALLFLOAT);
2366- regNumber tmpReg3 = targetReg ;
2372+ regNumber tmpReg3 = internalRegisters. Extract (node, RBM_ALLFLOAT) ;
23672373 var_types nodeType = node->TypeGet ();
23682374 emitAttr typeSize = emitTypeSize (nodeType);
23692375 noway_assert (typeSize == EA_16BYTE || typeSize == EA_32BYTE);
@@ -2381,9 +2387,9 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
23812387 CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst (&negOneIntVec, typeSize);
23822388
23832389 // div-by-zero check
2384- emit->emitIns_SIMD_R_R_R (INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1 , instOptions);
2385- emit->emitIns_SIMD_R_R_R (INS_pcmpeqd, typeSize, tmpReg1, tmpReg1 , op2Reg, instOptions);
2386- emit->emitIns_R_R (INS_ptest, typeSize, tmpReg1, tmpReg1 , instOptions);
2390+ emit->emitIns_SIMD_R_R_R (INS_xorpd, typeSize, tmpReg2, tmpReg2, tmpReg2 , instOptions);
2391+ emit->emitIns_SIMD_R_R_R (INS_pcmpeqd, typeSize, tmpReg2, tmpReg2 , op2Reg, instOptions);
2392+ emit->emitIns_R_R (INS_ptest, typeSize, tmpReg2, tmpReg2 , instOptions);
23872393 genJumpToThrowHlpBlk (EJ_jne, SCK_DIV_BY_ZERO);
23882394
23892395 // overflow check
@@ -2397,18 +2403,18 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
23972403 }
23982404 CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst (&minValueInt, typeSize);
23992405
2400- emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg1 , op1Reg, minValueFld, 0 , instOptions);
2401- emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg2 , op2Reg, negOneFld, 0 , instOptions);
2402- emit->emitIns_SIMD_R_R_R (INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2 , instOptions);
2403- emit->emitIns_R_R (INS_ptest, typeSize, tmpReg1, tmpReg1 , instOptions);
2406+ emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg2 , op1Reg, minValueFld, 0 , instOptions);
2407+ emit->emitIns_SIMD_R_R_C (INS_pcmpeqd, typeSize, tmpReg3 , op2Reg, negOneFld, 0 , instOptions);
2408+ emit->emitIns_SIMD_R_R_R (INS_pandd, typeSize, tmpReg2, tmpReg2, tmpReg3 , instOptions);
2409+ emit->emitIns_R_R (INS_ptest, typeSize, tmpReg2, tmpReg2 , instOptions);
24042410 genJumpToThrowHlpBlk (EJ_jne, SCK_OVERFLOW);
2405- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1 , op1Reg, instOptions);
2406- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg2 , op2Reg, instOptions);
2411+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg2 , op1Reg, instOptions);
2412+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg3 , op2Reg, instOptions);
24072413 }
24082414 else if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
24092415 {
2410- emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg1 , op1Reg, instOptions);
2411- emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg2 , op2Reg, instOptions);
2416+ emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg2 , op1Reg, instOptions);
2417+ emit->emitIns_R_R (INS_vcvtudq2pd, divTypeSize, tmpReg3 , op2Reg, instOptions);
24122418 }
24132419 else
24142420 {
@@ -2419,79 +2425,64 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
24192425 double2To32Const.f64 [i] = 4294967296.0 ; // 2^32
24202426 }
24212427 CORINFO_FIELD_HANDLE double2To32ConstFld = emit->emitSimdConst (&double2To32Const, divTypeSize);
2422- tmpReg3 = internalRegisters.Extract (node, RBM_ALLFLOAT);
24232428
2429+ // Convert uint -> double
2430+ // tmpReg2 = double(op1Reg)
2431+ // tmpReg3 = double(op2Reg)
24242432 if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX))
24252433 {
2426- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg3 , op1Reg, instOptions);
2427- emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg1, tmpReg3 , false , instOptions);
2428- emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg1 , double2To32ConstFld, instOptions);
2429- emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg1, tmpReg3, tmpReg1, tmpReg3 ,
2434+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1 , op1Reg, instOptions);
2435+ emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg2, tmpReg1 , false , instOptions);
2436+ emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg2 , double2To32ConstFld, instOptions);
2437+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg2, tmpReg1, tmpReg2, tmpReg1 ,
24302438 instOptions);
24312439
2432- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg3 , op2Reg, instOptions);
2433- emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg2, tmpReg3 , false , instOptions);
2434- emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg2 , double2To32ConstFld, instOptions);
2435- emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg2, tmpReg3, tmpReg2 , tmpReg3,
2440+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1 , op2Reg, instOptions);
2441+ emit->emitIns_Mov (INS_movups, divTypeSize, tmpReg3, tmpReg1 , false , instOptions);
2442+ emit->emitIns_R_C (INS_addpd, divTypeSize, tmpReg3 , double2To32ConstFld, instOptions);
2443+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, divTypeSize, tmpReg3, tmpReg1 , tmpReg3, tmpReg1 ,
24362444 instOptions);
24372445 }
24382446 else
24392447 {
2440- // blendvpd requires that the mask value is in XMM0. Preserve XMM0 in a tmp reg while we convert
2441- // from uint -> long -> double, then restore XMM0 when we're done
2442- emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, JITREG_XMM0, false );
2443-
2444- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, JITREG_XMM0, op1Reg, instOptions);
2445- emit->emitIns_Mov (INS_movups, typeSize, tmpReg1, JITREG_XMM0, false , instOptions);
2446- emit->emitIns_R_C (INS_addpd, typeSize, tmpReg1, double2To32ConstFld, instOptions);
2447- emit->emitIns_R_R (INS_blendvpd, typeSize, JITREG_XMM0, tmpReg1, instOptions);
2448- emit->emitIns_Mov (INS_movups, typeSize, tmpReg1, JITREG_XMM0, instOptions);
2449-
2450- emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, JITREG_XMM0, op2Reg, instOptions);
2451- emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, JITREG_XMM0, false , instOptions);
2448+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
2449+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, tmpReg1, false , instOptions);
24522450 emit->emitIns_R_C (INS_addpd, typeSize, tmpReg2, double2To32ConstFld, instOptions);
2453- emit->emitIns_R_R (INS_blendvpd, typeSize, JITREG_XMM0, tmpReg2, instOptions);
2454- emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, JITREG_XMM0, instOptions);
2455-
2456- emit->emitIns_Mov (INS_movups, typeSize, JITREG_XMM0, tmpReg3, false );
2451+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg2, instOptions);
2452+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, tmpReg1, instOptions);
2453+
2454+ emit->emitIns_R_R (INS_cvtdq2pd, divTypeSize, tmpReg1, op2Reg, instOptions);
2455+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, tmpReg1, false , instOptions);
2456+ emit->emitIns_R_C (INS_addpd, typeSize, tmpReg3, double2To32ConstFld, instOptions);
2457+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg3, instOptions);
2458+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, tmpReg1, instOptions);
24572459 }
24582460 }
24592461
2460- emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, tmpReg3, tmpReg1, tmpReg2, instOptions);
2461-
24622462 if (varTypeIsSigned (baseType) || compiler->compOpportunisticallyDependsOn (InstructionSet_AVX512))
24632463 {
2464+ emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, targetReg, tmpReg2, tmpReg3, instOptions);
24642465 emit->emitIns_R_R (varTypeIsSigned (baseType) ? INS_cvttpd2dq : INS_vcvttpd2udq, divTypeSize, targetReg,
2465- tmpReg3 , instOptions);
2466+ targetReg , instOptions);
24662467 }
24672468 else
24682469 {
24692470 assert (varTypeIsUnsigned (baseType));
2471+ emit->emitIns_SIMD_R_R_R (INS_divpd, divTypeSize, tmpReg1, tmpReg2, tmpReg3, instOptions);
24702472
24712473 if (compiler->compOpportunisticallyDependsOn (InstructionSet_AVX))
24722474 {
2473- emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg2, tmpReg3 , instOptions);
2474- emit->emitIns_Mov (INS_movups, typeSize, tmpReg3 , op1Reg, instOptions);
2475- emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, typeSize, targetReg, tmpReg2, tmpReg3, tmpReg2 ,
2475+ emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg3, tmpReg1 , instOptions);
2476+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg1 , op1Reg, instOptions);
2477+ emit->emitIns_SIMD_R_R_R_R (INS_blendvpd, typeSize, targetReg, tmpReg3, tmpReg1, tmpReg3 ,
24762478 instOptions);
24772479 }
24782480 else
24792481 {
2480- emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg2, tmpReg3, instOptions);
2481- emit->emitIns_Mov (INS_movups, typeSize, tmpReg3, op1Reg, instOptions);
2482- if (targetReg != JITREG_XMM0)
2483- {
2484- emit->emitIns_Mov (INS_movups, typeSize, tmpReg1, JITREG_XMM0, false );
2485- }
2486-
2487- emit->emitIns_Mov (INS_movups, typeSize, JITREG_XMM0, tmpReg2, instOptions);
2488- emit->emitIns_R_R (INS_blendvpd, typeSize, JITREG_XMM0, tmpReg3, instOptions);
2489-
2490- if (targetReg != JITREG_XMM0)
2491- {
2492- emit->emitIns_Mov (INS_movups, typeSize, targetReg, JITREG_XMM0, false );
2493- emit->emitIns_Mov (INS_movups, typeSize, JITREG_XMM0, tmpReg1, false );
2494- }
2482+ emit->emitIns_R_R (INS_cvttpd2dq, divTypeSize, tmpReg1, tmpReg1, instOptions);
2483+ emit->emitIns_Mov (INS_movups, typeSize, tmpReg2, op1Reg, instOptions);
2484+ emit->emitIns_R_R (INS_blendvpd, typeSize, tmpReg1, tmpReg2, instOptions);
2485+ emit->emitIns_Mov (INS_movups, typeSize, targetReg, tmpReg1, false );
24952486 }
24962487 }
24972488
0 commit comments