@@ -2583,10 +2583,35 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
25832583 return true ;
25842584}
25852585
2586- static bool interp__builtin_ia32_pmadd (InterpState &S, CodePtr OpPC,
2587- const CallExpr *Call,
2588- unsigned BuiltinID) {
2589- return true ; // TODO: Implement the builtin.
2586+ static bool interp__builtin_ia32_pmadd (
2587+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
2588+ llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2589+ const APSInt &)>
2590+ Fn) {
2591+ assert (Call->getArg (0 )->getType ()->isVectorType () &&
2592+ Call->getArg (1 )->getType ()->isVectorType ());
2593+ const Pointer &RHS = S.Stk .pop <Pointer>();
2594+ const Pointer &LHS = S.Stk .pop <Pointer>();
2595+ const Pointer &Dst = S.Stk .peek <Pointer>();
2596+
2597+ const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
2598+ PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
2599+ unsigned NumElems = VT->getNumElements ();
2600+ bool DestUnsigned = Call->getType ()->isUnsignedIntegerOrEnumerationType ();
2601+
2602+ for (unsigned I = 0 ; I != NumElems; I += 2 ) {
2603+ INT_TYPE_SWITCH_NO_BOOL (ElemT, {
2604+ APSInt LoLHS = LHS.elem <T>(I).toAPSInt ();
2605+ APSInt HiLHS = LHS.elem <T>(I + 1 ).toAPSInt ();
2606+ APSInt LoRHS = RHS.elem <T>(I).toAPSInt ();
2607+ APSInt HiRHS = RHS.elem <T>(I + 1 ).toAPSInt ();
2608+ Dst.elem <T>(I) =
2609+ static_cast <T>(APSInt (Fn (LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned));
2610+ });
2611+ }
2612+
2613+ Dst.initializeAllElements ();
2614+ return true ;
25902615}
25912616
25922617static bool interp__builtin_ia32_pmul (InterpState &S, CodePtr OpPC,
@@ -3503,12 +3528,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
35033528 case clang::X86::BI__builtin_ia32_pmaddubsw128:
35043529 case clang::X86::BI__builtin_ia32_pmaddubsw256:
35053530 case clang::X86::BI__builtin_ia32_pmaddubsw512:
3506- return true ; // TODO: Use interp__builtin_i32_pmadd.
3507-
3531+ return interp__builtin_ia32_pmadd (
3532+ S, OpPC, Call,
3533+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3534+ const APSInt &HiRHS) {
3535+ unsigned BitWidth = 2 * LoLHS.getBitWidth ();
3536+ return (LoLHS.zext (BitWidth) * LoRHS.sext (BitWidth))
3537+ .sadd_sat ((HiLHS.zext (BitWidth) * HiRHS.sext (BitWidth)));
3538+ });
3539+
35083540 case clang::X86::BI__builtin_ia32_pmaddwd128:
35093541 case clang::X86::BI__builtin_ia32_pmaddwd256:
35103542 case clang::X86::BI__builtin_ia32_pmaddwd512:
3511- return true ; // TODO: Use interp__builtin_i32_pmadd.
3543+ return interp__builtin_ia32_pmadd (
3544+ S, OpPC, Call,
3545+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3546+ const APSInt &HiRHS) {
3547+ unsigned BitWidth = 2 * LoLHS.getBitWidth ();
3548+ return (LoLHS.sext (BitWidth) * LoRHS.sext (BitWidth)) +
3549+ (HiLHS.sext (BitWidth) * HiRHS.sext (BitWidth));
3550+ });
35123551
35133552 case clang::X86::BI__builtin_ia32_pmulhuw128:
35143553 case clang::X86::BI__builtin_ia32_pmulhuw256:
0 commit comments