[Math] Replace VecCore functions with std::simd functions

guitargeek · guitargeek · commit a293bdb0c22f · 2025-12-19T09:18:51.000+01:00
diff --git a/hist/hist/inc/TF1.h b/hist/hist/inc/TF1.h
@@ -841,7 +841,7 @@ inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
       //    res = GetSave(x);
       return TMath::SignalingNaN();
    }
-   return vecCore::Get<ROOT::Double_v>(res, 0);
+   return res[0];
 }
 #endif
 
diff --git a/hist/hist/src/TFormula.cxx b/hist/hist/src/TFormula.cxx
@@ -3134,7 +3134,7 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
 
    if (fNdim == 0 || !x) {
       ROOT::Double_v ret =  DoEvalVec(nullptr, params);
-      return vecCore::Get( ret, 0 );
+      return ret[0];
    }
 
     // otherwise, regular Double_t inputs on a vectorized function
@@ -3150,15 +3150,15 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
          xvec[i] = x[i];
 
       ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
-      return vecCore::Get(ans, 0);
+      return ans[0];
    }
    // allocating a vector is much slower (we do only for dim > 4)
    std::vector<ROOT::Double_v> xvec(fNdim);
    for (int i = 0; i < fNdim; i++)
       xvec[i] = x[i];
 
    ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
-   return  vecCore::Get(ans, 0);
+   return  ans[0];
 
 #else
    // this should never happen, because fVectorized can only be set true with
@@ -3393,16 +3393,16 @@ ROOT::Double_v TFormula::EvalParVec(const ROOT::Double_v *x, const Double_t *par
    if (gDebug)
       Info("EvalPar", "Function is not vectorized - converting ROOT::Double_v into Double_t and back");
 
-   const int vecSize = vecCore::VectorSize<ROOT::Double_v>();
+   const int vecSize = ROOT::Double_v::size();
    std::vector<Double_t>  xscalars(vecSize*fNdim);
 
    for (int i = 0; i < vecSize; i++)
       for (int j = 0; j < fNdim; j++)
-         xscalars[i*fNdim+j] = vecCore::Get(x[j],i);
+         xscalars[i*fNdim+j] = x[j][i];
 
    ROOT::Double_v answers(0.);
    for (int i = 0; i < vecSize; i++)
-      vecCore::Set(answers, i, DoEval(&xscalars[i*fNdim], params));
+      answers[i] = DoEval(&xscalars[i*fNdim], params);
 
    return answers;
 }
diff --git a/math/mathcore/inc/Fit/FitData.h b/math/mathcore/inc/Fit/FitData.h
@@ -358,9 +358,9 @@ namespace ROOT {
          static unsigned VectorPadding(unsigned dataSize) // elements to append so dataSize becomes a multiple of the SIMD width
          {
             unsigned padding = 0;
-            unsigned modP = (dataSize) % vecCore::VectorSize<ROOT::Double_v>();
+            unsigned modP = (dataSize) % ROOT::Double_v::size(); // elements left over after the last full vector
             if (modP > 0)
-               padding = vecCore::VectorSize<ROOT::Double_v>() - modP;
+               padding = ROOT::Double_v::size() - modP; // fill the partial vector; stays 0 when already aligned
             return padding;
          }
 #else
diff --git a/math/mathcore/inc/Fit/FitUtil.h b/math/mathcore/inc/Fit/FitUtil.h
@@ -36,19 +36,6 @@
 
 //#define DEBUG_FITUTIL
 
-#ifdef R__HAS_VECCORE
-namespace vecCore {
-template <class T>
-vecCore::Mask<T> Int2Mask(unsigned i)
-{
-   T x;
-   for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
-      vecCore::Set<T>(x, j, j);
-   return vecCore::Mask<T>(x < T(i));
-}
-}
-#endif
-
 namespace ROOT {
 
    namespace Fit {
@@ -61,6 +48,32 @@ namespace ROOT {
 */
 namespace FitUtil {
 
+namespace Detail {
+
+#ifdef R__HAS_VECCORE
+template <class T>
+vecCore::Mask<T> Int2Mask(unsigned i) // mask selecting the first i lanes: lane j is set iff j < i
+{
+   T x;
+   for (unsigned j = 0; j < T::size(); j++) {
+      x[j] = j; // every lane stores its own index
+   }
+   return vecCore::Mask<T>(x < T(i)); // lane-wise compare; T(i) presumably broadcasts i to all lanes - confirm for the SIMD type in use
+}
+#endif
+
+template <typename T>
+auto ReduceAdd(const T &v) // horizontal sum: adds all T::size() lanes of v and returns a T::value_type
+{
+   typename T::value_type result(0);
+   for (size_t i = 0; i < T::size(); ++i) {
+      result += v[i]; // lanes are accumulated one by one in index order
+   }
+   return result;
+}
+
+} // namespace Detail
+
   typedef  ROOT::Math::IParamMultiFunction IModelFunction;
   typedef  ROOT::Math::IParamMultiGradFunction IGradModelFunction;
 
@@ -235,23 +248,15 @@ namespace FitUtil {
 
      inline double ExecFunc(const IModelFunctionTempl<ROOT::Double_v> *f, const double *x, const double *p) const
      {
-        // Figure out the size of the SIMD vectors.
-        constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double);
-        double xBuffer[vecSize];
         ROOT::Double_v xx[fDim];
         for (unsigned int i = 0; i < fDim; ++i) {
-           // The Load() function reads multiple values from the pointed-to
-           // memory into xx. This is why we have to copy the input values from
-           // the x array into a zero-padded buffer to read from. Otherwise,
-           // Load() would access the x array out of bounds.
-           *xBuffer = x[i];
-           for(int j = 1; j < vecSize; ++j) {
-              xBuffer[j] = 0.0;
+           xx[i][0] = x[i]; // lane 0 of coordinate i carries the scalar input
+           for(std::size_t j = 1; j < ROOT::Double_v::size(); ++j) {
+              xx[i][j] = 0.0; // zero-pad the remaining lanes of coordinate i (not xx[j], which is another coordinate)
            }
-           vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
         }
         auto res = (*f)(xx, p);
-        return vecCore::Get<ROOT::Double_v>(res, 0);
+        return res[0]; // only lane 0 holds the result for the scalar point
      }
 
 #if __clang_major__ > 16
@@ -440,9 +445,7 @@ namespace FitUtil {
 
 
             // avoid infinity or nan in chi2 values due to wrong function values
-            auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
-
-            vecCore::MaskedAssign<T>(chi2, m, maxResValue);
+            where(chi2 > maxResValue, chi2) = maxResValue;
 
             return chi2;
          };
@@ -478,10 +481,9 @@ namespace FitUtil {
 
          // Last SIMD vector of elements (if padding needed)
          if (data.Size() % vecSize != 0)
-            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
-                                  res + mapFunction(data.Size() / vecSize));
+            where(Detail::Int2Mask<T>(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize);
 
-         return vecCore::ReduceAdd(res);
+         return Detail::ReduceAdd(res);
       }
 
       static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
@@ -539,7 +541,7 @@ namespace FitUtil {
                T xmin_v, xmax_v;
                vecCore::Load<T>(xmin_v, xmin.data());
                vecCore::Load<T>(xmax_v, xmax.data());
-               if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+               if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
                   MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
                   return 0;
                }
@@ -660,17 +662,17 @@ namespace FitUtil {
          if (remainingPoints > 0) {
             auto remainingPointsContribution = mapFunction(numVectors);
             // Add the contribution from the valid remaining points and store the result in the output variable
-            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
-            vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
-            vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
-            vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
+            auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
+            where(remainingMask, logl_v) = logl_v + remainingPointsContribution.logvalue;
+            where(remainingMask, sumW_v) = sumW_v + remainingPointsContribution.weight;
+            where(remainingMask, sumW2_v) = sumW2_v + remainingPointsContribution.weight2;
          }
 
 
          //reduce vector type to double.
-         double logl  = vecCore::ReduceAdd(logl_v);
-         double sumW  = vecCore::ReduceAdd(sumW_v);
-         double sumW2 = vecCore::ReduceAdd(sumW2_v);
+         double logl  = Detail::ReduceAdd(logl_v);
+         double sumW  = Detail::ReduceAdd(sumW_v);
+         double sumW2 = Detail::ReduceAdd(sumW2_v);
 
          if (extended) {
             // add Poisson extended term
@@ -697,7 +699,7 @@ namespace FitUtil {
                   T xmin_v, xmax_v;
                   vecCore::Load<T>(xmin_v, xmin.data());
                   vecCore::Load<T>(xmax_v, xmax.data());
-                  if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+                  if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
                      MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
                      return 0;
                   }
@@ -791,7 +793,7 @@ namespace FitUtil {
 
             // EvalLog protects against 0 values of fval but don't want to add in the -log sum
             // negative values of fval
-            vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
+            where(fval < 0.0, fval) =  0.0;
 
             T nloglike{}; // negative loglikelihood
 
@@ -810,7 +812,7 @@ namespace FitUtil {
                if (extended) {
                   nloglike =  weight * ( fval - y);
                }
-               vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval)) );
+               where(y != 0, nloglike) = nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval));
 
             } else {
                // standard case no weights or iWeight=1
@@ -819,8 +821,7 @@ namespace FitUtil {
                // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
                if (extended) nloglike = fval - y;
 
-               vecCore::MaskedAssign<T>(
-                  nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
+               where(y > 0, nloglike) = nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
             }
 
             return nloglike;
@@ -859,10 +860,10 @@ namespace FitUtil {
 
          // Last padded SIMD vector of elements
          if (data.Size() % vecSize != 0)
-            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
-                                  res + mapFunction(data.Size() / vecSize));
+            where(Detail::Int2Mask<T>(data.Size() % vecSize), res) =
+                                  res + mapFunction(data.Size() / vecSize);
 
-         return vecCore::ReduceAdd(res);
+         return Detail::ReduceAdd(res);
       }
 
       static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
@@ -879,10 +880,10 @@ namespace FitUtil {
          auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
 
          // Case +inf or nan
-         vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
+         where(!mask, rval) = +vecCore::NumericLimits<T>::Max();
 
          // Case -inf
-         vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
+         where(!mask && rval < 0, rval) = -vecCore::NumericLimits<T>::Max();
 
          return mask;
       }
@@ -978,8 +979,8 @@ namespace FitUtil {
                }
 
                // calculate derivative point contribution (only for valid points)
-               vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
-                                     -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
+               where(validPointsMasks[i], pointContributionVec[ipar]) =
+                                     -2.0 * (y - fval) * invError * invError * gradFunc[ipar];
             }
 
             return pointContributionVec;
@@ -1035,14 +1036,14 @@ namespace FitUtil {
          if (remainingPoints > 0) {
             auto remainingPointsContribution = mapFunction(numVectors);
             // Add the contribution from the valid remaining points and store the result in the output variable
-            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
             for (unsigned int param = 0; param < npar; param++) {
-               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+               where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
             }
          }
          // reduce final gradient result from T to double
          for (unsigned int param = 0; param < npar; param++) {
-            grad[param] = vecCore::ReduceAdd(gVec[param]);
+            grad[param] = Detail::ReduceAdd(gVec[param]);
          }
 
          // correct the number of points
@@ -1054,7 +1055,7 @@ namespace FitUtil {
 
             for (const auto &mask : validPointsMasks) {
                for (unsigned int i = 0; i < vecSize; i++) {
-                  nRejected += !vecCore::Get(mask, i);
+                  nRejected += !mask[i];
                }
             }
 
@@ -1094,7 +1095,7 @@ namespace FitUtil {
       // const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
       // auto fval = func(&x, p);
       // auto logPdf = ROOT::Math::Util::EvalLog(fval);
-      // return vecCore::Get<ROOT::Double_v>(logPdf, 0);
+      // return logPdf[0];
 
       static void
       EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
@@ -1156,7 +1157,7 @@ namespace FitUtil {
                vecCore::Mask<T> positiveValuesMask = fval > 0;
 
                // df/dp * (1.  - y/f )
-               vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
+               where(positiveValuesMask, pointContributionVec[ipar]) = gradFunc[ipar] * (1. - y / fval);
 
                vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
 
@@ -1234,14 +1235,14 @@ namespace FitUtil {
          if (remainingPoints > 0) {
             auto remainingPointsContribution = mapFunction(numVectors);
             // Add the contribution from the valid remaining points and store the result in the output variable
-            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
             for (unsigned int param = 0; param < npar; param++) {
-               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+               where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
             }
          }
          // reduce final gradient result from T to double
          for (unsigned int param = 0; param < npar; param++) {
-            grad[param] = vecCore::ReduceAdd(gVec[param]);
+            grad[param] = Detail::ReduceAdd(gVec[param]);
          }
 
 #ifdef DEBUG_FITUTIL
@@ -1317,8 +1318,9 @@ namespace FitUtil {
             vecCore::Mask<T> positiveValues = fval > 0;
 
             for (unsigned int kpar = 0; kpar < npar; ++kpar) {
-               if (!vecCore::MaskEmpty(positiveValues))
-                  vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
+               if (!vecCore::MaskEmpty(positiveValues)) {
+                  where(positiveValues, pointContributionVec[kpar]) = -1. / fval * gradFunc[kpar];
+               }
 
                vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
                if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
@@ -1383,14 +1385,14 @@ namespace FitUtil {
          if (remainingPoints > 0) {
             auto remainingPointsContribution = mapFunction(numVectors);
             // Add the contribution from the valid remaining points and store the result in the output variable
-            auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
+            auto remainingMask = Detail::Int2Mask<T>(initialNPoints % vecSize);
             for (unsigned int param = 0; param < npar; param++) {
-               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+               where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
             }
          }
          // reduce final gradient result from T to double
          for (unsigned int param = 0; param < npar; param++) {
-            grad[param] = vecCore::ReduceAdd(gVec[param]);
+            grad[param] = Detail::ReduceAdd(gVec[param]);
          }
 
 #ifdef DEBUG_FITUTIL
diff --git a/math/mathcore/src/VectorizedTMath.cxx b/math/mathcore/src/VectorizedTMath.cxx
diff --git a/test/TFormulaVecTests.h b/test/TFormulaVecTests.h

Original file line number	Diff line number	Diff line change
`@@ -841,7 +841,7 @@ inline double TF1::EvalParVec(const Double_t data, const Double_t params)`
`841`	`841`	`// res = GetSave(x);`
`842`	`842`	`return TMath::SignalingNaN();`
`843`	`843`	`}`
`844`		`- return vecCore::Get<ROOT::Double_v>(res, 0);`
	`844`	`+ return res[0];`
`845`	`845`	`}`
`846`	`846`	`#endif`
`847`	`847`
Original file line number	Diff line number	Diff line change
`@@ -358,9 +358,9 @@ namespace ROOT {`
`358`	`358`	`static unsigned VectorPadding(unsigned dataSize)`
`359`	`359`	`{`
`360`	`360`	`unsigned padding = 0;`
`361`		`- unsigned modP = (dataSize) % vecCore::VectorSize<ROOT::Double_v>();`
	`361`	`+ unsigned modP = (dataSize) % ROOT::Double_v::size();`
`362`	`362`	`if (modP > 0)`
`363`		`- padding = vecCore::VectorSize<ROOT::Double_v>() - modP;`
	`363`	`+ padding = ROOT::Double_v::size() - modP;`
`364`	`364`	`return padding;`
`365`	`365`	`}`
`366`	`366`	`#else`