Skip to content

Commit e293465

Browse files
committed
[Math] Replace VecCore functions with std::simd functions
1 parent b5c806c commit e293465

File tree

5 files changed

+102
-93
lines changed

5 files changed

+102
-93
lines changed

hist/hist/inc/TF1.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
841841
// res = GetSave(x);
842842
return TMath::SignalingNaN();
843843
}
844-
return vecCore::Get<ROOT::Double_v>(res, 0);
844+
return res[0];
845845
}
846846
#endif
847847

hist/hist/src/TFormula.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,7 +3134,7 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31343134

31353135
if (fNdim == 0 || !x) {
31363136
ROOT::Double_v ret = DoEvalVec(nullptr, params);
3137-
return vecCore::Get( ret, 0 );
3137+
return ret[0];
31383138
}
31393139

31403140
// otherwise, regular Double_t inputs on a vectorized function
@@ -3150,15 +3150,15 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31503150
xvec[i] = x[i];
31513151

31523152
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3153-
return vecCore::Get(ans, 0);
3153+
return ans[0];
31543154
}
31553155
// allocating a vector is much slower (we do only for dim > 4)
31563156
std::vector<ROOT::Double_v> xvec(fNdim);
31573157
for (int i = 0; i < fNdim; i++)
31583158
xvec[i] = x[i];
31593159

31603160
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3161-
return vecCore::Get(ans, 0);
3161+
return ans[0];
31623162

31633163
#else
31643164
// this should never happen, because fVectorized can only be set true with
@@ -3398,7 +3398,7 @@ ROOT::Double_v TFormula::EvalParVec(const ROOT::Double_v *x, const Double_t *par
33983398

33993399
for (int i = 0; i < vecSize; i++)
34003400
for (int j = 0; j < fNdim; j++)
3401-
xscalars[i*fNdim+j] = vecCore::Get(x[j],i);
3401+
xscalars[i*fNdim+j] = x[j][i];
34023402

34033403
ROOT::Double_v answers(0.);
34043404
for (int i = 0; i < vecSize; i++)

math/mathcore/inc/Fit/FitUtil.h

Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,6 @@
3636

3737
//#define DEBUG_FITUTIL
3838

39-
#ifdef R__HAS_VECCORE
40-
namespace vecCore {
41-
template <class T>
42-
vecCore::Mask<T> Int2Mask(unsigned i)
43-
{
44-
T x;
45-
for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
46-
vecCore::Set<T>(x, j, j);
47-
return vecCore::Mask<T>(x < T(i));
48-
}
49-
}
50-
#endif
51-
5239
namespace ROOT {
5340

5441
namespace Fit {
@@ -61,6 +48,30 @@ namespace ROOT {
6148
*/
6249
namespace FitUtil {
6350

51+
namespace Detail {
52+
53+
template <class T>
54+
vecCore::Mask<T> Int2Mask(unsigned i)
55+
{
56+
T x;
57+
for (unsigned j = 0; j < T::size(); j++) {
58+
x[j] = j;
59+
}
60+
return vecCore::Mask<T>(x < T(i));
61+
}
62+
63+
template <typename T>
64+
auto ReduceAdd(const T &v)
65+
{
66+
typename T::value_type result(0);
67+
for (size_t i = 0; i < T::size(); ++i) {
68+
result += v[i];
69+
}
70+
return result;
71+
}
72+
73+
} // namespace Detail
74+
6475
typedef ROOT::Math::IParamMultiFunction IModelFunction;
6576
typedef ROOT::Math::IParamMultiGradFunction IGradModelFunction;
6677

@@ -235,23 +246,15 @@ namespace FitUtil {
235246

236247
inline double ExecFunc(const IModelFunctionTempl<ROOT::Double_v> *f, const double *x, const double *p) const
237248
{
238-
// Figure out the size of the SIMD vectors.
239-
constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double);
240-
double xBuffer[vecSize];
241249
ROOT::Double_v xx[fDim];
242250
for (unsigned int i = 0; i < fDim; ++i) {
243-
// The Load() function reads multiple values from the pointed-to
244-
// memory into xx. This is why we have to copy the input values from
245-
// the x array into a zero-padded buffer to read from. Otherwise,
246-
// Load() would access the x array out of bounds.
247-
*xBuffer = x[i];
248-
for(int j = 1; j < vecSize; ++j) {
249-
xBuffer[j] = 0.0;
251+
xx[i][0] = x[i];
252+
for(std::size_t j = 1; j < ROOT::Double_v::size(); ++j) {
253+
xx[j] = 0.0;
250254
}
251-
vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
252255
}
253256
auto res = (*f)(xx, p);
254-
return vecCore::Get<ROOT::Double_v>(res, 0);
257+
return res[0];
255258
}
256259

257260
#if __clang_major__ > 16
@@ -440,9 +443,7 @@ namespace FitUtil {
440443

441444

442445
// avoid infinity or nan in chi2 values due to wrong function values
443-
auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
444-
445-
vecCore::MaskedAssign<T>(chi2, m, maxResValue);
446+
where(chi2 > maxResValue, chi2) = maxResValue;
446447

447448
return chi2;
448449
};
@@ -478,10 +479,9 @@ namespace FitUtil {
478479

479480
// Last SIMD vector of elements (if padding needed)
480481
if (data.Size() % vecSize != 0)
481-
vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
482-
res + mapFunction(data.Size() / vecSize));
482+
where(Detail::Int2Mask<T>(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize);
483483

484-
return vecCore::ReduceAdd(res);
484+
return Detail::ReduceAdd(res);
485485
}
486486

487487
static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
@@ -539,7 +539,7 @@ namespace FitUtil {
539539
T xmin_v, xmax_v;
540540
vecCore::Load<T>(xmin_v, xmin.data());
541541
vecCore::Load<T>(xmax_v, xmax.data());
542-
if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
542+
if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
543543
MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
544544
return 0;
545545
}
@@ -660,17 +660,17 @@ namespace FitUtil {
660660
if (remainingPoints > 0) {
661661
auto remainingPointsContribution = mapFunction(numVectors);
662662
// Add the contribution from the valid remaining points and store the result in the output variable
663-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
664-
vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
665-
vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
666-
vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
663+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
664+
where(remainingMask, logl_v) = logl_v + remainingPointsContribution.logvalue;
665+
where(remainingMask, sumW_v) = sumW_v + remainingPointsContribution.weight;
666+
where(remainingMask, sumW2_v) = sumW2_v + remainingPointsContribution.weight2;
667667
}
668668

669669

670670
//reduce vector type to double.
671-
double logl = vecCore::ReduceAdd(logl_v);
672-
double sumW = vecCore::ReduceAdd(sumW_v);
673-
double sumW2 = vecCore::ReduceAdd(sumW2_v);
671+
double logl = Detail::ReduceAdd(logl_v);
672+
double sumW = Detail::ReduceAdd(sumW_v);
673+
double sumW2 = Detail::ReduceAdd(sumW2_v);
674674

675675
if (extended) {
676676
// add Poisson extended term
@@ -697,7 +697,7 @@ namespace FitUtil {
697697
T xmin_v, xmax_v;
698698
vecCore::Load<T>(xmin_v, xmin.data());
699699
vecCore::Load<T>(xmax_v, xmax.data());
700-
if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
700+
if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
701701
MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
702702
return 0;
703703
}
@@ -791,7 +791,7 @@ namespace FitUtil {
791791

792792
// EvalLog protects against 0 values of fval but don't want to add in the -log sum
793793
// negative values of fval
794-
vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
794+
where(fval < 0.0, fval) = 0.0;
795795

796796
T nloglike{}; // negative loglikelihood
797797

@@ -810,7 +810,7 @@ namespace FitUtil {
810810
if (extended) {
811811
nloglike = weight * ( fval - y);
812812
}
813-
vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)) );
813+
where(y != 0, nloglike) = nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
814814

815815
} else {
816816
// standard case no weights or iWeight=1
@@ -819,8 +819,7 @@ namespace FitUtil {
819819
// (same formula as in Baker-Cousins paper, page 439 except a factor of 2
820820
if (extended) nloglike = fval - y;
821821

822-
vecCore::MaskedAssign<T>(
823-
nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
822+
where(y > 0, nloglike) = nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
824823
}
825824

826825
return nloglike;
@@ -859,10 +858,10 @@ namespace FitUtil {
859858

860859
// Last padded SIMD vector of elements
861860
if (data.Size() % vecSize != 0)
862-
vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
863-
res + mapFunction(data.Size() / vecSize));
861+
where(Detail::Int2Mask<T>(data.Size() % vecSize), res) =
862+
res + mapFunction(data.Size() / vecSize);
864863

865-
return vecCore::ReduceAdd(res);
864+
return Detail::ReduceAdd(res);
866865
}
867866

868867
static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
@@ -879,10 +878,10 @@ namespace FitUtil {
879878
auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
880879

881880
// Case +inf or nan
882-
vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
881+
where(!mask, rval) = +vecCore::NumericLimits<T>::Max();
883882

884883
// Case -inf
885-
vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
884+
where(!mask && rval < 0, rval) = -vecCore::NumericLimits<T>::Max();
886885

887886
return mask;
888887
}
@@ -978,8 +977,8 @@ namespace FitUtil {
978977
}
979978

980979
// calculate derivative point contribution (only for valid points)
981-
vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
982-
-2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
980+
where(validPointsMasks[i], pointContributionVec[ipar]) =
981+
-2.0 * (y - fval) * invError * invError * gradFunc[ipar];
983982
}
984983

985984
return pointContributionVec;
@@ -1035,14 +1034,14 @@ namespace FitUtil {
10351034
if (remainingPoints > 0) {
10361035
auto remainingPointsContribution = mapFunction(numVectors);
10371036
// Add the contribution from the valid remaining points and store the result in the output variable
1038-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1037+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
10391038
for (unsigned int param = 0; param < npar; param++) {
1040-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1039+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
10411040
}
10421041
}
10431042
// reduce final gradient result from T to double
10441043
for (unsigned int param = 0; param < npar; param++) {
1045-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1044+
grad[param] = Detail::ReduceAdd(gVec[param]);
10461045
}
10471046

10481047
// correct the number of points
@@ -1054,7 +1053,7 @@ namespace FitUtil {
10541053

10551054
for (const auto &mask : validPointsMasks) {
10561055
for (unsigned int i = 0; i < vecSize; i++) {
1057-
nRejected += !vecCore::Get(mask, i);
1056+
nRejected += !mask[i];
10581057
}
10591058
}
10601059

@@ -1094,7 +1093,7 @@ namespace FitUtil {
10941093
// const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
10951094
// auto fval = func(&x, p);
10961095
// auto logPdf = ROOT::Math::Util::EvalLog(fval);
1097-
// return vecCore::Get<ROOT::Double_v>(logPdf, 0);
1096+
// return logPdf[0];
10981097

10991098
static void
11001099
EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
@@ -1156,7 +1155,7 @@ namespace FitUtil {
11561155
vecCore::Mask<T> positiveValuesMask = fval > 0;
11571156

11581157
// df/dp * (1. - y/f )
1159-
vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
1158+
where(positiveValuesMask, pointContributionVec[ipar]) = gradFunc[ipar] * (1. - y / fval);
11601159

11611160
vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
11621161

@@ -1234,14 +1233,14 @@ namespace FitUtil {
12341233
if (remainingPoints > 0) {
12351234
auto remainingPointsContribution = mapFunction(numVectors);
12361235
// Add the contribution from the valid remaining points and store the result in the output variable
1237-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1236+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
12381237
for (unsigned int param = 0; param < npar; param++) {
1239-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1238+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
12401239
}
12411240
}
12421241
// reduce final gradient result from T to double
12431242
for (unsigned int param = 0; param < npar; param++) {
1244-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1243+
grad[param] = Detail::ReduceAdd(gVec[param]);
12451244
}
12461245

12471246
#ifdef DEBUG_FITUTIL
@@ -1317,8 +1316,9 @@ namespace FitUtil {
13171316
vecCore::Mask<T> positiveValues = fval > 0;
13181317

13191318
for (unsigned int kpar = 0; kpar < npar; ++kpar) {
1320-
if (!vecCore::MaskEmpty(positiveValues))
1321-
vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
1319+
if (!vecCore::MaskEmpty(positiveValues)) {
1320+
where(positiveValues, pointContributionVec[kpar]) = -1. / fval * gradFunc[kpar];
1321+
}
13221322

13231323
vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
13241324
if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
@@ -1383,14 +1383,14 @@ namespace FitUtil {
13831383
if (remainingPoints > 0) {
13841384
auto remainingPointsContribution = mapFunction(numVectors);
13851385
// Add the contribution from the valid remaining points and store the result in the output variable
1386-
auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
1386+
auto remainingMask = Detail::Int2Mask<T>(initialNPoints % vecSize);
13871387
for (unsigned int param = 0; param < npar; param++) {
1388-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1388+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
13891389
}
13901390
}
13911391
// reduce final gradient result from T to double
13921392
for (unsigned int param = 0; param < npar; param++) {
1393-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1393+
grad[param] = Detail::ReduceAdd(gVec[param]);
13941394
}
13951395

13961396
#ifdef DEBUG_FITUTIL

0 commit comments

Comments
 (0)