Skip to content

Commit a293bdb

Browse files
committed
[Math] Replace VecCore functions with std::simd functions
1 parent b5c806c commit a293bdb

File tree

6 files changed

+108
-97
lines changed

6 files changed

+108
-97
lines changed

hist/hist/inc/TF1.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
841841
// res = GetSave(x);
842842
return TMath::SignalingNaN();
843843
}
844-
return vecCore::Get<ROOT::Double_v>(res, 0);
844+
return res[0];
845845
}
846846
#endif
847847

hist/hist/src/TFormula.cxx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,7 +3134,7 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31343134

31353135
if (fNdim == 0 || !x) {
31363136
ROOT::Double_v ret = DoEvalVec(nullptr, params);
3137-
return vecCore::Get( ret, 0 );
3137+
return ret[0];
31383138
}
31393139

31403140
// otherwise, regular Double_t inputs on a vectorized function
@@ -3150,15 +3150,15 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31503150
xvec[i] = x[i];
31513151

31523152
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3153-
return vecCore::Get(ans, 0);
3153+
return ans[0];
31543154
}
31553155
// allocating a vector is much slower (we do only for dim > 4)
31563156
std::vector<ROOT::Double_v> xvec(fNdim);
31573157
for (int i = 0; i < fNdim; i++)
31583158
xvec[i] = x[i];
31593159

31603160
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3161-
return vecCore::Get(ans, 0);
3161+
return ans[0];
31623162

31633163
#else
31643164
// this should never happen, because fVectorized can only be set true with
@@ -3393,16 +3393,16 @@ ROOT::Double_v TFormula::EvalParVec(const ROOT::Double_v *x, const Double_t *par
33933393
if (gDebug)
33943394
Info("EvalPar", "Function is not vectorized - converting ROOT::Double_v into Double_t and back");
33953395

3396-
const int vecSize = vecCore::VectorSize<ROOT::Double_v>();
3396+
const int vecSize = ROOT::Double_v::size();
33973397
std::vector<Double_t> xscalars(vecSize*fNdim);
33983398

33993399
for (int i = 0; i < vecSize; i++)
34003400
for (int j = 0; j < fNdim; j++)
3401-
xscalars[i*fNdim+j] = vecCore::Get(x[j],i);
3401+
xscalars[i*fNdim+j] = x[j][i];
34023402

34033403
ROOT::Double_v answers(0.);
34043404
for (int i = 0; i < vecSize; i++)
3405-
vecCore::Set(answers, i, DoEval(&xscalars[i*fNdim], params));
3405+
answers[i] = DoEval(&xscalars[i*fNdim], params);
34063406

34073407
return answers;
34083408
}

math/mathcore/inc/Fit/FitData.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,9 +358,9 @@ namespace ROOT {
358358
static unsigned VectorPadding(unsigned dataSize)
359359
{
360360
unsigned padding = 0;
361-
unsigned modP = (dataSize) % vecCore::VectorSize<ROOT::Double_v>();
361+
unsigned modP = (dataSize) % ROOT::Double_v::size();
362362
if (modP > 0)
363-
padding = vecCore::VectorSize<ROOT::Double_v>() - modP;
363+
padding = ROOT::Double_v::size() - modP;
364364
return padding;
365365
}
366366
#else

math/mathcore/inc/Fit/FitUtil.h

Lines changed: 67 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,6 @@
3636

3737
//#define DEBUG_FITUTIL
3838

39-
#ifdef R__HAS_VECCORE
40-
namespace vecCore {
41-
template <class T>
42-
vecCore::Mask<T> Int2Mask(unsigned i)
43-
{
44-
T x;
45-
for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
46-
vecCore::Set<T>(x, j, j);
47-
return vecCore::Mask<T>(x < T(i));
48-
}
49-
}
50-
#endif
51-
5239
namespace ROOT {
5340

5441
namespace Fit {
@@ -61,6 +48,32 @@ namespace ROOT {
6148
*/
6249
namespace FitUtil {
6350

51+
namespace Detail {
52+
53+
#ifdef R__HAS_VECCORE
54+
template <class T>
55+
vecCore::Mask<T> Int2Mask(unsigned i)
56+
{
57+
T x;
58+
for (unsigned j = 0; j < T::size(); j++) {
59+
x[j] = j;
60+
}
61+
return vecCore::Mask<T>(x < T(i));
62+
}
63+
#endif
64+
65+
template <typename T>
66+
auto ReduceAdd(const T &v)
67+
{
68+
typename T::value_type result(0);
69+
for (size_t i = 0; i < T::size(); ++i) {
70+
result += v[i];
71+
}
72+
return result;
73+
}
74+
75+
} // namespace Detail
76+
6477
typedef ROOT::Math::IParamMultiFunction IModelFunction;
6578
typedef ROOT::Math::IParamMultiGradFunction IGradModelFunction;
6679

@@ -235,23 +248,15 @@ namespace FitUtil {
235248

236249
inline double ExecFunc(const IModelFunctionTempl<ROOT::Double_v> *f, const double *x, const double *p) const
237250
{
238-
// Figure out the size of the SIMD vectors.
239-
constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double);
240-
double xBuffer[vecSize];
241251
ROOT::Double_v xx[fDim];
242252
for (unsigned int i = 0; i < fDim; ++i) {
243-
// The Load() function reads multiple values from the pointed-to
244-
// memory into xx. This is why we have to copy the input values from
245-
// the x array into a zero-padded buffer to read from. Otherwise,
246-
// Load() would access the x array out of bounds.
247-
*xBuffer = x[i];
248-
for(int j = 1; j < vecSize; ++j) {
249-
xBuffer[j] = 0.0;
253+
xx[i][0] = x[i];
254+
for(std::size_t j = 1; j < ROOT::Double_v::size(); ++j) {
255+
xx[j] = 0.0;
250256
}
251-
vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
252257
}
253258
auto res = (*f)(xx, p);
254-
return vecCore::Get<ROOT::Double_v>(res, 0);
259+
return res[0];
255260
}
256261

257262
#if __clang_major__ > 16
@@ -440,9 +445,7 @@ namespace FitUtil {
440445

441446

442447
// avoid infinity or nan in chi2 values due to wrong function values
443-
auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
444-
445-
vecCore::MaskedAssign<T>(chi2, m, maxResValue);
448+
where(chi2 > maxResValue, chi2) = maxResValue;
446449

447450
return chi2;
448451
};
@@ -478,10 +481,9 @@ namespace FitUtil {
478481

479482
// Last SIMD vector of elements (if padding needed)
480483
if (data.Size() % vecSize != 0)
481-
vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
482-
res + mapFunction(data.Size() / vecSize));
484+
where(Detail::Int2Mask<T>(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize);
483485

484-
return vecCore::ReduceAdd(res);
486+
return Detail::ReduceAdd(res);
485487
}
486488

487489
static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
@@ -539,7 +541,7 @@ namespace FitUtil {
539541
T xmin_v, xmax_v;
540542
vecCore::Load<T>(xmin_v, xmin.data());
541543
vecCore::Load<T>(xmax_v, xmax.data());
542-
if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
544+
if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
543545
MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
544546
return 0;
545547
}
@@ -660,17 +662,17 @@ namespace FitUtil {
660662
if (remainingPoints > 0) {
661663
auto remainingPointsContribution = mapFunction(numVectors);
662664
// Add the contribution from the valid remaining points and store the result in the output variable
663-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
664-
vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
665-
vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
666-
vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
665+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
666+
where(remainingMask, logl_v) = logl_v + remainingPointsContribution.logvalue;
667+
where(remainingMask, sumW_v) = sumW_v + remainingPointsContribution.weight;
668+
where(remainingMask, sumW2_v) = sumW2_v + remainingPointsContribution.weight2;
667669
}
668670

669671

670672
//reduce vector type to double.
671-
double logl = vecCore::ReduceAdd(logl_v);
672-
double sumW = vecCore::ReduceAdd(sumW_v);
673-
double sumW2 = vecCore::ReduceAdd(sumW2_v);
673+
double logl = Detail::ReduceAdd(logl_v);
674+
double sumW = Detail::ReduceAdd(sumW_v);
675+
double sumW2 = Detail::ReduceAdd(sumW2_v);
674676

675677
if (extended) {
676678
// add Poisson extended term
@@ -697,7 +699,7 @@ namespace FitUtil {
697699
T xmin_v, xmax_v;
698700
vecCore::Load<T>(xmin_v, xmin.data());
699701
vecCore::Load<T>(xmax_v, xmax.data());
700-
if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
702+
if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) {
701703
MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
702704
return 0;
703705
}
@@ -791,7 +793,7 @@ namespace FitUtil {
791793

792794
// EvalLog protects against 0 values of fval but don't want to add in the -log sum
793795
// negative values of fval
794-
vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
796+
where(fval < 0.0, fval) = 0.0;
795797

796798
T nloglike{}; // negative loglikelihood
797799

@@ -810,7 +812,7 @@ namespace FitUtil {
810812
if (extended) {
811813
nloglike = weight * ( fval - y);
812814
}
813-
vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)) );
815+
where(y != 0, nloglike) = nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
814816

815817
} else {
816818
// standard case no weights or iWeight=1
@@ -819,8 +821,7 @@ namespace FitUtil {
819821
// (same formula as in Baker-Cousins paper, page 439 except a factor of 2
820822
if (extended) nloglike = fval - y;
821823

822-
vecCore::MaskedAssign<T>(
823-
nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
824+
where(y > 0, nloglike) = nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
824825
}
825826

826827
return nloglike;
@@ -859,10 +860,10 @@ namespace FitUtil {
859860

860861
// Last padded SIMD vector of elements
861862
if (data.Size() % vecSize != 0)
862-
vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
863-
res + mapFunction(data.Size() / vecSize));
863+
where(Detail::Int2Mask<T>(data.Size() % vecSize), res) =
864+
res + mapFunction(data.Size() / vecSize);
864865

865-
return vecCore::ReduceAdd(res);
866+
return Detail::ReduceAdd(res);
866867
}
867868

868869
static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
@@ -879,10 +880,10 @@ namespace FitUtil {
879880
auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
880881

881882
// Case +inf or nan
882-
vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
883+
where(!mask, rval) = +vecCore::NumericLimits<T>::Max();
883884

884885
// Case -inf
885-
vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
886+
where(!mask && rval < 0, rval) = -vecCore::NumericLimits<T>::Max();
886887

887888
return mask;
888889
}
@@ -978,8 +979,8 @@ namespace FitUtil {
978979
}
979980

980981
// calculate derivative point contribution (only for valid points)
981-
vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
982-
-2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
982+
where(validPointsMasks[i], pointContributionVec[ipar]) =
983+
-2.0 * (y - fval) * invError * invError * gradFunc[ipar];
983984
}
984985

985986
return pointContributionVec;
@@ -1035,14 +1036,14 @@ namespace FitUtil {
10351036
if (remainingPoints > 0) {
10361037
auto remainingPointsContribution = mapFunction(numVectors);
10371038
// Add the contribution from the valid remaining points and store the result in the output variable
1038-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1039+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
10391040
for (unsigned int param = 0; param < npar; param++) {
1040-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1041+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
10411042
}
10421043
}
10431044
// reduce final gradient result from T to double
10441045
for (unsigned int param = 0; param < npar; param++) {
1045-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1046+
grad[param] = Detail::ReduceAdd(gVec[param]);
10461047
}
10471048

10481049
// correct the number of points
@@ -1054,7 +1055,7 @@ namespace FitUtil {
10541055

10551056
for (const auto &mask : validPointsMasks) {
10561057
for (unsigned int i = 0; i < vecSize; i++) {
1057-
nRejected += !vecCore::Get(mask, i);
1058+
nRejected += !mask[i];
10581059
}
10591060
}
10601061

@@ -1094,7 +1095,7 @@ namespace FitUtil {
10941095
// const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
10951096
// auto fval = func(&x, p);
10961097
// auto logPdf = ROOT::Math::Util::EvalLog(fval);
1097-
// return vecCore::Get<ROOT::Double_v>(logPdf, 0);
1098+
// return logPdf[0];
10981099

10991100
static void
11001101
EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
@@ -1156,7 +1157,7 @@ namespace FitUtil {
11561157
vecCore::Mask<T> positiveValuesMask = fval > 0;
11571158

11581159
// df/dp * (1. - y/f )
1159-
vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
1160+
where(positiveValuesMask, pointContributionVec[ipar]) = gradFunc[ipar] * (1. - y / fval);
11601161

11611162
vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
11621163

@@ -1234,14 +1235,14 @@ namespace FitUtil {
12341235
if (remainingPoints > 0) {
12351236
auto remainingPointsContribution = mapFunction(numVectors);
12361237
// Add the contribution from the valid remaining points and store the result in the output variable
1237-
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1238+
auto remainingMask = Detail::Int2Mask<T>(remainingPoints);
12381239
for (unsigned int param = 0; param < npar; param++) {
1239-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1240+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
12401241
}
12411242
}
12421243
// reduce final gradient result from T to double
12431244
for (unsigned int param = 0; param < npar; param++) {
1244-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1245+
grad[param] = Detail::ReduceAdd(gVec[param]);
12451246
}
12461247

12471248
#ifdef DEBUG_FITUTIL
@@ -1317,8 +1318,9 @@ namespace FitUtil {
13171318
vecCore::Mask<T> positiveValues = fval > 0;
13181319

13191320
for (unsigned int kpar = 0; kpar < npar; ++kpar) {
1320-
if (!vecCore::MaskEmpty(positiveValues))
1321-
vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
1321+
if (!vecCore::MaskEmpty(positiveValues)) {
1322+
where(positiveValues, pointContributionVec[kpar]) = -1. / fval * gradFunc[kpar];
1323+
}
13221324

13231325
vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
13241326
if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
@@ -1383,14 +1385,14 @@ namespace FitUtil {
13831385
if (remainingPoints > 0) {
13841386
auto remainingPointsContribution = mapFunction(numVectors);
13851387
// Add the contribution from the valid remaining points and store the result in the output variable
1386-
auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
1388+
auto remainingMask = Detail::Int2Mask<T>(initialNPoints % vecSize);
13871389
for (unsigned int param = 0; param < npar; param++) {
1388-
vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1390+
where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param];
13891391
}
13901392
}
13911393
// reduce final gradient result from T to double
13921394
for (unsigned int param = 0; param < npar; param++) {
1393-
grad[param] = vecCore::ReduceAdd(gVec[param]);
1395+
grad[param] = Detail::ReduceAdd(gVec[param]);
13941396
}
13951397

13961398
#ifdef DEBUG_FITUTIL

0 commit comments

Comments
 (0)