Skip to content

Commit 8c4657a

Browse files
committed
[Math] Replace VecCore functions with std::simd functions
1 parent b5c806c commit 8c4657a

File tree

5 files changed

+47
-39
lines changed

5 files changed

+47
-39
lines changed

hist/hist/inc/TF1.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
841841
// res = GetSave(x);
842842
return TMath::SignalingNaN();
843843
}
844-
return vecCore::Get<ROOT::Double_v>(res, 0);
844+
return res[0];
845845
}
846846
#endif
847847

hist/hist/src/TFormula.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,7 +3134,7 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31343134

31353135
if (fNdim == 0 || !x) {
31363136
ROOT::Double_v ret = DoEvalVec(nullptr, params);
3137-
return vecCore::Get( ret, 0 );
3137+
return ret[0];
31383138
}
31393139

31403140
// otherwise, regular Double_t inputs on a vectorized function
@@ -3150,15 +3150,15 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const
31503150
xvec[i] = x[i];
31513151

31523152
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3153-
return vecCore::Get(ans, 0);
3153+
return ans[0];
31543154
}
31553155
// allocating a vector is much slower (we do only for dim > 4)
31563156
std::vector<ROOT::Double_v> xvec(fNdim);
31573157
for (int i = 0; i < fNdim; i++)
31583158
xvec[i] = x[i];
31593159

31603160
ROOT::Double_v ans = DoEvalVec(xvec.data(), params);
3161-
return vecCore::Get(ans, 0);
3161+
return ans[0];
31623162

31633163
#else
31643164
// this should never happen, because fVectorized can only be set true with
@@ -3398,7 +3398,7 @@ ROOT::Double_v TFormula::EvalParVec(const ROOT::Double_v *x, const Double_t *par
33983398

33993399
for (int i = 0; i < vecSize; i++)
34003400
for (int j = 0; j < fNdim; j++)
3401-
xscalars[i*fNdim+j] = vecCore::Get(x[j],i);
3401+
xscalars[i*fNdim+j] = x[j][i];
34023402

34033403
ROOT::Double_v answers(0.);
34043404
for (int i = 0; i < vecSize; i++)

math/mathcore/inc/Fit/FitUtil.h

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ namespace FitUtil {
251251
vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
252252
}
253253
auto res = (*f)(xx, p);
254-
return vecCore::Get<ROOT::Double_v>(res, 0);
254+
return res[0];
255255
}
256256

257257
#if __clang_major__ > 16
@@ -442,7 +442,7 @@ namespace FitUtil {
442442
// avoid infinity or nan in chi2 values due to wrong function values
443443
auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
444444

445-
vecCore::MaskedAssign<T>(chi2, m, maxResValue);
445+
where(m, chi2) = maxResValue;
446446

447447
return chi2;
448448
};
@@ -478,8 +478,7 @@ namespace FitUtil {
478478

479479
// Last SIMD vector of elements (if padding needed)
480480
if (data.Size() % vecSize != 0)
481-
vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
482-
res + mapFunction(data.Size() / vecSize));
481+
where(vecCore::Int2Mask<T>(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize);
483482

484483
return vecCore::ReduceAdd(res);
485484
}
@@ -661,9 +660,9 @@ namespace FitUtil {
661660
auto remainingPointsContribution = mapFunction(numVectors);
662661
// Add the contribution from the valid remaining points and store the result in the output variable
663662
auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
664-
vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
665-
vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
666-
vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
663+
where(remainingMask, logl_v) = logl_v + remainingPointsContribution.logvalue;
664+
where(remainingMask, sumW_v) = sumW_v + remainingPointsContribution.weight;
665+
where(remainingMask, sumW2_v) = sumW2_v + remainingPointsContribution.weight2;
667666
}
668667

669668

@@ -791,7 +790,7 @@ namespace FitUtil {
791790

792791
// EvalLog protects against 0 values of fval but don't want to add in the -log sum
793792
// negative values of fval
794-
vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
793+
where(fval < 0.0, fval) = 0.0;
795794

796795
T nloglike{}; // negative loglikelihood
797796

@@ -810,7 +809,7 @@ namespace FitUtil {
810809
if (extended) {
811810
nloglike = weight * ( fval - y);
812811
}
813-
vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)) );
812+
where(y != 0, nloglike) = nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
814813

815814
} else {
816815
// standard case no weights or iWeight=1
@@ -1054,7 +1053,7 @@ namespace FitUtil {
10541053

10551054
for (const auto &mask : validPointsMasks) {
10561055
for (unsigned int i = 0; i < vecSize; i++) {
1057-
nRejected += !vecCore::Get(mask, i);
1056+
nRejected += !mask[i];
10581057
}
10591058
}
10601059

@@ -1094,7 +1093,7 @@ namespace FitUtil {
10941093
// const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
10951094
// auto fval = func(&x, p);
10961095
// auto logPdf = ROOT::Math::Util::EvalLog(fval);
1097-
// return vecCore::Get<ROOT::Double_v>(logPdf, 0);
1096+
// return logPdf[0];
10981097

10991098
static void
11001099
EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,

math/mathcore/src/VectorizedTMath.cxx

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@
44

55
#ifdef ROOT_VECTORIZED_TMATH
66

7+
namespace {
8+
9+
template <class T, class V, class M>
10+
T blend_simd(M const &mask, V const &src1, V const &src2)
11+
{
12+
T v{};
13+
where(mask, v) = src1;
14+
where(!mask, v) = src2;
15+
return v;
16+
}
17+
18+
} // namespace
19+
720
namespace TMath {
821
////////////////////////////////////////////////////////////////////////////////
922
::ROOT::Double_v Log2(::ROOT::Double_v &x)
@@ -32,7 +45,7 @@ ::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean, Double_t sigma, Bool_t
3245

3346
// For those entries of |arg| > 39 result is zero in double precision
3447
::ROOT::Double_v out =
35-
vecCore::Blend<::ROOT::Double_v>(abs(arg) < ::ROOT::Double_v(39.0),
48+
blend_simd<::ROOT::Double_v>(abs(arg) < ::ROOT::Double_v(39.0),
3649
exp(::ROOT::Double_v(-0.5) * arg * arg), ::ROOT::Double_v(0.0));
3750
if (norm)
3851
out *= 0.3989422804014327 * inv_sigma; // 1/sqrt(2*Pi)=0.3989422804014327
@@ -65,7 +78,7 @@ ::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha, Double_t beta
6578
{
6679
::ROOT::Double_v alpha_v = ::ROOT::Double_v(alpha);
6780
::ROOT::Double_v beta_v_inv = ::ROOT::Double_v(1.0) / ::ROOT::Double_v(beta);
68-
return vecCore::Blend<::ROOT::Double_v>(
81+
return blend_simd<::ROOT::Double_v>(
6982
x <= alpha_v, 0.5 * exp(-abs((x - alpha_v) * beta_v_inv)),
7083
1 - 0.5 * exp(-abs((x - alpha_v) * beta_v_inv)));
7184
}
@@ -98,9 +111,9 @@ ::ROOT::Double_v Freq(::ROOT::Double_v &x)
98111

99112
::ROOT::Double_v result{};
100113

101-
vecCore::Mask<::ROOT::Double_v> mask1 = v < ::ROOT::Double_v(0.5);
102-
vecCore::Mask<::ROOT::Double_v> mask2 = !mask1 && v < ::ROOT::Double_v(4.0);
103-
vecCore::Mask<::ROOT::Double_v> mask3 = !(mask1 || mask2);
114+
auto mask1 = v < ::ROOT::Double_v(0.5);
115+
auto mask2 = !mask1 && v < ::ROOT::Double_v(4.0);
116+
auto mask3 = !(mask1 || mask2);
104117

105118
::ROOT::Double_v v2 = v * v;
106119
::ROOT::Double_v v3 = v2 * v;
@@ -110,20 +123,16 @@ ::ROOT::Double_v Freq(::ROOT::Double_v &x)
110123
::ROOT::Double_v v7 = v6 * v;
111124
::ROOT::Double_v v8 = v7 * v;
112125

113-
vecCore::MaskedAssign<::ROOT::Double_v>(
114-
result, mask1, v * (p10 + p11 * v2 + p12 * v4 + p13 * v6) / (q10 + q11 * v2 + q12 * v4 + v6));
115-
vecCore::MaskedAssign<::ROOT::Double_v>(
116-
result, mask2,
117-
::ROOT::Double_v(1.0) -
126+
where(mask1, result) = v * (p10 + p11 * v2 + p12 * v4 + p13 * v6) / (q10 + q11 * v2 + q12 * v4 + v6);
127+
where(mask2, result) = ::ROOT::Double_v(1.0) -
118128
(p20 + p21 * v + p22 * v2 + p23 * v3 + p24 * v4 + p25 * v5 + p26 * v6 + p27 * v7) /
119-
(exp(v2) * (q20 + q21 * v + q22 * v2 + q23 * v3 + q24 * v4 + q25 * v5 + q26 * v6 + v7)));
120-
vecCore::MaskedAssign<::ROOT::Double_v>(result, mask3,
121-
::ROOT::Double_v(1.0) -
129+
(exp(v2) * (q20 + q21 * v + q22 * v2 + q23 * v3 + q24 * v4 + q25 * v5 + q26 * v6 + v7));
130+
where(mask3, result) = ::ROOT::Double_v(1.0) -
122131
(c1 + (p30 * v8 + p31 * v6 + p32 * v4 + p33 * v2 + p34) /
123132
((q30 * v8 + q31 * v6 + q32 * v4 + q33 * v2 + q34) * v2)) /
124-
(v * exp(v2)));
133+
(v * exp(v2));
125134

126-
return vecCore::Blend<::ROOT::Double_v>(x > 0, ::ROOT::Double_v(0.5) + ::ROOT::Double_v(0.5) * result,
135+
return blend_simd<::ROOT::Double_v>(x > 0, ::ROOT::Double_v(0.5) + ::ROOT::Double_v(0.5) * result,
127136
::ROOT::Double_v(0.5) * (::ROOT::Double_v(1) - result));
128137
}
129138

@@ -153,7 +162,7 @@ ::ROOT::Double_v BesselI0(::ROOT::Double_v &x)
153162
{
154163
::ROOT::Double_v ax = abs(x);
155164

156-
return vecCore::Blend<::ROOT::Double_v>(ax <= 3.75, BesselI0_Split_Less(x), BesselI0_Split_More(ax));
165+
return blend_simd<::ROOT::Double_v>(ax <= 3.75, BesselI0_Split_Less(x), BesselI0_Split_More(ax));
157166
}
158167

159168
////////////////////////////////////////////////////////////////////////////////
@@ -168,7 +177,7 @@ ::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x)
168177
y * (1.63801e-3 + y * (-1.031555e-2 +
169178
y * (2.282967e-2 + y * (-2.895312e-2 +
170179
y * (1.787654e-2 + y * -4.20059e-3))))))));
171-
return vecCore::Blend<::ROOT::Double_v>(x < 0, ::ROOT::Double_v(-1.0) * result, result);
180+
return blend_simd<::ROOT::Double_v>(x < 0, ::ROOT::Double_v(-1.0) * result, result);
172181
}
173182

174183
::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x)
@@ -183,7 +192,7 @@ ::ROOT::Double_v BesselI1(::ROOT::Double_v &x)
183192
{
184193
::ROOT::Double_v ax = abs(x);
185194

186-
return vecCore::Blend<::ROOT::Double_v>(ax <= 3.75, BesselI1_Split_Less(x), BesselI1_Split_More(ax, x));
195+
return blend_simd<::ROOT::Double_v>(ax <= 3.75, BesselI1_Split_Less(x), BesselI1_Split_More(ax, x));
187196
}
188197

189198
////////////////////////////////////////////////////////////////////////////////
@@ -212,7 +221,7 @@ ::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x)
212221
::ROOT::Double_v BesselJ0(::ROOT::Double_v &x)
213222
{
214223
::ROOT::Double_v ax = abs(x);
215-
return vecCore::Blend<::ROOT::Double_v>(ax < 8, BesselJ0_Split1_Less(x), BesselJ0_Split1_More(ax));
224+
return blend_simd<::ROOT::Double_v>(ax < 8, BesselJ0_Split1_Less(x), BesselJ0_Split1_More(ax));
216225
}
217226

218227
////////////////////////////////////////////////////////////////////////////////
@@ -228,7 +237,7 @@ ::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x)
228237
0.04687499995 + y * (-0.2002690873e-3 + y * (0.8449199096e-5 + y * (-0.88228987e-6 - y * 0.105787412e-6)));
229238
::ROOT::Double_v result =
230239
sqrt(0.636619772 / ax) * (cos(xx) * result1 - z * sin(xx) * result2);
231-
vecCore::MaskedAssign<::ROOT::Double_v>(result, x < 0, -result);
240+
where(x < 0, result) = -result;
232241
return result;
233242
}
234243

@@ -244,7 +253,7 @@ ::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x)
244253
::ROOT::Double_v BesselJ1(::ROOT::Double_v &x)
245254
{
246255
::ROOT::Double_v ax = abs(x);
247-
return vecCore::Blend<::ROOT::Double_v>(ax < 8, BesselJ1_Split1_Less(x), BesselJ1_Split1_More(ax, x));
256+
return blend_simd<::ROOT::Double_v>(ax < 8, BesselJ1_Split1_Less(x), BesselJ1_Split1_More(ax, x));
248257
}
249258

250259
} // namespace TMath

test/TFormulaVecTests.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ bool testVec1D(TF1 * f1, const TString & formula, FreeFunc1D func, double x ) {
3131
#ifdef R__HAS_VECCORE
3232
ROOT::Double_v vx = x;
3333
ROOT::Double_v vy = f1->EvalPar(&vx, nullptr);
34-
double y2 = vecCore::Get(vy,0);
34+
double y2 = vy[0];
3535
ret &= CheckValues(formula+TString("_v"), y2, y0);
3636
#endif
3737

@@ -67,7 +67,7 @@ bool testVec2D(TF2 * f1, const TString & formula, FreeFunc2D func, double x, dou
6767
#ifdef R__HAS_VECCORE
6868
ROOT::Double_v vx[2] = { x, y};
6969
ROOT::Double_v vy = f1->EvalPar(vx, nullptr);
70-
double r2 = vecCore::Get(vy,0);
70+
double r2 = vy[0];
7171
ret &= CheckValues(formula+TString("_v"), r2, r0);
7272
#endif
7373

0 commit comments

Comments
 (0)