Skip to content

Commit 46f531b

Browse files
committed
[df] Implement weighted filling of RHist
This needs one indirection to construct the std::tuple and separate the weight argument.
1 parent ad552e4 commit 46f531b

File tree

4 files changed

+197
-3
lines changed

4 files changed

+197
-3
lines changed

tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#ifdef R__HAS_ROOT7
3737
#include <ROOT/RHist.hxx>
3838
#include <ROOT/RHistConcurrentFiller.hxx>
39+
#include <ROOT/RWeight.hxx>
3940
#endif
4041

4142
#include <algorithm>
@@ -46,6 +47,7 @@
4647
#include <stdexcept>
4748
#include <string>
4849
#include <string_view>
50+
#include <tuple>
4951
#include <type_traits>
5052
#include <utility> // std::index_sequence
5153
#include <vector>
@@ -476,8 +478,9 @@ public:
476478
};
477479

478480
#ifdef R__HAS_ROOT7
479-
template <typename BinContentType>
480-
class R__CLING_PTRCHECK(off) RHistFillHelper : public ROOT::Detail::RDF::RActionImpl<RHistFillHelper<BinContentType>> {
481+
template <typename BinContentType, bool WithWeight = false>
482+
class R__CLING_PTRCHECK(off) RHistFillHelper
483+
: public ROOT::Detail::RDF::RActionImpl<RHistFillHelper<BinContentType, WithWeight>> {
481484
public:
482485
using Result_t = ROOT::Experimental::RHist<BinContentType>;
483486

@@ -504,10 +507,23 @@ public:
504507
void Initialize() {}
505508
void InitTask(TTreeReader *, unsigned int) {}
506509

510+
/// Fill the context of one slot with a weighted entry.
/// \param[in] slot Index into fContexts selecting the context to fill.
/// \param[in] columnValues Tuple holding all column values; its last element is used as the weight.
/// \param[in] (unnamed) Index sequence selecting which elements of columnValues become fill arguments.
template <typename... ColumnTypes, const std::size_t... I>
511+
void ExecWithWeight(unsigned int slot, const std::tuple<ColumnTypes...> &columnValues, std::index_sequence<I...>)
512+
{
513+
// Gather the elements selected by I... into a new tuple of fill arguments.
std::tuple args{std::get<I>(columnValues)...};
514+
// The last tuple element (index sizeof...(ColumnTypes) - 1) is wrapped as the weight.
ROOT::Experimental::RWeight weight(std::get<sizeof...(ColumnTypes) - 1>(columnValues));
515+
fContexts[slot]->Fill(args, weight);
516+
}
517+
507518
/// Per-entry callback: fill the context of the given slot with the column values.
/// When WithWeight is true, the values are bundled into a tuple of references and handed to ExecWithWeight
/// together with an index sequence covering every element except the last one; otherwise all values are
/// passed directly to Fill.
template <typename... ColumnTypes>
508519
void Exec(unsigned int slot, const ColumnTypes &...columnValues)
509520
{
510-
fContexts[slot]->Fill(columnValues...);
521+
if constexpr (WithWeight) {
522+
auto t = std::forward_as_tuple(columnValues...);
523+
ExecWithWeight(slot, t, std::make_index_sequence<sizeof...(ColumnTypes) - 1>());
524+
} else {
525+
fContexts[slot]->Fill(columnValues...);
526+
}
511527
}
512528

513529
void Finalize()

tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ struct Histo3D{};
9292
struct HistoND{};
9393
struct HistoNSparseD{};
9494
struct Hist{};
95+
struct HistWithWeight{};
9596
struct Graph{};
9697
struct GraphAsymmErrors{};
9798
struct Profile1D{};
@@ -185,6 +186,18 @@ BuildAction(const ColumnNames_t &columnList, const std::shared_ptr<ROOT::Experim
185186
using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
186187
return std::make_unique<Action_t>(Helper_t(h, nSlots), columnList, std::move(prevNode), colRegister);
187188
}
189+
190+
// Action for weighted filling of RHist using RHistConcurrentFiller.
// This overload is selected by the ActionTags::HistWithWeight tag argument.
191+
template <typename... ColTypes, typename BinContentType, typename PrevNodeType>
192+
std::unique_ptr<RActionBase>
193+
BuildAction(const ColumnNames_t &columnList, const std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> &h,
194+
const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::HistWithWeight,
195+
const RColumnRegister &colRegister)
196+
{
197+
// The second template argument sets WithWeight = true on the fill helper.
using Helper_t = RHistFillHelper<BinContentType, true>;
198+
using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
199+
return std::make_unique<Action_t>(Helper_t(h, nSlots), columnList, std::move(prevNode), colRegister);
200+
}
188201
#endif
189202

190203
template <typename... ColTypes, typename PrevNodeType>

tree/dataframe/inc/ROOT/RDF/RInterface.hxx

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@
4747

4848
#include "RConfigure.h" // for R__HAS_ROOT7
4949
#ifdef R__HAS_ROOT7
50+
#include <ROOT/RBinWithError.hxx>
5051
#include <ROOT/RHist.hxx>
52+
#include <ROOT/RHistEngine.hxx>
5153
#endif
5254

5355
#include <algorithm>
@@ -2420,6 +2422,82 @@ public:
24202422
return CreateAction<RDFInternal::ActionTags::Hist, ColumnType, ColumnTypes...>(columnList, h, h, fProxiedPtr,
24212423
columnList.size());
24222424
}
2425+
2426+
////////////////////////////////////////////////////////////////////////////
2427+
/// \brief Fill and return an RHist with weights (*lazy action*).
2428+
/// \tparam BinContentType The bin content type of the returned RHist.
2429+
/// \param[in] axes The returned histogram will be constructed using these axes.
2430+
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2431+
/// \param[in] wName The name of the column that will provide the weights.
2432+
/// \return the histogram wrapped in a RResultPtr.
2433+
///
2434+
/// This action is *lazy*: upon invocation of this method the calculation is
2435+
/// booked but not executed. Also see RResultPtr.
2436+
///
2437+
/// This overload is not available for integral bin content types (see \ref RHistEngine::SupportsWeightedFilling).
2438+
///
2439+
/// ### Example usage:
2440+
/// ~~~{.cpp}
2441+
/// ROOT::Experimental::RRegularAxis axis(10, {5.0, 15.0});
2442+
/// auto myHist = myDf.Hist({axis}, {"col0"}, "colW");
2443+
/// ~~~
2444+
template <typename BinContentType = ROOT::Experimental::RBinWithError,
2445+
typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes>
2446+
RResultPtr<ROOT::Experimental::RHist<BinContentType>>
2447+
Hist(std::vector<ROOT::Experimental::RAxisVariant> axes, const ColumnNames_t &columnList, std::string_view wName)
2448+
{
2449+
// Reject bin content types without weighted-filling support at compile time.
static_assert(ROOT::Experimental::RHistEngine<BinContentType>::SupportsWeightedFilling,
2450+
"weighted filling is not supported for integral bin content types");
2451+
2452+
// Construct the histogram from the axes, then check that columnList (which does not include the weight
// column) has one entry per histogram dimension; throws std::runtime_error otherwise.
std::shared_ptr h = std::make_shared<ROOT::Experimental::RHist<BinContentType>>(std::move(axes));
2453+
if (h->GetNDimensions() != columnList.size()) {
2454+
throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2455+
}
2456+
2457+
// Delegate to the overload that fills an existing histogram.
return Hist<ColumnType, ColumnTypes...>(h, columnList, wName);
2458+
}
2459+
2460+
////////////////////////////////////////////////////////////////////////////
2461+
/// \brief Fill the provided RHist with weights (*lazy action*).
2462+
/// \param[in] h The histogram that should be filled.
2463+
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2464+
/// \param[in] wName The name of the column that will provide the weights.
2465+
/// \return the histogram wrapped in a RResultPtr.
2466+
///
2467+
/// This action is *lazy*: upon invocation of this method the calculation is
2468+
/// booked but not executed. Also see RResultPtr.
2469+
///
2470+
/// This overload is not available for integral bin content types (see \ref RHistEngine::SupportsWeightedFilling).
2471+
///
2472+
/// During execution of the computation graph, the passed histogram must only be accessed with methods that are
2473+
/// allowed during concurrent filling.
2474+
///
2475+
/// ### Example usage:
2476+
/// ~~~{.cpp}
2477+
/// auto h = std::make_shared<ROOT::Experimental::RHist<double>>(10, std::make_pair(5.0, 15.0));
2478+
/// auto myHist = myDf.Hist(h, {"col0"}, "colW");
2479+
/// ~~~
2480+
template <typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes, typename BinContentType>
2481+
RResultPtr<ROOT::Experimental::RHist<BinContentType>>
2482+
Hist(std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> h, const ColumnNames_t &columnList,
2483+
std::string_view wName)
2484+
{
2485+
// Reject bin content types without weighted-filling support at compile time.
static_assert(ROOT::Experimental::RHistEngine<BinContentType>::SupportsWeightedFilling,
2486+
"weighted filling is not supported for integral bin content types");
2487+
2488+
RDFInternal::WarnHist();
2489+
2490+
// columnList does not include the weight column, so it must have one entry per histogram dimension;
// throws std::runtime_error otherwise.
// NOTE(review): h is dereferenced here without a null check - confirm callers never pass an empty pointer.
if (h->GetNDimensions() != columnList.size()) {
2491+
throw std::runtime_error("Wrong number of columns for the passed histogram.");
2492+
}
2493+
2494+
// Add the weight column to the list of argument columns to pass it through the infrastructure.
// It is appended last, after all fill-argument columns.
ColumnNames_t columnListWithWeights(columnList);
2495+
columnListWithWeights.push_back(std::string(wName));
2496+
2497+
2498+
// The column count passed to CreateAction includes the appended weight column.
return CreateAction<RDFInternal::ActionTags::HistWithWeight, ColumnType, ColumnTypes...>(
2499+
columnListWithWeights, h, h, fProxiedPtr, columnListWithWeights.size());
2500+
}
24232501
#endif
24242502

24252503
////////////////////////////////////////////////////////////////////////////

tree/dataframe/test/dataframe_hist.cxx

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ROOT/TestSupport.hxx>
44
#include <ROOT/RDataFrame.hxx>
55
#include <ROOT/RDFHelpers.hxx>
6+
#include <ROOT/RBinWithError.hxx>
67
#include <ROOT/RHist.hxx>
78
#include <ROOT/RRegularAxis.hxx>
89
#include <ROOT/RVariableBinAxis.hxx>
@@ -14,6 +15,7 @@
1415
#include <vector>
1516

1617
using ROOT::RDataFrame;
18+
using ROOT::Experimental::RBinWithError;
1719
using ROOT::Experimental::RHist;
1820
using ROOT::Experimental::RRegularAxis;
1921
using ROOT::Experimental::RVariableBinAxis;
@@ -180,6 +182,91 @@ TEST_P(RDFHist, InvalidNumberOfArgumentsJit)
180182
EXPECT_THROW(dfX.Hist(hist, {"x", "x"}), std::runtime_error);
181183
}
182184

185+
// Weighted fill with explicit template arguments: entry e defines x = e + 5.5 and w = 0.1 + e * 0.03.
// The assertions expect 10 entries and, for each normal bin, sum and sum of squares equal to the weight
// computed from the bin index.
// NOTE(review): EXPECT_FLOAT_EQ is used on double values; confirm whether EXPECT_DOUBLE_EQ is intended.
TEST_P(RDFHist, Weight)
186+
{
187+
RDataFrame df(10);
188+
const RRegularAxis axis(10, {5.0, 15.0});
189+
auto hist = df.Define("x", [](ULong64_t e) { return e + 5.5; }, {"rdfentry_"})
190+
.Define("w", [](ULong64_t e) { return 0.1 + e * 0.03; }, {"rdfentry_"})
191+
.Hist</*BinContentType=*/RBinWithError, double, double>({axis}, {"x"}, "w");
192+
EXPECT_EQ(hist->GetNEntries(), 10);
193+
for (auto index : axis.GetNormalRange()) {
194+
auto &bin = hist->GetBinContent(index);
195+
double weight = 0.1 + index.GetIndex() * 0.03;
196+
EXPECT_FLOAT_EQ(bin.fSum, weight);
197+
EXPECT_FLOAT_EQ(bin.fSum2, weight * weight);
198+
}
199+
}
200+
201+
// Same scenario as the Weight test, but the columns are defined through string expressions and Hist is
// called without explicit template arguments.
TEST_P(RDFHist, WeightJit)
202+
{
203+
RDataFrame df(10);
204+
const RRegularAxis axis(10, {5.0, 15.0});
205+
auto hist = df.Define("x", "rdfentry_ + 5.5").Define("w", "0.1 + rdfentry_ * 0.03").Hist({axis}, {"x"}, "w");
206+
EXPECT_EQ(hist->GetNEntries(), 10);
207+
for (auto index : axis.GetNormalRange()) {
208+
auto &bin = hist->GetBinContent(index);
209+
double weight = 0.1 + index.GetIndex() * 0.03;
210+
EXPECT_FLOAT_EQ(bin.fSum, weight);
211+
EXPECT_FLOAT_EQ(bin.fSum2, weight * weight);
212+
}
213+
}
214+
215+
// Weighted fill of a user-provided histogram: the result pointer must refer to the very same object that
// was passed in, and that object must report 10 entries.
// NOTE(review): unlike the Weight test, the bin contents are not checked here.
TEST_P(RDFHist, PtrWeight)
216+
{
217+
RDataFrame df(10);
218+
auto hist = std::make_shared<RHist<double>>(10, std::make_pair(5.0, 15.0));
219+
auto resPtr = df.Define("x", [](ULong64_t e) { return e + 5.5; }, {"rdfentry_"})
220+
.Define("w", [](ULong64_t e) { return 0.1 + e * 0.03; }, {"rdfentry_"})
221+
.Hist<double, double>(hist, {"x"}, "w");
222+
EXPECT_EQ(hist, resPtr.GetSharedPtr());
223+
EXPECT_EQ(hist->GetNEntries(), 10);
224+
}
225+
226+
// Same scenario as the PtrWeight test, but the columns are defined through string expressions and Hist is
// called without explicit template arguments.
TEST_P(RDFHist, PtrWeightJit)
227+
{
228+
RDataFrame df(10);
229+
auto hist = std::make_shared<RHist<double>>(10, std::make_pair(5.0, 15.0));
230+
auto resPtr = df.Define("x", "rdfentry_ + 5.5").Define("w", "0.1 + rdfentry_ * 0.03").Hist(hist, {"x"}, "w");
231+
EXPECT_EQ(hist, resPtr.GetSharedPtr());
232+
EXPECT_EQ(hist->GetNEntries(), 10);
233+
}
234+
235+
// Passing two fill columns for a one-dimensional histogram must throw std::runtime_error, both for the
// overload taking axes and for the overload taking an existing histogram.
TEST_P(RDFHist, WeightInvalidNumberOfArguments)
236+
{
237+
RDataFrame df(10);
238+
const RRegularAxis axis(10, {5.0, 15.0});
239+
auto dfXW = df.Define("x", [](ULong64_t e) { return e + 5.5; }, {"rdfentry_"})
240+
.Define("w", [](ULong64_t e) { return 0.1 + e * 0.03; }, {"rdfentry_"});
241+
try {
242+
// Cannot use EXPECT_THROW: the commas in the explicit template argument list are not enclosed in
// parentheses and would be treated as macro argument separators.
243+
dfXW.Hist</*BinContentType=*/double, double, double, double>({axis}, {"x", "x"}, "w");
244+
FAIL() << "expected std::runtime_error";
245+
} catch (const std::runtime_error &e) {
246+
// expected
247+
}
248+
249+
auto hist = std::make_shared<RHist<double>>(10, std::make_pair(5.0, 15.0));
250+
try {
251+
// Cannot use EXPECT_THROW here either, for the same reason (commas in the template argument list).
252+
dfXW.Hist<double, double, double>(hist, {"x", "x"}, "w");
253+
FAIL() << "expected std::runtime_error";
254+
} catch (const std::runtime_error &e) {
255+
// expected
256+
}
257+
}
258+
259+
// Same check as WeightInvalidNumberOfArguments, with string-defined columns and no explicit template
// arguments, so EXPECT_THROW can be used directly.
TEST_P(RDFHist, WeightInvalidNumberOfArgumentsJit)
260+
{
261+
RDataFrame df(10);
262+
const RRegularAxis axis(10, {5.0, 15.0});
263+
auto dfXW = df.Define("x", "rdfentry_ + 5.5").Define("w", "0.1 + rdfentry_ * 0.03");
264+
EXPECT_THROW(dfXW.Hist({axis}, {"x", "x"}, "w"), std::runtime_error);
265+
266+
auto hist = std::make_shared<RHist<double>>(10, std::make_pair(5.0, 15.0));
267+
EXPECT_THROW(dfXW.Hist(hist, {"x", "x"}, "w"), std::runtime_error);
268+
}
269+
183270
INSTANTIATE_TEST_SUITE_P(Seq, RDFHist, ::testing::Values(false));
184271

185272
#ifdef R__USE_IMT

0 commit comments

Comments
 (0)