Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tree/dataframe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ if (imt)
list(APPEND RDATAFRAME_EXTRA_DEPS Imt)
endif(imt)

if (root7)
list(APPEND RDATAFRAME_EXTRA_DEPS ROOTHist)
endif()

set (EXTRA_DICT_OPTS)
if (runtime_cxxmodules AND WIN32)
set (EXTRA_DICT_OPTS NO_CXXMODULE)
Expand Down
68 changes: 68 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@
#include "ROOT/RDF/RActionImpl.hxx"
#include "ROOT/RDF/RMergeableValue.hxx"

#include "RConfigure.h" // for R__HAS_ROOT7
#ifdef R__HAS_ROOT7
#include <ROOT/RHist.hxx>
#include <ROOT/RHistConcurrentFiller.hxx>
#include <ROOT/RWeight.hxx>
#endif

#include <algorithm>
#include <array>
#include <limits>
Expand All @@ -40,6 +47,7 @@
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility> // std::index_sequence
#include <vector>
Expand Down Expand Up @@ -469,6 +477,66 @@ public:
}
};

#ifdef R__HAS_ROOT7
/// Action helper that fills a ROOT::Experimental::RHist through an RHistConcurrentFiller.
///
/// One fill context is created per processing slot at construction time. Exec() fills through the context that
/// belongs to the calling slot, and Finalize() flushes every context.
///
/// \tparam BinContentType The bin content type of the histogram that is filled.
/// \tparam WithWeight If true, the last value passed to Exec() is wrapped into an RWeight and used as the weight
///                    of the entry; all preceding values are the arguments to fill.
template <typename BinContentType, bool WithWeight = false>
class R__CLING_PTRCHECK(off) RHistFillHelper
   : public ROOT::Detail::RDF::RActionImpl<RHistFillHelper<BinContentType, WithWeight>> {
public:
   using Result_t = ROOT::Experimental::RHist<BinContentType>;

private:
   /// The concurrent filler wrapping the histogram passed to the constructor.
   std::unique_ptr<ROOT::Experimental::RHistConcurrentFiller<BinContentType>> fFiller;
   /// The fill contexts, indexed by slot number.
   std::vector<std::shared_ptr<ROOT::Experimental::RHistFillContext<BinContentType>>> fContexts;

public:
   /// \param[in] h The histogram to fill.
   /// \param[in] nSlots The number of processing slots; one fill context is created for each of them.
   RHistFillHelper(std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> h, unsigned int nSlots)
      : fFiller(new ROOT::Experimental::RHistConcurrentFiller<BinContentType>(h)), fContexts(nSlots)
   {
      for (auto &context : fContexts) {
         context = fFiller->CreateFillContext();
      }
   }
   RHistFillHelper(const RHistFillHelper &) = delete;
   RHistFillHelper(RHistFillHelper &&) = default;
   RHistFillHelper &operator=(const RHistFillHelper &) = delete;
   RHistFillHelper &operator=(RHistFillHelper &&) = default;
   ~RHistFillHelper() = default;

   /// \return the histogram held by the concurrent filler.
   // fFiller is a std::unique_ptr, so the accessor must be reached through operator->.
   std::shared_ptr<Result_t> GetResultPtr() const { return fFiller->GetHist(); }

   void Initialize() {}
   void InitTask(TTreeReader *, unsigned int) {}

   /// Fill with a weight: the first sizeof...(I) tuple elements are the fill arguments, the last element is the
   /// weight.
   /// \param[in] slot The processing slot, used to select the fill context.
   /// \param[in] columnValues All column values of the entry, with the weight in last position.
   template <typename... ColumnTypes, const std::size_t... I>
   void ExecWithWeight(unsigned int slot, const std::tuple<ColumnTypes...> &columnValues, std::index_sequence<I...>)
   {
      std::tuple args{std::get<I>(columnValues)...};
      ROOT::Experimental::RWeight weight(std::get<sizeof...(ColumnTypes) - 1>(columnValues));
      fContexts[slot]->Fill(args, weight);
   }

   /// Fill the histogram with the values of one entry.
   /// \param[in] slot The processing slot, used to select the fill context.
   /// \param[in] columnValues The column values; if WithWeight is true, the last one is the weight.
   template <typename... ColumnTypes>
   void Exec(unsigned int slot, const ColumnTypes &...columnValues)
   {
      if constexpr (WithWeight) {
         // Without this check, "sizeof...(ColumnTypes) - 1" below would wrap around for an empty pack.
         static_assert(sizeof...(ColumnTypes) >= 1, "weighted filling requires at least the weight column");
         // Split off the trailing weight: the index sequence covers every value except the last one.
         auto t = std::forward_as_tuple(columnValues...);
         ExecWithWeight(slot, t, std::make_index_sequence<sizeof...(ColumnTypes) - 1>());
      } else {
         fContexts[slot]->Fill(columnValues...);
      }
   }

   /// Flush all fill contexts.
   void Finalize()
   {
      for (auto &&context : fContexts) {
         context->Flush();
      }
   }

   std::string GetActionName() { return "Hist"; }
};
#endif

class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
public:
using Result_t = ::TGraph;
Expand Down
29 changes: 29 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <string_view>
#include <ROOT/RDF/RVariation.hxx>
#include <ROOT/TypeTraits.hxx>
#include <RConfigure.h> // for R__HAS_ROOT7
#include <TError.h> // gErrorIgnoreLevel
#include <TH1.h>
#include <TROOT.h> // IsImplicitMTEnabled
Expand Down Expand Up @@ -90,6 +91,8 @@ struct Histo2D{};
struct Histo3D{};
struct HistoND{};
struct HistoNSparseD{};
struct Hist{};
struct HistWithWeight{};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a second tag felt like the easiest solution, together with just a template argument bool WithWeight = false for RHistFillHelper. Other approaches are certainly possible...

struct Graph{};
struct GraphAsymmErrors{};
struct Profile1D{};
Expand Down Expand Up @@ -171,6 +174,32 @@ BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h,
}
}

#ifdef R__HAS_ROOT7
// Action for RHist using RHistConcurrentFiller
template <typename... ColTypes, typename BinContentType, typename PrevNodeType>
std::unique_ptr<RActionBase>
BuildAction(const ColumnNames_t &columnList, const std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> &h,
const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::Hist,
const RColumnRegister &colRegister)
{
using Helper_t = RHistFillHelper<BinContentType>;
using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
return std::make_unique<Action_t>(Helper_t(h, nSlots), columnList, std::move(prevNode), colRegister);
}

// Action for RHist using RHistConcurrentFiller
template <typename... ColTypes, typename BinContentType, typename PrevNodeType>
std::unique_ptr<RActionBase>
BuildAction(const ColumnNames_t &columnList, const std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> &h,
const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::HistWithWeight,
const RColumnRegister &colRegister)
{
using Helper_t = RHistFillHelper<BinContentType, true>;
using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
return std::make_unique<Action_t>(Helper_t(h, nSlots), columnList, std::move(prevNode), colRegister);
}
#endif

template <typename... ColTypes, typename PrevNodeType>
std::unique_ptr<RActionBase>
BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraph> &g, const unsigned int nSlots,
Expand Down
143 changes: 143 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/RInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@
#include "TProfile2D.h"
#include "TStatistic.h"

#include "RConfigure.h" // for R__HAS_ROOT7
#ifdef R__HAS_ROOT7
#include <ROOT/RBinWithError.hxx>
#include <ROOT/RHist.hxx>
#include <ROOT/RHistEngine.hxx>
#endif

#include <algorithm>
#include <cstddef>
#include <initializer_list>
Expand Down Expand Up @@ -2357,6 +2364,142 @@ public:
columnList, h, h, fProxiedPtr, columnList.size());
}

#ifdef R__HAS_ROOT7
////////////////////////////////////////////////////////////////////////////
/// \brief Fill and return an RHist (*lazy action*).
/// \tparam BinContentType The bin content type of the returned RHist.
/// \param[in] axes The returned histogram will be constructed using these axes.
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
/// \return the histogram wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// ### Example usage:
/// ~~~{.cpp}
/// ROOT::Experimental::RRegularAxis axis(10, {5.0, 15.0});
/// auto myHist = myDf.Hist({axis}, {"col0"});
/// ~~~
template <typename BinContentType = double, typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes>
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One could also argue for BinContentType = int or long as default since it's unweighted filling. However, then a number of "post-processing steps" require conversion, for example scaling...

RResultPtr<ROOT::Experimental::RHist<BinContentType>>
Hist(std::vector<ROOT::Experimental::RAxisVariant> axes, const ColumnNames_t &columnList)
{
std::shared_ptr h = std::make_shared<ROOT::Experimental::RHist<BinContentType>>(std::move(axes));
if (h->GetNDimensions() != columnList.size()) {
throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In histv7, we are consistently throwing std::invalid_argument while RDF prefers std::runtime_error. I think we have to decide which "consistency" to follow...

}

return Hist<ColumnType, ColumnTypes...>(h, columnList);
}

////////////////////////////////////////////////////////////////////////////
/// \brief Fill the provided RHist (*lazy action*).
/// \param[in] h The histogram that should be filled.
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
/// \return the histogram wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// During execution of the computation graph, the passed histogram must only be accessed with methods that are
/// allowed during concurrent filling.
///
/// ### Example usage:
/// ~~~{.cpp}
/// auto h = std::make_shared<ROOT::Experimental::RHist<double>>(10, std::make_pair(5.0, 15.0));
/// auto myHist = myDf.Hist(h, {"col0"});
/// ~~~
template <typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes, typename BinContentType>
RResultPtr<ROOT::Experimental::RHist<BinContentType>>
Hist(std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> h, const ColumnNames_t &columnList)
Comment on lines +2407 to +2414
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we should market this overload as "bring your own histogram" 😅

Jokes aside, this will enable some nice use cases, for example filling a shared histogram from multiple computation graphs, as demonstrated by the RDFHist.PtrRunGraphs unit test.

{
RDFInternal::WarnHist();

if (h->GetNDimensions() != columnList.size()) {
throw std::runtime_error("Wrong number of columns for the passed histogram.");
}

return CreateAction<RDFInternal::ActionTags::Hist, ColumnType, ColumnTypes...>(columnList, h, h, fProxiedPtr,
columnList.size());
}

////////////////////////////////////////////////////////////////////////////
/// \brief Fill and return an RHist with weights (*lazy action*).
/// \tparam BinContentType The bin content type of the returned RHist.
/// \param[in] axes The returned histogram will be constructed using these axes.
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
/// \param[in] wName The name of the column that will provide the weights.
/// \return the histogram wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// This overload is not available for integral bin content types (see \ref RHistEngine::SupportsWeightedFilling).
///
/// ### Example usage:
/// ~~~{.cpp}
/// ROOT::Experimental::RRegularAxis axis(10, {5.0, 15.0});
/// auto myHist = myDf.Hist({axis}, {"col0"}, "colW");
/// ~~~
template <typename BinContentType = ROOT::Experimental::RBinWithError,
          typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes>
RResultPtr<ROOT::Experimental::RHist<BinContentType>>
Hist(std::vector<ROOT::Experimental::RAxisVariant> axes, const ColumnNames_t &columnList, std::string_view wName)
{
   using Hist_t = ROOT::Experimental::RHist<BinContentType>;
   using Engine_t = ROOT::Experimental::RHistEngine<BinContentType>;
   static_assert(Engine_t::SupportsWeightedFilling,
                 "weighted filling is not supported for integral bin content types");

   // Build the histogram from the axes, then check that there is one column per dimension.
   auto hist = std::make_shared<Hist_t>(std::move(axes));
   const auto nColumns = columnList.size();
   if (hist->GetNDimensions() != nColumns) {
      throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
   }

   // Delegate the booking to the overload that takes an existing histogram.
   return Hist<ColumnType, ColumnTypes...>(hist, columnList, wName);
}

////////////////////////////////////////////////////////////////////////////
/// \brief Fill the provided RHist with weights (*lazy action*).
/// \param[in] h The histogram that should be filled.
/// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
/// \param[in] wName The name of the column that will provide the weights.
/// \return the histogram wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// This overload is not available for integral bin content types (see \ref RHistEngine::SupportsWeightedFilling).
///
/// During execution of the computation graph, the passed histogram must only be accessed with methods that are
/// allowed during concurrent filling.
///
/// ### Example usage:
/// ~~~{.cpp}
/// auto h = std::make_shared<ROOT::Experimental::RHist<double>>(10, std::make_pair(5.0, 15.0));
/// auto myHist = myDf.Hist(h, {"col0"}, "colW");
/// ~~~
template <typename ColumnType = RDFDetail::RInferredType, typename... ColumnTypes, typename BinContentType>
RResultPtr<ROOT::Experimental::RHist<BinContentType>>
Hist(std::shared_ptr<ROOT::Experimental::RHist<BinContentType>> h, const ColumnNames_t &columnList,
     std::string_view wName)
{
   using Engine_t = ROOT::Experimental::RHistEngine<BinContentType>;
   static_assert(Engine_t::SupportsWeightedFilling,
                 "weighted filling is not supported for integral bin content types");

   // The warning is issued before the arguments are validated.
   RDFInternal::WarnHist();

   // The weight column is not counted here: one fill column is expected per histogram dimension.
   const auto nFillColumns = columnList.size();
   if (h->GetNDimensions() != nFillColumns) {
      throw std::runtime_error("Wrong number of columns for the passed histogram.");
   }

   // The weight travels through the infrastructure as one more argument column, in last position.
   ColumnNames_t fillAndWeightColumns(columnList);
   fillAndWeightColumns.emplace_back(wName);

   return CreateAction<RDFInternal::ActionTags::HistWithWeight, ColumnType, ColumnTypes...>(
      fillAndWeightColumns, h, h, fProxiedPtr, fillAndWeightColumns.size());
}
#endif

////////////////////////////////////////////////////////////////////////////
/// \brief Fill and return a TGraph object (*lazy action*).
/// \tparam X The type of the column used to fill the x axis.
Expand Down
3 changes: 3 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/Utils.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ struct RInferredType {
namespace Internal {
namespace RDF {

/// Warn once about experimental filling of RHist.
void WarnHist();

using namespace ROOT::TypeTraits;
using namespace ROOT::Detail::RDF;
using namespace ROOT::RDF;
Expand Down
13 changes: 13 additions & 0 deletions tree/dataframe/src/RDFUtils.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "TTree.h"

#include <fstream>
#include <mutex>
#include <nlohmann/json.hpp> // nlohmann::json::parse
#include <stdexcept>
#include <string>
Expand All @@ -45,6 +46,18 @@ ROOT::RLogChannel &ROOT::Detail::RDF::RDFLogChannel()
return c;
}

// Logs a warning on the RDF log channel stating that filling RHist is experimental.
// A static function, not in an anonymous namespace, because the function name is included in the user-visible message.
// For the same reason, do not rename this function or fold it into a lambda.
static void WarnHist()
{
R__LOG_WARNING(RDFLogChannel()) << "Filling RHist is experimental and still under development.";
}

// Warn about experimental RHist filling; the warning itself is issued at most once, however often this is called.
void ROOT::Internal::RDF::WarnHist()
{
   // std::call_once runs the file-local ::WarnHist a single time for this flag, also with concurrent callers.
   static std::once_flag warnedOnceFlag;
   std::call_once(warnedOnceFlag, ::WarnHist);
}

namespace {
using TypeInfoRef = std::reference_wrapper<const std::type_info>;
struct TypeInfoRefHash {
Expand Down
4 changes: 4 additions & 0 deletions tree/dataframe/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ if (imt)
ROOT_ADD_GTEST(dataframe_concurrency dataframe_concurrency.cxx LIBRARIES ROOTDataFrame)
endif()

if (root7)
ROOT_ADD_GTEST(dataframe_hist dataframe_hist.cxx LIBRARIES ROOTDataFrame ROOTHist)
endif()

if(ARROW_FOUND)
ROOT_ADD_GTEST(datasource_arrow datasource_arrow.cxx LIBRARIES ROOTDataFrame ${ARROW_SHARED_LIB})
target_include_directories(datasource_arrow BEFORE PRIVATE ${ARROW_INCLUDE_DIR})
Expand Down
Loading
Loading