Skip to content

Lift template requirement for TTree Snapshot #19006

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tree/dataframe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTDataFrame
ROOT/RDF/InterfaceUtils.hxx
ROOT/RDF/RActionBase.hxx
ROOT/RDF/RAction.hxx
ROOT/RDF/RActionSnapshot.hxx
ROOT/RDF/RActionImpl.hxx
ROOT/RDF/RColumnRegister.hxx
ROOT/RDF/RNewSampleNotifier.hxx
Expand Down Expand Up @@ -108,6 +109,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTDataFrame
src/RDFActionHelpers.cxx
src/RDFColumnReaderUtils.cxx
src/RDFColumnRegister.cxx
src/RDFColumnReaderUtils.cxx
src/RDFDisplay.cxx
src/RDFGraphUtils.cxx
src/RDFHistoModels.cxx
Expand Down
165 changes: 157 additions & 8 deletions tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ using Hist_t = ::TH1D;
class RBranchSet {
std::vector<TBranch *> fBranches;
std::vector<std::string> fNames;
std::vector<bool> fIsCArray;

public:
TBranch *Get(const std::string &name) const
Expand All @@ -91,7 +92,14 @@ public:
return fBranches[std::distance(fNames.begin(), it)];
}

void Insert(const std::string &name, TBranch *address)
/// Tell whether the branch stored under `name` was flagged as a C-style array when it was inserted.
/// \param[in] name Name of the output branch to look up.
/// \return The flag recorded for that branch, or `false` when no branch with that name is registered.
bool IsCArray(const std::string &name) const
{
   const auto pos = std::find(fNames.begin(), fNames.end(), name);
   if (pos == fNames.end())
      return false; // unknown branch names are reported as "not a C array"
   // fNames and fIsCArray are filled in lockstep, so the position in one indexes the other.
   return fIsCArray[std::distance(fNames.begin(), pos)];
}

void Insert(const std::string &name, TBranch *address, bool isCArray = false)
{
if (address == nullptr) {
throw std::logic_error("Trying to insert a null branch address.");
Expand All @@ -104,12 +112,14 @@ public:
}
fNames.emplace_back(name);
fBranches.emplace_back(address);
fIsCArray.push_back(isCArray);
}

/// Forget every registered branch: names, branch pointers and C-array flags are all emptied together
/// so that the three parallel containers stay the same length.
void Clear()
{
   fIsCArray.clear();
   fNames.clear();
   fBranches.clear();
}

void AssertNoNullBranchAddresses()
Expand Down Expand Up @@ -1613,9 +1623,10 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
}
}

void SetEmptyBranchesHelper(TTree *inputTree, TTree &outputTree, RBranchSet &outputBranches,
const std::string &inputBranchName, const std::string &outputBranchName,
const std::type_info &typeInfo, int basketSize);
/// Non-template overload of SetBranchesHelper: the type of the column value is passed at runtime through
/// `valueTypeID` instead of being a template parameter. Declaration only; the definition is not in this header.
/// \param[in] inputTree Input tree (may be passed as a pointer; NOTE(review): null handling not visible here).
/// \param[in,out] outputTree Tree in which the output branch is set up.
/// \param[in,out] outputBranches Bookkeeping of the branches already created in `outputTree`.
/// \param[in] basketSize Basket size option forwarded by the callers (they pass `fOptions.fBasketSize`).
/// \param[in] inputBranchName Name of the column/branch on the input side.
/// \param[in] outputBranchName Name of the branch on the output side.
/// \param[in] valueTypeID Runtime type of the column value.
/// \param[in] valueAddress Address of the value; callers that only create empty branches pass a null dummy.
/// \param[in,out] actionHelperBranchPtr Branch pointer slot of the calling helper, taken by reference.
/// \param[in,out] actionHelperBranchPtrAddress Address slot of the calling helper, taken by reference.
///            NOTE(review): presumably only updated for C-array branches -- confirm against the definition.
void SetBranchesHelper(TTree *inputTree, TTree &outputTree, RBranchSet &outputBranches, int basketSize,
const std::string &inputBranchName, const std::string &outputBranchName,
const std::type_info &valueTypeID, void *valueAddress, TBranch *&actionHelperBranchPtr,
void *&actionHelperBranchPtrAddress);

/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
Expand Down Expand Up @@ -1736,9 +1747,13 @@ public:
void SetEmptyBranches(TTree *inputTree, TTree &outputTree, std::index_sequence<S...>)
{
RBranchSet outputBranches{};
void *dummyValueAddress{};
TBranch *dummyTBranchPtr{};
void *dummyTBranchAddress{};
// We use the expander trick rather than a fold expression to avoid incurring in the bracket depth limit of clang
int expander[] = {(SetEmptyBranchesHelper(inputTree, outputTree, outputBranches, fInputBranchNames[S],
fOutputBranchNames[S], typeid(ColTypes), fOptions.fBasketSize),
int expander[] = {(SetBranchesHelper(inputTree, outputTree, outputBranches, fOptions.fBasketSize,
fInputBranchNames[S], fOutputBranchNames[S], typeid(ColTypes),
dummyValueAddress, dummyTBranchPtr, dummyTBranchAddress),
0)...,
0};
(void)expander;
Expand Down Expand Up @@ -1983,10 +1998,14 @@ public:
template <std::size_t... S>
void SetEmptyBranches(TTree *inputTree, TTree &outputTree, std::index_sequence<S...>)
{
void *dummyValueAddress{};
TBranch *dummyTBranchPtr{};
void *dummyTBranchAddress{};
RBranchSet outputBranches{};
// We use the expander trick rather than a fold expression to avoid incurring in the bracket depth limit of clang
int expander[] = {(SetEmptyBranchesHelper(inputTree, outputTree, outputBranches, fInputBranchNames[S],
fOutputBranchNames[S], typeid(ColTypes), fOptions.fBasketSize),
int expander[] = {(SetBranchesHelper(inputTree, outputTree, outputBranches, fOptions.fBasketSize,
fInputBranchNames[S], fOutputBranchNames[S], typeid(ColTypes),
dummyValueAddress, dummyTBranchPtr, dummyTBranchAddress),
0)...,
0};
(void)expander;
Expand Down Expand Up @@ -2228,6 +2247,136 @@ public:
}
};

/// Snapshot-to-TTree action helper that does not depend on the column types at compile time.
///
/// Column values reach the helper as type-erased `void *` (see Exec) and the corresponding types are carried
/// as `std::type_info` pointers (fInputColumnTypeIDs). This variant owns a single output file and tree;
/// BuildAction selects it when implicit multi-threading is not enabled. The object is move-only.
/// All member functions declared without a body are defined outside of this header.
class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelper final : public RActionImpl<UntypedSnapshotTTreeHelper> {
// Destination of the snapshot: file name, directory name and tree name.
std::string fFileName;
std::string fDirName;
std::string fTreeName;
// User-provided snapshot options.
RSnapshotOptions fOptions;
// Output file, owned by this helper.
std::unique_ptr<TFile> fOutputFile;
std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
// Starts as true and is set to true again by the sample callback (see GetSampleCallback).
bool fBranchAddressesNeedReset{true};
ColumnNames_t fInputBranchNames; // This contains the resolved aliases
// Names of the branches in the output tree.
ColumnNames_t fOutputBranchNames;
TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
// TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
// Bookkeeping of the branches created in the output tree.
RBranchSet fOutputBranches;
// One flag per column; BuildAction fills it with RColumnRegister::IsDefineOrAlias for each column name.
std::vector<bool> fIsDefine;
// Non-owning pointers to loop managers, received through the constructor.
// NOTE(review): no in-class initializer; they are presumably always set by the constructor -- confirm.
ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
ROOT::Detail::RDF::RLoopManager *fInputLoopManager;
std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns

public:
/// \param[in] filename, dirname, treename Destination of the snapshot.
/// \param[in] vbnames Input column names (BuildAction passes `colNames`).
/// \param[in] bnames Output column names (BuildAction passes the output column names of the helper args).
/// \param[in] options Snapshot options.
/// \param[in] isDefine Per-column Define/Alias flags, moved into the helper.
/// \param[in] loopManager Output loop manager (BuildAction passes `fOutputLoopManager` of the helper args).
/// \param[in] inputLM Input loop manager (BuildAction passes `fInputLoopManager` of the helper args).
/// \param[in] colTypeIDs Runtime type of each input column.
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
const RSnapshotOptions &options, std::vector<bool> &&isDefine,
ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM,
const std::vector<const std::type_info *> &colTypeIDs);

// Move-only: copying is disabled, moving uses the compiler-generated member-wise operations.
UntypedSnapshotTTreeHelper(const UntypedSnapshotTTreeHelper &) = delete;
UntypedSnapshotTTreeHelper &operator=(const UntypedSnapshotTTreeHelper &) = delete;
UntypedSnapshotTTreeHelper(UntypedSnapshotTTreeHelper &&) = default;
UntypedSnapshotTTreeHelper &operator=(UntypedSnapshotTTreeHelper &&) = default;
~UntypedSnapshotTTreeHelper() final;

/// Per-task initialization; according to the member documentation this is where fInputTree is set.
void InitTask(TTreeReader *, unsigned int);

/// Process one entry. `values` holds the type-erased addresses of the column values.
/// NOTE(review): presumably one element per input column, in the order of fInputBranchNames -- confirm.
void Exec(unsigned int, const std::vector<void *> &values);

/// NOTE(review): judging by the name, refreshes the addresses of branches holding C arrays -- confirm.
void UpdateCArraysPtrs(const std::vector<void *> &values);

/// Set up the output branches for the given type-erased column values.
void SetBranches(const std::vector<void *> &values);

/// Create the output branches without values to fill them with.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree);

void Initialize();

void Finalize();

/// Name under which this action is reported.
std::string GetActionName() { return "Snapshot"; }

/// The returned callback ignores its arguments and only raises fBranchAddressesNeedReset.
/// It captures `this`, so it must not be invoked after this helper has been destroyed or moved from.
ROOT::RDF::SampleCallback_t GetSampleCallback() final
{
return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
}

/// Create a new helper of the same type. NOTE(review): what `newName` points to is not visible here.
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
};

/// Multi-slot counterpart of the runtime-typed Snapshot-to-TTree helper.
///
/// Column values reach the helper as type-erased `void *` and their types as `std::type_info` pointers.
/// Most of the state is kept once per slot; BuildAction selects this helper when implicit multi-threading is
/// enabled. The object is move-only. Member functions declared without a body are defined outside this header.
class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelperMT final : public RActionImpl<UntypedSnapshotTTreeHelperMT> {

// IMT-specific data members

// Number of slots. NOTE(review): presumably the size of every per-slot vector below -- confirm in the constructor.
unsigned int fNSlots;
std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
// Per-slot output files and trees.
std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
std::vector<std::unique_ptr<TTree>> fOutputTrees;
std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
std::vector<TTree *> fInputTrees; // Current input trees, one per slot. Set at initialization time (`InitTask`)
// Addresses of branches in output per slot, non-null only for the ones holding C arrays
std::vector<std::vector<TBranch *>> fBranches;
// Addresses of objects associated to output branches per slot, non-null only for the ones holding C arrays
std::vector<std::vector<void *>> fBranchAddresses;
std::vector<RBranchSet> fOutputBranches; // Unique set of output branches, one per slot.

// Attributes of the output TTree

std::string fFileName;
std::string fDirName;
std::string fTreeName;
TFile *fOutputFile; // Non-owning view on the output file
RSnapshotOptions fOptions;
std::vector<std::string> fOutputBranchNames;

// Attributes related to the computation graph

// Non-owning pointers to loop managers, received through the constructor.
ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
ROOT::Detail::RDF::RLoopManager *fInputLoopManager;
std::vector<std::string> fInputBranchNames; // This contains the resolved aliases
std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns

// One flag per column; BuildAction fills it with RColumnRegister::IsDefineOrAlias for each column name.
std::vector<bool> fIsDefine;

public:
/// \param[in] nSlots Number of slots.
/// \param[in] filename, dirname, treename Destination of the snapshot.
/// \param[in] vbnames Input column names (BuildAction passes `colNames`).
/// \param[in] bnames Output column names (BuildAction passes the output column names of the helper args).
/// \param[in] options Snapshot options.
/// \param[in] isDefine Per-column Define/Alias flags, moved into the helper.
/// \param[in] loopManager Output loop manager.
/// \param[in] inputLM Input loop manager.
/// \param[in] colTypeIDs Runtime type of each input column.
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname,
std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
const RSnapshotOptions &options, std::vector<bool> &&isDefine,
ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM,
const std::vector<const std::type_info *> &colTypeIDs);

// Move-only: copying is disabled, moving uses the compiler-generated member-wise operations.
UntypedSnapshotTTreeHelperMT(const UntypedSnapshotTTreeHelperMT &) = delete;
UntypedSnapshotTTreeHelperMT &operator=(const UntypedSnapshotTTreeHelperMT &) = delete;
UntypedSnapshotTTreeHelperMT(UntypedSnapshotTTreeHelperMT &&) = default;
UntypedSnapshotTTreeHelperMT &operator=(UntypedSnapshotTTreeHelperMT &&) = default;
~UntypedSnapshotTTreeHelperMT() final;

/// Per-task initialization for `slot`; according to the member documentation this sets the slot's input tree.
void InitTask(TTreeReader *r, unsigned int slot);

/// Per-task teardown for `slot`.
void FinalizeTask(unsigned int slot);

/// Process one entry in `slot`. `values` holds the type-erased addresses of the column values.
/// NOTE(review): presumably one element per input column, in the order of fInputBranchNames -- confirm.
void Exec(unsigned int slot, const std::vector<void *> &values);

/// NOTE(review): judging by the name, refreshes the addresses of branches holding C arrays -- confirm.
void UpdateCArraysPtrs(unsigned int slot, const std::vector<void *> &values);

/// Set up the output branches of `slot` for the given type-erased column values.
void SetBranches(unsigned int slot, const std::vector<void *> &values);

/// Create the output branches without values to fill them with.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree);

void Initialize();

void Finalize();

/// Name under which this action is reported.
std::string GetActionName() { return "Snapshot"; }

/// The returned callback only raises the reset flag of the slot it is invoked for.
/// It captures `this`, so it must not be invoked after this helper has been destroyed or moved from.
ROOT::RDF::SampleCallback_t GetSampleCallback() final
{
return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
}

/// Create a new helper of the same type. NOTE(review): what `newName` points to is not visible here.
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
};

template <typename Acc, typename Merge, typename R, typename T, typename U,
bool MustCopyAssign = std::is_same<R, U>::value>
class R__CLING_PTRCHECK(off) AggregateHelper
Expand Down
7 changes: 7 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/ColumnReaderUtils.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "RLoopManager.hxx"
#include "RVariationBase.hxx"
#include "RVariationReader.hxx"
#include <ROOT/RDF/Utils.hxx>

#include <ROOT/RDataSource.hxx>
#include <ROOT/TypeTraits.hxx>
Expand Down Expand Up @@ -81,6 +82,12 @@ GetColumnReaders(unsigned int, TTreeReader *, TypeList<>, const RColumnReadersIn
return {};
}

/// Runtime-typed counterpart of GetColumnReaders: the column types are passed as `std::type_info` pointers in
/// `colTypeIDs` rather than through a compile-time type list. Declaration only; the definition is not in this header.
/// \param[in] slot Processing slot the readers are requested for.
/// \param[in] treeReader TTreeReader of the current task. NOTE(review): null handling is not visible here.
/// \param[in] colRegister Register of the columns known to the requesting node.
/// \param[in] lm Loop manager of the computation graph.
/// \param[in] colNames Names of the columns to read.
/// \param[in] colTypeIDs Runtime types of the columns. NOTE(review): presumably parallel to `colNames` -- confirm.
/// \param[in] variationName Name of the variation to read, "nominal" by default.
/// \return Non-owning pointers to column readers. NOTE(review): presumably one per entry of `colNames` -- confirm.
std::vector<RDFDetail::RColumnReaderBase *>
GetUntypedColumnReaders(unsigned int slot, TTreeReader *treeReader, ROOT::Internal::RDF::RColumnRegister &colRegister,
ROOT::Detail::RDF::RLoopManager &lm, const std::vector<std::string> &colNames,
const std::vector<const std::type_info *> &colTypeIDs,
const std::string &variationName = "nominal");

} // namespace RDF
} // namespace Internal
} // namespace ROOT
Expand Down
45 changes: 45 additions & 0 deletions tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "RColumnRegister.hxx"
#include <ROOT/RDF/RAction.hxx>
#include <ROOT/RDF/RActionSnapshot.hxx>
#include <ROOT/RDF/ActionHelpers.hxx> // for BuildAction
#include <ROOT/RDF/RColumnRegister.hxx>
#include <ROOT/RDF/RDefine.hxx>
Expand Down Expand Up @@ -333,6 +334,46 @@ BuildAction(const ColumnNames_t &colNames, const std::shared_ptr<SnapshotHelperA
return actionPtr;
}

template <typename PrevNodeType>
std::unique_ptr<RActionBase>
BuildAction(const ColumnNames_t &colNames, const std::shared_ptr<SnapshotHelperArgs> &snapHelperArgs,
const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, const RColumnRegister &colRegister,
const std::vector<const std::type_info *> &colTypeIDs)
{
const auto &filename = snapHelperArgs->fFileName;
const auto &dirname = snapHelperArgs->fDirName;
const auto &treename = snapHelperArgs->fTreeName;
const auto &outputColNames = snapHelperArgs->fOutputColNames;
const auto &options = snapHelperArgs->fOptions;
const auto &lmPtr = snapHelperArgs->fOutputLoopManager;
const auto &inputLM = snapHelperArgs->fInputLoopManager;

auto sz = colNames.size();
std::vector<bool> isDefine(sz);
for (auto i = 0u; i < sz; ++i)
isDefine[i] = colRegister.IsDefineOrAlias(colNames[i]);

std::unique_ptr<RActionBase> actionPtr;

if (!ROOT::IsImplicitMTEnabled()) {
// single-thread snapshot
using Helper_t = UntypedSnapshotTTreeHelper;
using Action_t = RActionSnapshot<Helper_t, PrevNodeType>;
actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, colNames, outputColNames, options,
std::move(isDefine), lmPtr, inputLM, colTypeIDs),
colNames, colTypeIDs, prevNode, colRegister));
} else {
// multi-thread snapshot
using Helper_t = UntypedSnapshotTTreeHelperMT;
using Action_t = RActionSnapshot<Helper_t, PrevNodeType>;
actionPtr.reset(new Action_t(Helper_t(nSlots, filename, dirname, treename, colNames, outputColNames, options,
std::move(isDefine), lmPtr, inputLM, colTypeIDs),
colNames, colTypeIDs, prevNode, colRegister));
}

return actionPtr;
}

// Book with custom helper type
template <typename... ColTypes, typename PrevNodeType, typename Helper_t>
std::unique_ptr<RActionBase>
Expand Down Expand Up @@ -470,6 +511,10 @@ void AddDSColumns(const std::vector<std::string> &requiredCols, RLoopManager &lm
(void)expander{(AddDSColumnsHelper<ColumnTypes>(requiredCols[i], lm, ds, colRegister), ++i)..., 0};
}

/// Non-template overload of AddDSColumns: the column types are passed at runtime as `std::type_info` pointers
/// instead of as template parameters. Declaration only; the definition is not in this header.
/// \param[in] requiredCols Names of the columns requested from the data source.
/// \param[in] lm Loop manager of the computation graph.
/// \param[in] ds Data source providing the columns.
/// \param[in] colTypeIDs Runtime types of the columns. NOTE(review): presumably parallel to `requiredCols` -- confirm.
/// \param[in,out] colRegister Column register, taken by non-const reference.
void AddDSColumns(const std::vector<std::string> &requiredCols, ROOT::Detail::RDF::RLoopManager &lm,
ROOT::RDF::RDataSource &ds, const std::vector<const std::type_info *> &colTypeIDs,
ROOT::Internal::RDF::RColumnRegister &colRegister);

// this function is meant to be called by the jitted code generated by BookFilterJit
template <typename F, typename PrevNode>
void JitFilterHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name,
Expand Down
Loading
Loading