Skip to content

Commit 82a3a0d

Browse files
committed
[DF][RDatasetSpec] Initial version of friend trees handling
1 parent 11a7f9a commit 82a3a0d

File tree

3 files changed

+69
-10
lines changed

3 files changed

+69
-10
lines changed

tree/dataframe/inc/ROOT/RDF/RDatasetSpec.hxx

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,31 @@ struct RDatasetSpec {
3838
}
3939
};
4040

41+
/**
 * Description of one friend of the dataset: the name(s) of the friend tree(s) and the file name globs
 * they are read from.
 *
 * fTreeNames holds either a single name that applies to every entry of fFileNameGlobs, or exactly one name
 * per entry of fFileNameGlobs (the multi-tree constructor enforces this).
 */
struct RFriendInfo {
   std::vector<std::string> fTreeNames{};     ///< Friend tree name(s): one for all files, or one per file.
   std::vector<std::string> fFileNameGlobs{}; ///< File names (or globs) containing the friend tree(s).

   /// Create an empty friend description (no trees, no files).
   // NOTE(review): presumably user-provided rather than `= default` on purpose, so that the nested type stays
   // usable in default arguments of the enclosing class -- confirm before changing.
   RFriendInfo() {}

   /// One friend tree read from one file (or glob).
   RFriendInfo(const std::string &treeName, const std::string &fileName)
      : fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(std::vector<std::string>{fileName})
   {
   }

   /// One friend tree name shared by several files (or globs).
   RFriendInfo(const std::string &treeName, const std::vector<std::string> &fileNames)
      : fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(fileNames)
   {
   }

   /// Several friend tree names and several files.
   /// \throws std::logic_error unless there are as many tree names as files, or exactly one tree name.
   RFriendInfo(const std::vector<std::string> &treeNames, const std::vector<std::string> &fileNames)
      : fTreeNames(
           // Validate inside the initializer so that nothing is copied when the sizes are inconsistent.
           fileNames.size() != treeNames.size() && treeNames.size() != 1
              ? throw std::logic_error("RFriendInfo expects either N trees and N files, or 1 tree and N files.")
              : treeNames),
        fFileNameGlobs(fileNames)
   {
   }
};
65+
4166
/**
4267
* A list of names of trees.
4368
* This list should go in lockstep with fFileNameGlobs, only in case this dataset is a TChain where each file
@@ -49,31 +74,35 @@ struct RDatasetSpec {
4974
* A list of file names.
5075
* They can contain the globbing characters supported by TChain. See TChain::Add for more information.
5176
*/
52-
5377
std::vector<std::string> fFileNameGlobs{};
5478

5579
ULong64_t fStartEntry{}; ///< The entry where the dataset processing should start (inclusive).
5680
ULong64_t fEndEntry{}; ///< The entry where the dataset processing should end (exclusive).
5781

58-
RDatasetSpec(const std::string &treeName, const std::string &fileName, REntryRange entryRange = {})
82+
std::vector<RFriendInfo> fFriendInfos{}; ///< List of friends
83+
84+
RDatasetSpec(const std::string &treeName, const std::string &fileName, REntryRange entryRange = {},
85+
const std::vector<RFriendInfo> &friendInfos = {})
5986
: fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(std::vector<std::string>{fileName}),
60-
fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry)
87+
fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry), fFriendInfos(friendInfos)
6188
{
6289
}
6390

64-
RDatasetSpec(const std::string &treeName, const std::vector<std::string> &fileNames, REntryRange entryRange = {})
91+
RDatasetSpec(const std::string &treeName, const std::vector<std::string> &fileNames, REntryRange entryRange = {},
92+
const std::vector<RFriendInfo> &friendInfos = {})
6593
: fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry),
66-
fEndEntry(entryRange.fEndEntry)
94+
fEndEntry(entryRange.fEndEntry), fFriendInfos(friendInfos)
6795
{
6896
}
6997

7098
RDatasetSpec(const std::vector<std::string> &treeNames, const std::vector<std::string> &fileNames,
71-
REntryRange entryRange = {})
99+
REntryRange entryRange = {}, const std::vector<RFriendInfo> &friendInfos = {})
72100
: fTreeNames(
73101
fileNames.size() != treeNames.size() && treeNames.size() != 1
74-
? throw std::runtime_error("RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.")
102+
? throw std::logic_error("RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.")
75103
: treeNames),
76-
fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry)
104+
fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry),
105+
fFriendInfos(friendInfos)
77106
{
78107
}
79108
};

tree/dataframe/src/RLoopManager.cxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,17 @@ RLoopManager::RLoopManager(const ROOT::RDF::RDatasetSpec &spec)
370370
chain->Add(fullpath.c_str());
371371
}
372372
SetTree(chain);
373+
// Build one TChain per friend description in the spec and register it as a friend of the main chain.
for (auto i = 0u; i < spec.fFriendInfos.size(); ++i) {
   const auto &friendInfo = spec.fFriendInfos[i];
   const auto friendChainName = "f" + std::to_string(i);
   auto friendChain = std::make_shared<TChain>(friendChainName.c_str());
   // Loop over the files of this friend: the bound is the number of globs, not the length of the j-th glob.
   for (auto j = 0u; j < friendInfo.fFileNameGlobs.size(); ++j) {
      // A single tree name applies to every file; otherwise the j-th tree name pairs with the j-th file.
      const auto &treeName = friendInfo.fTreeNames[friendInfo.fTreeNames.size() == 1 ? 0 : j];
      const auto fullpath = friendInfo.fFileNameGlobs[j] + "?#" + treeName;
      friendChain->Add(fullpath.c_str());
   }
   // NOTE(review): SetTree was already called with the main chain before this loop; confirm that calling it
   // again with the friend chain does not replace the main tree, and that something keeps the friend chain
   // alive for as long as the main chain refers to it.
   SetTree(friendChain);
   chain->AddFriend(friendChainName.c_str());
}
373384
}
374385

375386
struct RSlotRAII {

tree/dataframe/test/dataframe_datasetspec.cxx

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ TEST(RDFDatasetSpec, SingleFileSingleColConstructor)
5252
EXPECT_THROW(
5353
try {
5454
RDatasetSpec({"tree"s, "anothertree"s}, {"file.root"s}, {2, 4});
55-
} catch (const std::runtime_error &err) {
55+
} catch (const std::logic_error &err) {
5656
EXPECT_EQ(std::string(err.what()), "RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.");
5757
throw;
5858
},
59-
std::runtime_error);
59+
std::logic_error);
6060

6161
// specify range [2, 2) (3 is a valid index) => range is disregarded
6262
const auto dfRDS7 = RDataFrame(RDatasetSpec("tree", "file.root", {2, 2})).Display<int>({"x"})->AsString();
@@ -281,3 +281,22 @@ TEST(RDFDatasetSpec, MultipleFiles)
281281

282282
gSystem->Exec("rm file0.root file1.root file2.root");
283283
}
284+
285+
// TODO: test the friends
286+
/*
287+
TEST(RDFDatasetSpec, FriendTrees)
288+
{
289+
auto dfWriter0 = RDataFrame(3)
290+
.Define("x", [](ULong64_t e) { return int(e); }, {"rdfentry_"})
291+
.Define("y", [](ULong64_t e) { return int(e) + 1; }, {"rdfentry_"});
292+
dfWriter0.Snapshot<int, int>("treeA", "file0.root", {"x", "y"});
293+
dfWriter0.Snapshot<int, int>("treeA", "file1.root", {"x", "y"});
294+
dfWriter0.Snapshot<int, int>("treeB", "file2.root", {"x", "y"}); // different tree's name
295+
auto dfWriter1 = RDataFrame(2)
296+
.Define("x", [](ULong64_t e) { return int(e) + 2; }, {"rdfentry_"})
297+
.Define("y", [](ULong64_t e) { return int(e) + 3; }, {"rdfentry_"});
298+
dfWriter1.Snapshot<int, int>("treeA", "file3.root", {"x", "y"});
299+
dfWriter1.Snapshot<int, int>("treeB", "file4.root", {"x", "y"});
300+
dfWriter1.Snapshot<int, int>("treeC", "file5.root", {"x", "y"});
301+
}
302+
*/

0 commit comments

Comments
 (0)