-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ntuple] Add initial in-memory index prototype
This PR adds (a first version of) the `RNTupleIndex`, which is an in-memory structure that maps RNTuple field values (or combinations thereof) to an entry index in the RNTuple for which the index was built. At this point, the index only resides in memory and thus has to be (re)built each time. `RNTupleIndex` will be used by the `RNTupleProcessor` to enable dataset joins and will be as transparent as possible to users. Currently, no public interface is foreseen.
- Loading branch information
Showing
6 changed files
with
404 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/// \file ROOT/RNTupleIndex.hxx | ||
/// \ingroup NTuple ROOT7 | ||
/// \author Florine de Geus <florine.de.geus@cern.ch> | ||
/// \date 2024-04-02 | ||
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback | ||
/// is welcome! | ||
|
||
/************************************************************************* | ||
* Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * | ||
* All rights reserved. * | ||
* * | ||
* For the licensing terms see $ROOTSYS/LICENSE. * | ||
* For the list of contributors see $ROOTSYS/README/CREDITS. * | ||
*************************************************************************/ | ||
|
||
#ifndef ROOT7_RNTupleIndex | ||
#define ROOT7_RNTupleIndex | ||
|
||
#include <ROOT/RField.hxx> | ||
#include <ROOT/RNTupleUtil.hxx> | ||
|
||
#include <unordered_map> | ||
#include <memory> | ||
#include <set> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace ROOT { | ||
namespace Experimental { | ||
namespace Internal { | ||
/////////////////////////////////////////////////////////////////////////////
/// Container for the combined hash of the indexed fields. Uses the implementation from `boost::hash_combine` (see
/// https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine).
///
/// The struct also acts as its own hash functor (through `operator()`), which allows it to be used both as the key
/// type and as the `Hash` template argument of a `std::unordered_map`.
struct RIndexValue {
   /// The combined hash; zero as long as no hash has been added.
   std::size_t fValue = 0;

   /// Fold the hash `other` into the combined value. The result depends on the order in which hashes are added.
   /// \return A reference to this object, so additions can be chained.
   RIndexValue &operator+=(std::size_t other)
   {
      fValue ^= other + 0x9e3779b9 + (fValue << 6) + (fValue >> 2);
      return *this;
   }

   /// Two index values are equal when their combined hashes are equal.
   bool operator==(const RIndexValue &other) const { return other.fValue == fValue; }

   /// Hash functor: the combined value is used as the hash as-is.
   std::size_t operator()(const RIndexValue &val) const { return val.fValue; }
};
|
||
// clang-format off | ||
/** | ||
\class ROOT::Experimental::Internal::RNTupleIndex | ||
\ingroup NTuple | ||
\brief Build an index for an RNTuple so it can be joined onto other RNTuples. | ||
*/ | ||
// clang-format on | ||
class RNTupleIndex { | ||
friend std::unique_ptr<RNTupleIndex> | ||
CreateRNTupleIndex(std::vector<std::string_view> fieldNames, RPageSource &pageSource); | ||
|
||
private: | ||
std::vector<std::unique_ptr<RFieldBase>> fFields; | ||
std::unordered_map<RIndexValue, std::vector<NTupleSize_t>, RIndexValue> fIndex; | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
/// \brief Create an RNTupleIndex for an existing RNTuple. | ||
/// | ||
/// \param[in] The fields that will make up the index. | ||
/// \param[in] The page source of the RNTuple to build the index for. | ||
/// | ||
/// \note The page source is assumed be attached already. | ||
RNTupleIndex(std::vector<std::unique_ptr<RFieldBase>> fields, RPageSource &pageSource); | ||
|
||
public: | ||
RNTupleIndex(const RNTupleIndex &other) = delete; | ||
RNTupleIndex &operator=(const RNTupleIndex &other) = delete; | ||
RNTupleIndex(RNTupleIndex &&other) = delete; | ||
RNTupleIndex &operator=(RNTupleIndex &&other) = delete; | ||
|
||
std::size_t GetNElems() const { return fIndex.size(); } | ||
|
||
void Add(std::vector<void *> valuePtrs, NTupleSize_t entry); | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
/// \brief Get the entry number containing the given index value. | ||
/// | ||
/// \param[in] value The indexed value | ||
/// \return The entry number, containing the specified index value. When no such entry exists, return | ||
/// `kInvalidNTupleIndex` | ||
/// | ||
/// Note that in case multiple entries corresponding to the provided index value exist, the first occurrence is | ||
/// returned. Use RNTupleIndex::GetEntryIndices to get all entries. | ||
NTupleSize_t GetEntryIndex(std::vector<void *> valuePtrs) const; | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
/// \brief Get the entry number containing the given index value. | ||
/// | ||
/// \sa GetEntryIndex(std::vector<void *> valuePtrs) | ||
template <typename... Ts> | ||
NTupleSize_t GetEntryIndex(Ts... values) const | ||
{ | ||
if (sizeof...(Ts) != fFields.size()) | ||
throw RException(R__FAIL("number of value pointers must match number of indexed fields")); | ||
|
||
std::vector<void *> valuePtrs; | ||
([&] { valuePtrs.push_back(&values); }(), ...); | ||
|
||
return GetEntryIndex(valuePtrs); | ||
} | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
/// \brief Get all entry numbers for the given index. | ||
/// | ||
/// \param[in] value The indexed value | ||
/// \return The entry numbers containing the specified index value. When no entries exists, return an empty vector. | ||
std::vector<NTupleSize_t> GetEntryIndices(std::vector<void *> valuePtrs) const; | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
/// \brief Get all entry numbers for the given index. | ||
/// | ||
/// \sa GetEntryIndices(std::vector<void *> valuePtrs) | ||
template <typename... Ts> | ||
std::vector<NTupleSize_t> GetEntryIndices(Ts... values) const | ||
{ | ||
if (sizeof...(Ts) != fFields.size()) | ||
throw RException(R__FAIL("number of value pointers must match number of indexed fields")); | ||
|
||
std::vector<void *> valuePtrs; | ||
([&] { valuePtrs.push_back(&values); }(), ...); | ||
|
||
return GetEntryIndices(valuePtrs); | ||
} | ||
}; | ||
|
||
////////////////////////////////////////////////////////////////////////////////
/// \brief Create an RNTupleIndex from an existing RNTuple.
///
/// \param[in] fieldNames The names of the fields to index.
/// \param[in,out] pageSource The page source of the RNTuple to index. It is attached as part of this call.
///
/// \return A pointer to the newly-created index.
///
/// Throws an RException when one of the requested fields cannot be found or constructed.
std::unique_ptr<RNTupleIndex>
CreateRNTupleIndex(std::vector<std::string_view> fieldNames, RPageSource &pageSource);
|
||
} // namespace Internal | ||
} // namespace Experimental | ||
} // namespace ROOT | ||
|
||
#endif // ROOT7_RNTupleIndex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/// \file RNTupleIndex.cxx | ||
/// \ingroup NTuple ROOT7 | ||
/// \author Florine de Geus <florine.de.geus@cern.ch> | ||
/// \date 2024-04-02 | ||
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback | ||
/// is welcome! | ||
|
||
/************************************************************************* | ||
* Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * | ||
* All rights reserved. * | ||
* * | ||
* For the licensing terms see $ROOTSYS/LICENSE. * | ||
* For the list of contributors see $ROOTSYS/README/CREDITS. * | ||
*************************************************************************/ | ||
|
||
#include <ROOT/RNTupleIndex.hxx> | ||
|
||
////////////////////////////////////////////////////////////////////////////////
/// Take ownership of the given fields and fill the index: for every entry of the page source, the value of each
/// indexed field is read and the entry is registered through Add().
ROOT::Experimental::Internal::RNTupleIndex::RNTupleIndex(std::vector<std::unique_ptr<RFieldBase>> fields,
                                                         RPageSource &pageSource)
   : fFields(std::move(fields))
{
   // One value object per indexed field; each is re-read for every entry.
   std::vector<RFieldBase::RValue> values;
   values.reserve(fFields.size());
   for (const auto &indexedField : fFields)
      values.emplace_back(indexedField->CreateValue());

   // A single buffer is reused for the value pointers of each entry; Add() takes its own copy.
   std::vector<void *> valuePtrs;
   valuePtrs.reserve(values.size());

   for (std::uint64_t entry = 0; entry < pageSource.GetNEntries(); ++entry) {
      valuePtrs.clear();
      for (auto &value : values) {
         value.Read(entry);
         valuePtrs.push_back(value.GetPtr<void>().get());
      }
      Add(valuePtrs, entry);
   }
}
|
||
void ROOT::Experimental::Internal::RNTupleIndex::Add(std::vector<void *> valuePtrs, NTupleSize_t entry) | ||
{ | ||
RIndexValue indexValue; | ||
for (unsigned i = 0; i < fFields.size(); ++i) { | ||
indexValue += fFields[i]->GetHash(valuePtrs[i]); | ||
} | ||
fIndex[indexValue].push_back(entry); | ||
} | ||
|
||
////////////////////////////////////////////////////////////////////////////////
/// Look up all entries recorded for the given values and report the first one, or `kInvalidNTupleIndex` when there
/// is none.
ROOT::Experimental::NTupleSize_t
ROOT::Experimental::Internal::RNTupleIndex::GetEntryIndex(std::vector<void *> valuePtrs) const
{
   const auto matches = GetEntryIndices(valuePtrs);
   return matches.empty() ? kInvalidNTupleIndex : matches.front();
}
|
||
std::vector<ROOT::Experimental::NTupleSize_t> | ||
ROOT::Experimental::Internal::RNTupleIndex::GetEntryIndices(std::vector<void *> valuePtrs) const | ||
{ | ||
RIndexValue indexValue; | ||
for (unsigned i = 0; i < fFields.size(); ++i) { | ||
indexValue += fFields[i]->GetHash(valuePtrs[i]); | ||
} | ||
|
||
if (!fIndex.count(indexValue)) | ||
return {}; | ||
|
||
return fIndex.at(indexValue); | ||
} | ||
|
||
//------------------------------------------------------------------------------ | ||
|
||
std::unique_ptr<ROOT::Experimental::Internal::RNTupleIndex> | ||
ROOT::Experimental::Internal::CreateRNTupleIndex(std::vector<std::string_view> fieldNames, RPageSource &pageSource) | ||
{ | ||
pageSource.Attach(); | ||
auto desc = pageSource.GetSharedDescriptorGuard(); | ||
|
||
std::vector<std::unique_ptr<RFieldBase>> fields; | ||
|
||
for (const auto &fieldName : fieldNames) { | ||
auto fieldId = desc->FindFieldId(fieldName); | ||
if (fieldId == kInvalidDescriptorId) | ||
throw RException(R__FAIL("could not find field \"" + std::string(fieldName) + "")); | ||
|
||
const auto &fieldDesc = desc->GetFieldDescriptor(fieldId); | ||
auto fieldOrException = RFieldBase::Create(fieldDesc.GetFieldName(), fieldDesc.GetTypeName()); | ||
if (!fieldOrException) { | ||
throw RException(R__FAIL("could not construct field \"" + std::string(fieldName) + "\"")); | ||
} | ||
auto field = fieldOrException.Unwrap(); | ||
field->SetOnDiskId(fieldDesc.GetId()); | ||
|
||
CallConnectPageSourceOnField(*field, pageSource); | ||
|
||
fields.push_back(std::move(field)); | ||
} | ||
|
||
return std::unique_ptr<RNTupleIndex>(new RNTupleIndex(std::move(fields), pageSource)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.