Skip to content

Commit

Permalink
[ntuple] Add test for reading an unknown column type
Browse files Browse the repository at this point in the history
  • Loading branch information
silverweed committed Sep 24, 2024
1 parent 0550ae7 commit fe595f6
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 13 deletions.
9 changes: 9 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ enum class EColumnCppType {
kDouble,
kClusterSize,
kColumnSwitch,
kMax
};

inline constexpr EColumnCppType kTestFutureColumn = static_cast<EColumnCppType>((int)EColumnCppType::kMax + 1);

struct RTestFutureColumn {
int dummy;
};

std::unique_ptr<RColumnElementBase> GenerateColumnElement(EColumnCppType cppType, EColumnType colType);
Expand Down Expand Up @@ -160,6 +167,8 @@ std::unique_ptr<RColumnElementBase> RColumnElementBase::Generate(EColumnType typ
return GenerateColumnElement(EColumnCppType::kClusterSize, type);
else if constexpr (std::is_same_v<CppT, RColumnSwitch>)
return GenerateColumnElement(EColumnCppType::kColumnSwitch, type);
else if constexpr (std::is_same_v<CppT, RTestFutureColumn>)
return GenerateColumnElement(kTestFutureColumn, type);
else
static_assert(!sizeof(CppT), "Unsupported Cpp type");
}
Expand Down
4 changes: 4 additions & 0 deletions tree/ntuple/v7/src/RColumnElement.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ ROOT::Experimental::Internal::RColumnElementBase::GetValidBitRange(EColumnType t
case EColumnType::kSplitInt16: return std::make_pair(16, 16);
case EColumnType::kSplitUInt16: return std::make_pair(16, 16);
case EColumnType::kReal32Trunc: return std::make_pair(10, 31);
case kTestFutureType: return std::make_pair(32, 32);
default: assert(false);
}
// never here
Expand Down Expand Up @@ -94,6 +95,7 @@ std::string ROOT::Experimental::Internal::RColumnElementBase::GetTypeName(EColum
case EColumnType::kSplitInt16: return "SplitInt16";
case EColumnType::kSplitUInt16: return "SplitUInt16";
case EColumnType::kReal32Trunc: return "Real32Trunc";
case kTestFutureType: return "TestFutureType";
default: return "UNKNOWN";
}
}
Expand Down Expand Up @@ -134,6 +136,7 @@ ROOT::Experimental::Internal::RColumnElementBase::Generate<void>(EColumnType typ
case EColumnType::kSplitInt16: return std::make_unique<RColumnElement<std::int16_t, EColumnType::kSplitInt16>>();
case EColumnType::kSplitUInt16: return std::make_unique<RColumnElement<std::uint16_t, EColumnType::kSplitUInt16>>();
case EColumnType::kReal32Trunc: return std::make_unique<RColumnElement<float, EColumnType::kReal32Trunc>>();
case kTestFutureType: return std::make_unique<RColumnElement<Internal::RTestFutureColumn, kTestFutureType>>();
default: assert(false);
}
// never here
Expand All @@ -159,6 +162,7 @@ ROOT::Experimental::Internal::GenerateColumnElement(EColumnCppType cppType, ECol
case EColumnCppType::kDouble: return GenerateColumnElementInternal<double>(type);
case EColumnCppType::kClusterSize: return GenerateColumnElementInternal<ClusterSize_t>(type);
case EColumnCppType::kColumnSwitch: return GenerateColumnElementInternal<RColumnSwitch>(type);
case kTestFutureColumn: return GenerateColumnElementInternal<RTestFutureColumn>(type);
default: R__ASSERT(!"Invalid column cpp type");
}
// never here
Expand Down
18 changes: 18 additions & 0 deletions tree/ntuple/v7/src/RColumnElement.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,9 @@ namespace {
using ROOT::Experimental::EColumnType;
using ROOT::Experimental::Internal::RColumnElementBase;

// testing value for an unknown future column type
inline constexpr EColumnType kTestFutureType = static_cast<EColumnType>(int(EColumnType::kMax) + 1);

template <typename CppT, EColumnType>
class RColumnElement;

Expand Down Expand Up @@ -301,6 +304,7 @@ std::unique_ptr<RColumnElementBase> GenerateColumnElementInternal(EColumnType ty
case EColumnType::kSplitInt16: return std::make_unique<RColumnElement<CppT, EColumnType::kSplitInt16>>();
case EColumnType::kSplitUInt16: return std::make_unique<RColumnElement<CppT, EColumnType::kSplitUInt16>>();
case EColumnType::kReal32Trunc: return std::make_unique<RColumnElement<CppT, EColumnType::kReal32Trunc>>();
case kTestFutureType: return std::make_unique<RColumnElement<CppT, kTestFutureType>>();
default: R__ASSERT(false);
}
// never here
Expand Down Expand Up @@ -847,6 +851,20 @@ DECLARE_RCOLUMNELEMENT_SPEC(ROOT::Experimental::ClusterSize_t, EColumnType::kSpl
DECLARE_RCOLUMNELEMENT_SPEC(ROOT::Experimental::ClusterSize_t, EColumnType::kSplitIndex32, 32,
RColumnElementDeltaSplitLE, <std::uint64_t, std::uint32_t>);

template <>
class RColumnElement<ROOT::Experimental::Internal::RTestFutureColumn, kTestFutureType>
final : public RColumnElementBase {
public:
static constexpr bool kIsMappable = false;
static constexpr std::size_t kSize = sizeof(ROOT::Experimental::Internal::RTestFutureColumn);
static constexpr std::size_t kBitsOnStorage = kSize * 8;
RColumnElement() : RColumnElementBase(kSize, kBitsOnStorage) {}

bool IsMappable() const { return kIsMappable; }
void Pack(void *, const void *, std::size_t) const {}
void Unpack(void *, const void *, std::size_t) const {}
};

inline void
RColumnElement<bool, ROOT::Experimental::EColumnType::kBit>::Pack(void *dst, const void *src, std::size_t count) const
{
Expand Down
15 changes: 10 additions & 5 deletions tree/ntuple/v7/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -822,14 +822,19 @@ ROOT::Experimental::Internal::RColumnDescriptorBuilder::MakeDescriptor() const
return R__FAIL("invalid logical column id");
if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
return R__FAIL("invalid physical column id");
if (fColumn.GetType() == EColumnType::kUnknown)
return R__FAIL("invalid column model");
if (fColumn.GetFieldId() == kInvalidDescriptorId)
return R__FAIL("invalid field id, dangling column");

const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
return R__FAIL("invalid column bit width");
// NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
// created with a future version of ROOT. In this case we just skip the valid bit range check,
// as we have no idea what the valid range is.
// In general, reading the metadata of an unknown column is fine, it becomes an error only when
// we try to read the actual data contained in it.
if (fColumn.GetType() != EColumnType::kUnknown) {
const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
return R__FAIL("invalid column bit width");
}

return fColumn.Clone();
}
Expand Down
10 changes: 9 additions & 1 deletion tree/ntuple/v7/src/RNTupleSerialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <ROOT/RNTupleSerialize.hxx>
#include <ROOT/RNTupleUtil.hxx>

#include "RColumnElement.hxx"

#include <RVersion.h>
#include <TBufferFile.h>
#include <TClass.h>
Expand Down Expand Up @@ -686,6 +688,7 @@ ROOT::Experimental::Internal::RNTupleSerializer::SerializeColumnType(ROOT::Exper
case EColumnType::kSplitInt16: return SerializeUInt16(0x1C, buffer);
case EColumnType::kSplitUInt16: return SerializeUInt16(0x15, buffer);
case EColumnType::kReal32Trunc: return SerializeUInt16(0x1D, buffer);
case kTestFutureType: return SerializeUInt16(0x99, buffer);
default: throw RException(R__FAIL("ROOT bug: unexpected column type"));
}
}
Expand All @@ -699,6 +702,7 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeColumnType(const voi
auto result = DeserializeUInt16(buffer, onDiskType);

switch (onDiskType) {
case 0x00: return R__FAIL("unexpected on-disk column type");
case 0x01: type = EColumnType::kIndex64; break;
case 0x02: type = EColumnType::kIndex32; break;
case 0x03: type = EColumnType::kSwitch; break;
Expand Down Expand Up @@ -727,7 +731,11 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeColumnType(const voi
case 0x1C: type = EColumnType::kSplitInt16; break;
case 0x15: type = EColumnType::kSplitUInt16; break;
case 0x1D: type = EColumnType::kReal32Trunc; break;
default: return R__FAIL("unexpected on-disk column type");
// case 0x99 => kTestFutureType missing on purpose
default:
// may be a column type introduced by a future version
type = EColumnType::kUnknown;
break;
}
return result;
}
Expand Down
51 changes: 50 additions & 1 deletion tree/ntuple/v7/test/ntuple_compat.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#include "ROOT/EExecutionPolicy.hxx"
#include "RXTuple.hxx"
#include <gtest/gtest.h>
#include <memory>
#include <cstdio>

#include "../src/RColumnElement.hxx"

TEST(RNTupleCompat, Epoch)
{
Expand Down Expand Up @@ -65,7 +69,7 @@ TEST(RNTupleCompat, FeatureFlag)
}
}

TEST(RNTupleCompat, FwdCompat_FutureNTuple)
TEST(RNTupleCompat, FwdCompat_FutureNTupleAnchor)
{
using ROOT::Experimental::RXTuple;

Expand Down Expand Up @@ -223,3 +227,48 @@ TEST(RNTupleCompat, NTupleV4)
EXPECT_EQ(ntuple.GetMaxKeySize(), 0);
}
}

template <>
class ROOT::Experimental::RField<ROOT::Experimental::Internal::RTestFutureColumn> final
: public RSimpleField<ROOT::Experimental::Internal::RTestFutureColumn> {
protected:
std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final
{
return std::make_unique<RField>(newName);
}
const RColumnRepresentations &GetColumnRepresentations() const final
{
static const RColumnRepresentations representations{{{kTestFutureType}}, {}};
return representations;
}

public:
static std::string TypeName() { return "FutureColumn"; }
explicit RField(std::string_view name) : RSimpleField(name, TypeName()) {}
RField(RField &&other) = default;
RField &operator=(RField &&other) = default;
~RField() override = default;
};

TEST(RNTupleCompat, FutureColumnType)
{
// Write a RNTuple containing a field with an unknown column type and verify we can
// read back the ntuple and its descriptor.

FileRaii fileGuard("test_ntuple_compat_future_col_type.root");
{
auto model = RNTupleModel::Create();
auto col = model->MakeField<ROOT::Experimental::Internal::RTestFutureColumn>("futureColumn");
auto writeOpts = RNTupleWriteOptions();
writeOpts.SetCompression(0);
auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath(), writeOpts);
col->dummy = 0x42424242;
writer->Fill();
}

auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());
const auto &desc = reader->GetDescriptor();
const auto &fdesc = desc.GetFieldDescriptor(desc.FindFieldId("futureColumn"));
const auto &cdesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
EXPECT_EQ(cdesc.GetType(), EColumnType::kUnknown);
}
8 changes: 2 additions & 6 deletions tree/ntuple/v7/test/ntuple_serialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,8 @@ TEST(RNTuple, SerializeColumnType)
}

RNTupleSerializer::SerializeUInt16(5000, buffer);
try {
RNTupleSerializer::DeserializeColumnType(buffer, type).Unwrap();
FAIL() << "unexpected on disk column type should throw";
} catch (const RException& err) {
EXPECT_THAT(err.what(), testing::HasSubstr("unexpected on-disk column type"));
}
auto res = RNTupleSerializer::DeserializeColumnType(buffer, type);
EXPECT_TRUE(bool(res));

for (int i = 1; i < static_cast<int>(EColumnType::kMax); ++i) {
RNTupleSerializer::SerializeColumnType(static_cast<EColumnType>(i), buffer);
Expand Down

0 comments on commit fe595f6

Please sign in to comment.