From e7363e48f28aefc776d2bb570dbd98b7fc4efd6c Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 19 Sep 2024 14:33:02 +0200 Subject: [PATCH] [ntuple] Add test for reading an unknown column type --- .../ntuple/v7/inc/ROOT/RColumnElementBase.hxx | 9 ++++ tree/ntuple/v7/src/RColumnElement.cxx | 4 ++ tree/ntuple/v7/src/RColumnElement.hxx | 18 +++++++ tree/ntuple/v7/src/RNTupleDescriptor.cxx | 15 ++++-- tree/ntuple/v7/src/RNTupleSerialize.cxx | 10 +++- tree/ntuple/v7/test/ntuple_compat.cxx | 51 ++++++++++++++++++- tree/ntuple/v7/test/ntuple_serialize.cxx | 8 +-- 7 files changed, 102 insertions(+), 13 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx b/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx index e93dfcba2725d..5d39b2db12679 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx @@ -123,6 +123,13 @@ enum class EColumnCppType { kDouble, kClusterSize, kColumnSwitch, + kMax +}; + +inline constexpr EColumnCppType kTestFutureColumn = static_cast((int)EColumnCppType::kMax + 1); + +struct RTestFutureColumn { + int dummy; }; std::unique_ptr GenerateColumnElement(EColumnCppType cppType, EColumnType colType); @@ -160,6 +167,8 @@ std::unique_ptr RColumnElementBase::Generate(EColumnType typ return GenerateColumnElement(EColumnCppType::kClusterSize, type); else if constexpr (std::is_same_v) return GenerateColumnElement(EColumnCppType::kColumnSwitch, type); + else if constexpr (std::is_same_v) + return GenerateColumnElement(kTestFutureColumn, type); else static_assert(!sizeof(CppT), "Unsupported Cpp type"); } diff --git a/tree/ntuple/v7/src/RColumnElement.cxx b/tree/ntuple/v7/src/RColumnElement.cxx index cbd79fcd3aab6..e9807d670999c 100644 --- a/tree/ntuple/v7/src/RColumnElement.cxx +++ b/tree/ntuple/v7/src/RColumnElement.cxx @@ -57,6 +57,7 @@ ROOT::Experimental::Internal::RColumnElementBase::GetValidBitRange(EColumnType t case EColumnType::kSplitInt16: return std::make_pair(16, 16); case EColumnType::kSplitUInt16: return std::make_pair(16, 16); case EColumnType::kReal32Trunc: return std::make_pair(10, 31); + case kTestFutureType: return std::make_pair(32, 32); default: assert(false); } // never here @@ -94,6 +95,7 @@ std::string ROOT::Experimental::Internal::RColumnElementBase::GetTypeName(EColum case EColumnType::kSplitInt16: return "SplitInt16"; case EColumnType::kSplitUInt16: return "SplitUInt16"; case EColumnType::kReal32Trunc: return "Real32Trunc"; + case kTestFutureType: return "TestFutureType"; default: return "UNKNOWN"; } } @@ -134,6 +136,7 @@ ROOT::Experimental::Internal::RColumnElementBase::Generate(EColumnType typ case EColumnType::kSplitInt16: return std::make_unique>(); case EColumnType::kSplitUInt16: return std::make_unique>(); case EColumnType::kReal32Trunc: return std::make_unique>(); + case kTestFutureType: return std::make_unique>(); default: assert(false); } // never here @@ -159,6 +162,7 @@ ROOT::Experimental::Internal::GenerateColumnElement(EColumnCppType cppType, ECol case EColumnCppType::kDouble: return GenerateColumnElementInternal(type); case EColumnCppType::kClusterSize: return GenerateColumnElementInternal(type); case EColumnCppType::kColumnSwitch: return GenerateColumnElementInternal(type); + case kTestFutureColumn: return GenerateColumnElementInternal(type); default: R__ASSERT(!"Invalid column cpp type"); } // never here diff --git a/tree/ntuple/v7/src/RColumnElement.hxx b/tree/ntuple/v7/src/RColumnElement.hxx index 991ecd44243f3..e46cf6a5e7f98 100644 --- a/tree/ntuple/v7/src/RColumnElement.hxx +++ b/tree/ntuple/v7/src/RColumnElement.hxx @@ -266,6 +266,9 @@ namespace { using ROOT::Experimental::EColumnType; using ROOT::Experimental::Internal::RColumnElementBase; +// testing value for an unknown future column type +inline constexpr EColumnType kTestFutureType = static_cast(int(EColumnType::kMax) + 1); + template class RColumnElement; @@ -301,6 +304,7 @@ std::unique_ptr GenerateColumnElementInternal(EColumnType ty case EColumnType::kSplitInt16: return std::make_unique>(); case EColumnType::kSplitUInt16: return std::make_unique>(); case EColumnType::kReal32Trunc: return std::make_unique>(); + case kTestFutureType: return std::make_unique>(); default: R__ASSERT(false); } // never here @@ -847,6 +851,20 @@ DECLARE_RCOLUMNELEMENT_SPEC(ROOT::Experimental::ClusterSize_t, EColumnType::kSpl DECLARE_RCOLUMNELEMENT_SPEC(ROOT::Experimental::ClusterSize_t, EColumnType::kSplitIndex32, 32, RColumnElementDeltaSplitLE, ); +template <> +class RColumnElement final + : public RColumnElementBase { +public: + static constexpr bool kIsMappable = false; + static constexpr std::size_t kSize = sizeof(ROOT::Experimental::Internal::RTestFutureColumn); + static constexpr std::size_t kBitsOnStorage = kSize * 8; + RColumnElement() : RColumnElementBase(kSize, kBitsOnStorage) {} + + bool IsMappable() const { return kIsMappable; } + void Pack(void *, const void *, std::size_t) const {} + void Unpack(void *, const void *, std::size_t) const {} +}; + inline void RColumnElement::Pack(void *dst, const void *src, std::size_t count) const { diff --git a/tree/ntuple/v7/src/RNTupleDescriptor.cxx b/tree/ntuple/v7/src/RNTupleDescriptor.cxx index 743da9825e3cb..4adbfae5b00b3 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptor.cxx @@ -822,14 +822,19 @@ ROOT::Experimental::Internal::RColumnDescriptorBuilder::MakeDescriptor() const return R__FAIL("invalid logical column id"); if (fColumn.GetPhysicalId() == kInvalidDescriptorId) return R__FAIL("invalid physical column id"); - if (fColumn.GetType() == EColumnType::kUnknown) - return R__FAIL("invalid column model"); if (fColumn.GetFieldId() == kInvalidDescriptorId) return R__FAIL("invalid field id, dangling column"); - const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType()); - if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits) - return R__FAIL("invalid column bit width"); + // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple + // created with a future version of ROOT. In this case we just skip the valid bit range check, + // as we have no idea what the valid range is. + // In general, reading the metadata of an unknown column is fine, it becomes an error only when + // we try to read the actual data contained in it. + if (fColumn.GetType() != EColumnType::kUnknown) { + const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType()); + if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits) + return R__FAIL("invalid column bit width"); + } return fColumn.Clone(); } diff --git a/tree/ntuple/v7/src/RNTupleSerialize.cxx b/tree/ntuple/v7/src/RNTupleSerialize.cxx index aec83f5c1b0eb..eb546b95e3e5c 100644 --- a/tree/ntuple/v7/src/RNTupleSerialize.cxx +++ b/tree/ntuple/v7/src/RNTupleSerialize.cxx @@ -20,6 +20,8 @@ #include #include +#include "RColumnElement.hxx" + #include #include #include @@ -686,6 +688,7 @@ ROOT::Experimental::Internal::RNTupleSerializer::SerializeColumnType(ROOT::Exper case EColumnType::kSplitInt16: return SerializeUInt16(0x1C, buffer); case EColumnType::kSplitUInt16: return SerializeUInt16(0x15, buffer); case EColumnType::kReal32Trunc: return SerializeUInt16(0x1D, buffer); + case kTestFutureType: return SerializeUInt16(0x99, buffer); default: throw RException(R__FAIL("ROOT bug: unexpected column type")); } } @@ -699,6 +702,7 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeColumnType(const voi auto result = DeserializeUInt16(buffer, onDiskType); switch (onDiskType) { + case 0x00: return R__FAIL("unexpected on-disk column type"); case 0x01: type = EColumnType::kIndex64; break; case 0x02: type = EColumnType::kIndex32; break; case 0x03: type = EColumnType::kSwitch; break; @@ -727,7 +731,11 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeColumnType(const voi case 0x1C: type = EColumnType::kSplitInt16; break; case 0x15: type = EColumnType::kSplitUInt16; break; case 0x1D: type = EColumnType::kReal32Trunc; break; - default: return R__FAIL("unexpected on-disk column type"); + // case 0x99 => kTestFutureType missing on purpose + default: + // may be a column type introduced by a future version + type = EColumnType::kUnknown; + break; } return result; } diff --git a/tree/ntuple/v7/test/ntuple_compat.cxx b/tree/ntuple/v7/test/ntuple_compat.cxx index 49bfeda808d13..4b8ea06569642 100644 --- a/tree/ntuple/v7/test/ntuple_compat.cxx +++ b/tree/ntuple/v7/test/ntuple_compat.cxx @@ -5,6 +5,10 @@ #include "ROOT/EExecutionPolicy.hxx" #include "RXTuple.hxx" #include +#include +#include + +#include "../src/RColumnElement.hxx" TEST(RNTupleCompat, Epoch) { @@ -65,7 +69,7 @@ TEST(RNTupleCompat, FeatureFlag) } } -TEST(RNTupleCompat, FwdCompat_FutureNTuple) +TEST(RNTupleCompat, FwdCompat_FutureNTupleAnchor) { using ROOT::Experimental::RXTuple; @@ -223,3 +227,48 @@ TEST(RNTupleCompat, NTupleV4) EXPECT_EQ(ntuple.GetMaxKeySize(), 0); } } + +template <> +class ROOT::Experimental::RField final + : public RSimpleField { +protected: + std::unique_ptr CloneImpl(std::string_view newName) const final + { + return std::make_unique(newName); + } + const RColumnRepresentations &GetColumnRepresentations() const final + { + static const RColumnRepresentations representations{{{kTestFutureType}}, {}}; + return representations; + } + +public: + static std::string TypeName() { return "FutureColumn"; } + explicit RField(std::string_view name) : RSimpleField(name, TypeName()) {} + RField(RField &&other) = default; + RField &operator=(RField &&other) = default; + ~RField() override = default; +}; + +TEST(RNTupleCompat, FutureColumnType) +{ + // Write a RNTuple containing a field with an unknown column type and verify we can + // read back the ntuple and its descriptor. + + FileRaii fileGuard("test_ntuple_compat_future_col_type.root"); + { + auto model = RNTupleModel::Create(); + auto col = model->MakeField("futureColumn"); + auto writeOpts = RNTupleWriteOptions(); + writeOpts.SetCompression(0); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath(), writeOpts); + col->dummy = 0x42424242; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + const auto &desc = reader->GetDescriptor(); + const auto &fdesc = desc.GetFieldDescriptor(desc.FindFieldId("futureColumn")); + const auto &cdesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + EXPECT_EQ(cdesc.GetType(), EColumnType::kUnknown); +} diff --git a/tree/ntuple/v7/test/ntuple_serialize.cxx b/tree/ntuple/v7/test/ntuple_serialize.cxx index 3b06c07b0d903..54e3a37e8d5c4 100644 --- a/tree/ntuple/v7/test/ntuple_serialize.cxx +++ b/tree/ntuple/v7/test/ntuple_serialize.cxx @@ -63,12 +63,8 @@ TEST(RNTuple, SerializeColumnType) } RNTupleSerializer::SerializeUInt16(5000, buffer); - try { - RNTupleSerializer::DeserializeColumnType(buffer, type).Unwrap(); - FAIL() << "unexpected on disk column type should throw"; - } catch (const RException& err) { - EXPECT_THAT(err.what(), testing::HasSubstr("unexpected on-disk column type")); - } + auto res = RNTupleSerializer::DeserializeColumnType(buffer, type); + EXPECT_TRUE(bool(res)); for (int i = 1; i < static_cast(EColumnType::kMax); ++i) { RNTupleSerializer::SerializeColumnType(static_cast(i), buffer);