[ntuple] add Real32Quant column type

root-project · Sep 9, 2024 · cde7f5a · cde7f5a
1 parent 633cddb
commit cde7f5a
Show file tree

Hide file tree

Showing 9 changed files with 270 additions and 33 deletions.
diff --git a/tree/ntuple/v7/doc/specifications.md b/tree/ntuple/v7/doc/specifications.md
@@ -1,4 +1,4 @@
-# RNTuple Reference Specifications 0.2.9.0
+# RNTuple Reference Specifications 0.2.10.0
 
 **Note:** This is work in progress. The RNTuple specification is not yet finalized.
 
@@ -405,6 +405,7 @@ The flags field can have one of the following bits set:
 | 0x01     | Repetitive field, i.e. for every entry $n$ copies of the field are stored  |
 | 0x02     | Projected field                                                            |
 | 0x04     | Has ROOT type checksum as reported by TClass                               |
+| 0x08     | Field with a range of possible values                                      |
 
 If `flag==0x01` (_repetitive field_) is set, the field represents a fixed-size array.
 Typically, another (sub) field with `Parent Field ID` equal to the ID of this field
@@ -416,9 +417,12 @@ the field has been created as a virtual field from another, non-projected source
 If a projected field has attached columns,
 these columns are alias columns to physical columns attached to the source field.
 
-If `flag==0x04` (type checksum) is set, the field metadata contain the checksum of the ROOT streamer info.
+If `flag==0x04` (_type checksum_) is set, the field metadata contain the checksum of the ROOT streamer info.
 This checksum is only used for I/O rules in order to find types that are identified by checksum.
 
+If `flag==0x08` (_field with range_) is set, the field metadata contain the range of valid values
+for this field (used e.g. for quantized real values, see Column Description section).
+
 Depending on the flags, the following optional values follow:
 
 ```
@@ -433,6 +437,14 @@ Depending on the flags, the following optional values follow:
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 +          ROOT Streamer Checksum (if flag 0x04 is set)         +
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|                                                               |
++                Min value (if flag 0x08 is set)                +
+|                                                               |
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|                                                               |
++                Max value (if flag 0x08 is set)                +
+|                                                               |
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 ```
 
 The block of integers is followed by a list of strings:
@@ -478,37 +490,38 @@ The representation index is consecutive starting at zero.
 
 The column type and bits on storage integers can have one of the following values
 
-| Type | Bits | Name         | Contents                                                                      |
-|------|------|--------------|-------------------------------------------------------------------------------|
-| 0x01 |   64 | Index64      | Parent columns of (nested) collections, counting is relative to the cluster   |
-| 0x02 |   32 | Index32      | Parent columns of (nested) collections, counting is relative to the cluster   |
-| 0x03 |   96 | Switch       | Tuple of a kIndex64 value followed by a 32 bits dispatch tag to a column ID   |
-| 0x04 |    8 | Byte         | An uninterpreted byte, e.g. part of a blob                                    |
-| 0x05 |    8 | Char         | ASCII character                                                               |
-| 0x06 |    1 | Bit          | Boolean value                                                                 |
-| 0x07 |   64 | Real64       | IEEE-754 double precision float                                               |
-| 0x08 |   32 | Real32       | IEEE-754 single precision float                                               |
-| 0x09 |   16 | Real16       | IEEE-754 half precision float                                                 |
-| 0x16 |   64 | Int64        | Two's complement, little-endian 8 byte signed integer                         |
-| 0x0A |   64 | UInt64       | Little-endian 8 byte unsigned integer                                         |
-| 0x17 |   32 | Int32        | Two's complement, little-endian 4 byte signed integer                         |
-| 0x0B |   32 | UInt32       | Little-endian 4 byte unsigned integer                                         |
-| 0x18 |   16 | Int16        | Two's complement, little-endian 2 byte signed integer                         |
-| 0x0C |   16 | UInt16       | Little-endian 2 byte unsigned integer                                         |
-| 0x19 |    8 | Int8         | Two's complement, 1 byte signed integer                                       |
-| 0x0D |    8 | UInt8        | 1 byte unsigned integer                                                       |
-| 0x0E |   64 | SplitIndex64 | Like Index64 but pages are stored in split + delta encoding                   |
-| 0x0F |   32 | SplitIndex32 | Like Index32 but pages are stored in split + delta encoding                   |
-| 0x10 |   64 | SplitReal64  | Like Real64 but in split encoding                                             |
-| 0x11 |   32 | SplitReal32  | Like Real32 but in split encoding                                             |
-| 0x12 |   16 | SplitReal16  | Like Real16 but in split encoding                                             |
-| 0x1A |   64 | SplitInt64   | Like Int64 but in split + zigzag encoding                                     |
-| 0x13 |   64 | SplitUInt64  | Like UInt64 but in split encoding                                             |
-| 0x1B |   64 | SplitInt32   | Like Int32 but in split + zigzag encoding                                     |
-| 0x14 |   32 | SplitUInt32  | Like UInt32 but in split encoding                                             |
-| 0x1C |   16 | SplitInt16   | Like Int16 but in split + zigzag encoding                                     |
-| 0x15 |   16 | SplitUInt16  | Like UInt16 but in split encoding                                             |
-| 0x1D |10-31 | Real32Trunc  | IEEE-754 single precision float with truncated mantissa                       |
+| Type | Bits | Name         | Contents                                                                                      |
+|------|------|--------------|-----------------------------------------------------------------------------------------------|
+| 0x01 |   64 | Index64      | Parent columns of (nested) collections, counting is relative to the cluster                   |
+| 0x02 |   32 | Index32      | Parent columns of (nested) collections, counting is relative to the cluster                   |
+| 0x03 |   96 | Switch       | Tuple of a kIndex64 value followed by a 32 bits dispatch tag to a column ID                   |
+| 0x04 |    8 | Byte         | An uninterpreted byte, e.g. part of a blob                                                    |
+| 0x05 |    8 | Char         | ASCII character                                                                               |
+| 0x06 |    1 | Bit          | Boolean value                                                                                 |
+| 0x07 |   64 | Real64       | IEEE-754 double precision float                                                               |
+| 0x08 |   32 | Real32       | IEEE-754 single precision float                                                               |
+| 0x09 |   16 | Real16       | IEEE-754 half precision float                                                                 |
+| 0x16 |   64 | Int64        | Two's complement, little-endian 8 byte signed integer                                         |
+| 0x0A |   64 | UInt64       | Little-endian 8 byte unsigned integer                                                         |
+| 0x17 |   32 | Int32        | Two's complement, little-endian 4 byte signed integer                                         |
+| 0x0B |   32 | UInt32       | Little-endian 4 byte unsigned integer                                                         |
+| 0x18 |   16 | Int16        | Two's complement, little-endian 2 byte signed integer                                         |
+| 0x0C |   16 | UInt16       | Little-endian 2 byte unsigned integer                                                         |
+| 0x19 |    8 | Int8         | Two's complement, 1 byte signed integer                                                       |
+| 0x0D |    8 | UInt8        | 1 byte unsigned integer                                                                       |
+| 0x0E |   64 | SplitIndex64 | Like Index64 but pages are stored in split + delta encoding                                   |
+| 0x0F |   32 | SplitIndex32 | Like Index32 but pages are stored in split + delta encoding                                   |
+| 0x10 |   64 | SplitReal64  | Like Real64 but in split encoding                                                             |
+| 0x11 |   32 | SplitReal32  | Like Real32 but in split encoding                                                             |
+| 0x12 |   16 | SplitReal16  | Like Real16 but in split encoding                                                             |
+| 0x1A |   64 | SplitInt64   | Like Int64 but in split + zigzag encoding                                                     |
+| 0x13 |   64 | SplitUInt64  | Like UInt64 but in split encoding                                                             |
+| 0x1B |   32 | SplitInt32   | Like Int32 but in split + zigzag encoding                                                     |
+| 0x14 |   32 | SplitUInt32  | Like UInt32 but in split encoding                                                             |
+| 0x1C |   16 | SplitInt16   | Like Int16 but in split + zigzag encoding                                                     |
+| 0x15 |   16 | SplitUInt16  | Like UInt16 but in split encoding                                                             |
+| 0x1D |10-31 | Real32Trunc  | IEEE-754 single precision float with truncated mantissa                                       |
+| 0x1E | 8-32 | Real32Quant  | Real value contained in a specified range with an underlying quantized integer representation |
 
 The "split encoding" columns apply a byte transformation encoding to all pages of that column
 and in addition, depending on the column type, delta or zigzag encoding:
@@ -530,6 +543,10 @@ not cluster-wise.
 The "Real32Trunc" type column is a variable-sized floating point column with lower precision than `Real32` and `SplitReal32`.
 It is an IEEE-754 single precision float with some of the mantissa's least significant bits truncated.
 
+The "Real32Quant" type column is a variable-sized real column whose values lie within a specified range
+and are internally represented as quantized integers.
+The min and max values of the range are specified in the metadata of its parent field (see the Field Description section).
+
 Future versions of the file format may introduce additional column types
 without changing the minimum version of the header.
 Old readers need to ignore these columns and fields constructed from such columns.

diff --git a/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx b/tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx
@@ -89,6 +89,11 @@ public:
          throw RException(R__FAIL(std::string("internal error: cannot change bit width of this column type")));
    }
 
+   /// Sets the value range of the column element.
+   /// This default implementation rejects the request by throwing an RException;
+   /// it is meant to be overridden by column types that carry a value range.
+   virtual void SetValueRange(double /*min*/, double /*max*/)
+   {
+      const std::string errMsg{"internal error: cannot change value range of this column type"};
+      throw RException(R__FAIL(errMsg));
+   }
+
    /// If the on-storage layout and the in-memory layout differ, packing creates an on-disk page from an in-memory page
    virtual void Pack(void *destination, const void *source, std::size_t count) const
    {

diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx
@@ -72,6 +72,10 @@ class RFieldDescriptor {
    friend class Internal::RNTupleDescriptorBuilder;
    friend class Internal::RFieldDescriptorBuilder;
 
+   /// Pair of bounds describing a range of values.
+   struct RValueRange {
+      /// Lower bound of the range
+      double fMin;
+      /// Upper bound of the range
+      double fMax;
+   };
+
 private:
    DescriptorId_t fFieldId = kInvalidDescriptorId;
    /// The version of the C++-type-to-column translation mechanics
@@ -106,6 +110,8 @@ private:
    /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
    /// identify types by their checksum
    std::optional<std::uint32_t> fTypeChecksum;
+   /// Optional value range (used e.g. by quantized real fields)
+   std::optional<RValueRange> fValueRange;
 
 public:
    RFieldDescriptor() = default;
@@ -141,6 +147,7 @@ public:
    /// natively supported stdlib classes.
    /// The dictionary does not need to be available for this method.
    bool IsCustomClass() const;
+   /// Returns a copy of the optional value range stored in this descriptor (empty if none was set).
+   std::optional<RValueRange> GetValueRange() const
+   {
+      return fValueRange;
+   }
 };
 
 // clang-format off

diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleSerialize.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleSerialize.hxx
@@ -68,6 +68,7 @@ public:
    static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
    static constexpr std::uint16_t kFlagProjectedField = 0x02;
    static constexpr std::uint16_t kFlagHasTypeChecksum = 0x04;
+   static constexpr std::uint16_t kFlagHasValueRange = 0x08;
 
    static constexpr std::uint16_t kFlagDeferredColumn = 0x08;
 

diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleUtil.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleUtil.hxx
@@ -79,6 +79,7 @@ enum class EColumnType {
    kSplitInt16,
    kSplitUInt16,
    kReal32Trunc,
+   kReal32Quant,
    kMax,
 };
 

diff --git a/tree/ntuple/v7/src/RColumnElement.cxx b/tree/ntuple/v7/src/RColumnElement.cxx
@@ -57,6 +57,7 @@ ROOT::Experimental::Internal::RColumnElementBase::GetValidBitRange(EColumnType t
    case EColumnType::kSplitInt16: return std::make_pair(16, 16);
    case EColumnType::kSplitUInt16: return std::make_pair(16, 16);
    case EColumnType::kReal32Trunc: return std::make_pair(10, 31);
+   case EColumnType::kReal32Quant: return std::make_pair(8, 32);
    default: assert(false);
    }
    // never here
@@ -94,6 +95,7 @@ std::string ROOT::Experimental::Internal::RColumnElementBase::GetTypeName(EColum
    case EColumnType::kSplitInt16: return "SplitInt16";
    case EColumnType::kSplitUInt16: return "SplitUInt16";
    case EColumnType::kReal32Trunc: return "Real32Trunc";
+   case EColumnType::kReal32Quant: return "Real32Quant";
    default: return "UNKNOWN";
    }
 }
@@ -134,6 +136,7 @@ ROOT::Experimental::Internal::RColumnElementBase::Generate<void>(EColumnType typ
    case EColumnType::kSplitInt16: return std::make_unique<RColumnElement<std::int16_t, EColumnType::kSplitInt16>>();
    case EColumnType::kSplitUInt16: return std::make_unique<RColumnElement<std::uint16_t, EColumnType::kSplitUInt16>>();
    case EColumnType::kReal32Trunc: return std::make_unique<RColumnElement<float, EColumnType::kReal32Trunc>>();
+   case EColumnType::kReal32Quant: return std::make_unique<RColumnElement<float, EColumnType::kReal32Quant>>();
    default: assert(false);
    }
    // never here
@@ -285,3 +288,4 @@ void ROOT::Experimental::Internal::BitPacking::UnpackBits(void *dst, const void
    assert(prevWordLsb == 0);
    assert(dstIdx == count);
 }
+
diff --git a/tree/ntuple/v7/src/RColumnElement.hxx b/tree/ntuple/v7/src/RColumnElement.hxx
@@ -740,6 +740,135 @@ public:
    }
 };
 
+namespace Quantize {
+
+using Quantized_t = std::uint32_t;
+
+/// Returns the number of consecutive zero bits of `x` counted from the most significant bit.
+/// Returns 32 if `x` is zero.
+[[maybe_unused]] inline std::size_t LeadingZeroes(std::uint32_t x)
+{
+   // Both the MSVC intrinsic and the GCC/Clang builtin have an undefined result for a zero input.
+   if (x == 0)
+      return 32;
+
+#ifdef _MSC_VER
+   // _BitScanReverse yields the index (0 = LSB) of the most significant set bit.
+   unsigned long idx = 0;
+   _BitScanReverse(&idx, x);
+   return static_cast<std::size_t>(31 - idx);
+#else
+   // Use the `unsigned int` builtin: the `l` variant would also count the upper bits of a wider `unsigned long`.
+   return static_cast<std::size_t>(__builtin_clz(x));
+#endif
+}
+
+/// Returns the number of consecutive zero bits of `x` counted from the least significant bit.
+/// Returns 32 if `x` is zero.
+[[maybe_unused]] inline std::size_t TrailingZeroes(std::uint32_t x)
+{
+   // Both the MSVC intrinsic and the GCC/Clang builtin have an undefined result for a zero input.
+   if (x == 0)
+      return 32;
+
+#ifdef _MSC_VER
+   // _BitScanForward yields the index (0 = LSB) of the least significant set bit,
+   // which equals the number of trailing zeroes.
+   unsigned long idx = 0;
+   _BitScanForward(&idx, x);
+   return static_cast<std::size_t>(idx);
+#else
+   return static_cast<std::size_t>(__builtin_ctz(x));
+#endif
+}
+
+/// Converts the array of `count` floating point numbers in `src` into an array of their quantized representations.
+/// Each element of `src` is assumed to be in the inclusive range [min, max]; this is only verified via `assert`.
+/// The quantized representation will consist of unsigned integers of at most `nQuantBits`, with `8 <= nQuantBits <=
+/// 32`. Every value is mapped linearly from [min, max] to [0, 2^nQuantBits - 1] and rounded to the nearest integer.
+/// The unused bits are kept in the LSB of the quantized integers, to allow for easy bit packing of those integers
+/// via BitPacking::PackBits().
+///
+/// \param dst Output array; must have room for `count` elements
+/// \param src Input array of `count` floating point values
+/// \param count Number of elements to convert
+/// \param min Lower bound of the value range
+/// \param max Upper bound of the value range
+/// \param nQuantBits Number of significant bits of each quantized integer
+template <typename T>
+void QuantizeReals(Quantized_t *dst, const T *src, std::size_t count, double min, double max, std::size_t nQuantBits)
+{
+   static_assert(std::is_floating_point_v<T>);
+   static_assert(sizeof(T) <= sizeof(double));
+   R__ASSERT(nQuantBits >= 8 && nQuantBits <= 8 * sizeof(Quantized_t));
+
+   const std::size_t quantMax = (1ull << nQuantBits) - 1;
+   const double scale = quantMax / (max - min);
+   const std::size_t unusedBits = sizeof(Quantized_t) * 8 - nQuantBits;
+
+   for (std::size_t i = 0; i < count; ++i) {
+      const T elem = src[i];
+      assert(min <= elem && elem <= max);
+      // Adding 0.5 before the truncating conversion rounds to the nearest quantization step.
+      // Plain truncation would double the maximum error and let a value that was previously
+      // unquantized fall back by one step when quantized again.
+      const double e = 0.5 + (elem - min) * scale;
+      Quantized_t q = static_cast<Quantized_t>(e);
+      ByteSwapIfNecessary(q);
+
+      // double-check we actually used at most `nQuantBits`
+      assert(LeadingZeroes(q) >= unusedBits);
+
+      // we want to leave zeroes in the LSB, not the MSB, because we'll then drop the LSB
+      // when bit packing.
+      dst[i] = q << unusedBits;
+   }
+}
+
+/// Undoes the transformation performed by QuantizeReals() (assuming the same `count`, `min`, `max` and `nQuantBits`).
+/// Every quantized integer is mapped linearly from [0, 2^nQuantBits - 1] back to [min, max].
+///
+/// \param dst Output array; must have room for `count` elements
+/// \param src Input array of `count` quantized integers, with the unused bits in the LSB
+/// \param count Number of elements to convert
+/// \param min Lower bound of the value range
+/// \param max Upper bound of the value range
+/// \param nQuantBits Number of significant bits of each quantized integer
+template <typename T>
+void UnquantizeReals(T *dst, const Quantized_t *src, std::size_t count, double min, double max, std::size_t nQuantBits)
+{
+   static_assert(std::is_floating_point_v<T>);
+   static_assert(sizeof(T) <= sizeof(double));
+   R__ASSERT(nQuantBits >= 8 && nQuantBits <= 8 * sizeof(Quantized_t));
+
+   const std::size_t quantMax = (1ull << nQuantBits) - 1;
+   const double scale = (max - min) / quantMax;
+   const std::size_t unusedBits = sizeof(Quantized_t) * 8 - nQuantBits;
+
+   for (std::size_t i = 0; i < count; ++i) {
+      Quantized_t elem = src[i];
+      // Undo the LSB-preserving shift performed by QuantizeReals
+      assert(TrailingZeroes(elem) >= unusedBits);
+      elem >>= unusedBits;
+      ByteSwapIfNecessary(elem);
+
+      const double fq = static_cast<double>(elem);
+      // Scale first and add `min` last: adding a pre-scaled bias to `fq` loses the low bits of `fq`
+      // whenever |min| is large compared to the width of the range.
+      double e = fq * scale + min;
+      // Absorb rounding errors of the expression above so that the result never leaves [min, max].
+      if (e < min)
+         e = min;
+      else if (e > max)
+         e = max;
+      dst[i] = static_cast<T>(e);
+      // Compare at the precision of T: the narrowing conversion may round a bound to a neighbouring value,
+      // e.g. static_cast<float>(0.1) > 0.1.
+      assert(static_cast<T>(min) <= dst[i] && dst[i] <= static_cast<T>(max));
+   }
+}
+} // namespace Quantize
+
+/// Column element for in-memory `float` values that are stored as quantized, bit-packed integers (Real32Quant).
+/// The element is constructed with 0 bits on storage, so SetBitsOnStorage() has to be called before
+/// Pack() or Unpack(); the value range used for (un)quantization is set through SetValueRange().
+template <>
+class RColumnElement<float, EColumnType::kReal32Quant> : public RColumnElementBase {
+   /// Bounds of the value range. Until SetValueRange() is called they hold placeholders spanning all finite doubles.
+   /// (numeric_limits<double>::min() is the smallest positive normal value, hence the use of lowest().)
+   double fMin = std::numeric_limits<double>::lowest();
+   double fMax = std::numeric_limits<double>::max();
+
+public:
+   static constexpr bool kIsMappable = false;
+   static constexpr std::size_t kSize = sizeof(float);
+
+   RColumnElement() : RColumnElementBase(kSize, 0) {}
+
+   /// Sets the number of bits used on storage per element; must lie in the valid bit range of Real32Quant.
+   void SetBitsOnStorage(std::size_t bitsOnStorage) final
+   {
+      const auto [minBits, maxBits] = GetValidBitRange(EColumnType::kReal32Quant);
+      R__ASSERT(bitsOnStorage >= minBits && bitsOnStorage <= maxBits);
+      fBitsOnStorage = bitsOnStorage;
+   }
+
+   /// Sets the inclusive range [min, max] that the column values are quantized in.
+   void SetValueRange(double min, double max) final
+   {
+      // An empty, inverted or NaN range would result in a division by zero or a NaN scale when (un)quantizing.
+      R__ASSERT(min < max);
+      fMin = min;
+      fMax = max;
+   }
+
+   bool IsMappable() const final { return kIsMappable; }
+
+   /// Quantizes `count` floats from `src` and bit-packs them into `dst` using fBitsOnStorage bits per element.
+   void Pack(void *dst, const void *src, std::size_t count) const final
+   {
+      // Temporary buffer holding one full-width quantized integer per element before bit packing
+      auto quantized = std::make_unique<Quantize::Quantized_t[]>(count);
+      Quantize::QuantizeReals(quantized.get(), static_cast<const float *>(src), count, fMin, fMax, fBitsOnStorage);
+      ROOT::Experimental::Internal::BitPacking::PackBits(dst, quantized.get(), count, sizeof(Quantize::Quantized_t),
+                                                         fBitsOnStorage);
+   }
+
+   /// Unpacks `count` bit-packed integers from `src` and converts them back to floats in `dst`.
+   void Unpack(void *dst, const void *src, std::size_t count) const final
+   {
+      // Temporary buffer holding one full-width quantized integer per element after bit unpacking
+      auto quantized = std::make_unique<Quantize::Quantized_t[]>(count);
+      ROOT::Experimental::Internal::BitPacking::UnpackBits(quantized.get(), src, count, sizeof(Quantize::Quantized_t),
+                                                           fBitsOnStorage);
+      Quantize::UnquantizeReals(static_cast<float *>(dst), quantized.get(), count, fMin, fMax, fBitsOnStorage);
+   }
+};
+
 #define __RCOLUMNELEMENT_SPEC_BODY(CppT, BaseT, BitsOnStorage)  \
    static constexpr std::size_t kSize = sizeof(CppT);           \
    static constexpr std::size_t kBitsOnStorage = BitsOnStorage; \