Skip to content

Commit

Permalink
[ntuple] add column ValueRange logic
Browse files Browse the repository at this point in the history
uncomment commented-out code
  • Loading branch information
silverweed committed Sep 10, 2024
1 parent 0b3483c commit 4ec0cb2
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 13 deletions.
6 changes: 6 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RColumn.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,11 @@ public:
assert(fElement);
return fElement->GetBitsOnStorage();
}
/// Returns the (min, max) value range reported by this column's element.
/// The element must be set; this is checked with an assert.
std::pair<double, double> GetValueRange() const
{
assert(fElement);
return fElement->GetValueRange();
}
std::uint32_t GetIndex() const { return fIndex; }
std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
ColumnId_t GetColumnIdSource() const { return fColumnIdSource; }
Expand All @@ -367,6 +372,7 @@ public:
RPageStorage::ColumnHandle_t GetHandleSink() const { return fHandleSink; }

void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
/// Forwards the value range to this column's element. Unlike GetValueRange(), fElement is not asserted here.
void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
}; // class RColumn

} // namespace ROOT::Experimental::Internal
Expand Down
3 changes: 3 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RColumnElementBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ protected:
/// Size of the C++ value that corresponds to the on-disk element
std::size_t fSize;
std::size_t fBitsOnStorage;
/// This is only meaningful for column elements that support it (e.g. Real32Quant)
std::pair<double, double> fValueRange = {0, 0};

explicit RColumnElementBase(std::size_t size, std::size_t bitsOnStorage = 0)
: fSize(size), fBitsOnStorage(bitsOnStorage ? bitsOnStorage : 8 * size)
Expand Down Expand Up @@ -108,6 +110,7 @@ public:

std::size_t GetSize() const { return fSize; }
std::size_t GetBitsOnStorage() const { return fBitsOnStorage; }
/// Returns the stored (min, max) value range; the member is initialized to {0, 0}.
std::pair<double, double> GetValueRange() const { return fValueRange; }
std::size_t GetPackedSize(std::size_t nElements = 1U) const { return (nElements * fBitsOnStorage + 7) / 8; }
}; // class RColumnElementBase

Expand Down
13 changes: 13 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RField/RFieldFundamental.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,16 @@ extern template class RSimpleField<float>;
template <>
class RField<float> final : public RSimpleField<float> {
std::size_t fBitWidth = sizeof(float) * 8;
// Default value range: the full finite range of float.
// std::numeric_limits<float>::min() is the smallest *positive* normalized value, so lowest() is
// required for the default range to include zero and negative values.
double fValueMin = std::numeric_limits<float>::lowest();
double fValueMax = std::numeric_limits<float>::max();

protected:
/// Creates a new RField<float> named `newName` and copies this field's on-disk settings
/// (bit width and value range bounds) into it.
std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final
{
   auto copy = std::make_unique<RField<float>>(newName);
   copy->fValueMax = fValueMax;
   copy->fValueMin = fValueMin;
   copy->fBitWidth = fBitWidth;
   return copy;
}

Expand All @@ -374,11 +378,20 @@ public:

void AcceptVisitor(Detail::RFieldVisitor &visitor) const final;

/// Sets this field to use a half precision representation, occupying half as much storage space (16 bits) on disk.
/// This is mutually exclusive with `SetTruncated` and `SetQuantized`.
void SetHalfPrecision();
/// Set the precision of this field to `nBits`. The remaining (32 - `nBits`) bits will be truncated
/// from the number's mantissa. `nBits` must be $10 <= nBits <= 31$ (this means that at least 1 bit
/// of mantissa is always preserved). Note that this effectively rounds the number towards 0.
/// This is mutually exclusive with `SetHalfPrecision` and `SetQuantized`.
/// \note Calling `SetTruncated(16)` effectively makes this field a `bfloat16` on disk.
void SetTruncated(std::size_t nBits);
/// Sets this field to use a quantized integer representation using `nBits` per value.
/// This call promises that this field will only contain values contained in `[minValue, maxValue]` inclusive.
/// If a value outside this range is assigned to this field, the behavior is undefined.
/// This is mutually exclusive with `SetTruncated` and `SetHalfPrecision`.
/// \throws RException if `nBits` is outside the valid bit range of the `kReal32Quant` column type.
void SetQuantized(float minValue, float maxValue, std::size_t nBits);
};

extern template class RSimpleField<double>;
Expand Down
15 changes: 13 additions & 2 deletions tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,13 @@ class RColumnDescriptor {

public:
/// A pair of doubles (fMin, fMax) describing the value range attached to a column.
struct RValueRange {
double fMin, fMax;
double fMin = 0, fMax = 0;

RValueRange() = default;
RValueRange(double min, double max) : fMin(min), fMax(max) {}
/// Non-explicit, so a std::pair<double, double> converts implicitly (first -> fMin, second -> fMax).
/// NOTE(review): callers that pass a pair to the builder's ValueRange() appear to rely on this - confirm
/// before making it explicit.
RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}

/// Exact comparison of both bounds with `==`; no tolerance is applied.
bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
};

private:
Expand Down Expand Up @@ -1106,7 +1112,12 @@ public:
}
/// Sets the column's value range from explicit `min` and `max` bounds and returns the builder for chaining.
RColumnDescriptorBuilder &ValueRange(double min, double max)
{
fColumn.fValueRange = { min, max };
fColumn.fValueRange = {min, max};
return *this;
}
/// Assigns `valueRange` to the column as-is (an empty optional is stored as empty) and returns the builder
/// for chaining.
RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
{
fColumn.fValueRange = valueRange;
return *this;
}
DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
Expand Down
15 changes: 8 additions & 7 deletions tree/ntuple/v7/src/RColumnElement.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ std::unique_ptr<RColumnElementBase> GenerateColumnElementInternal(EColumnType ty
case EColumnType::kSplitInt16: return std::make_unique<RColumnElement<CppT, EColumnType::kSplitInt16>>();
case EColumnType::kSplitUInt16: return std::make_unique<RColumnElement<CppT, EColumnType::kSplitUInt16>>();
case EColumnType::kReal32Trunc: return std::make_unique<RColumnElement<CppT, EColumnType::kReal32Trunc>>();
case EColumnType::kReal32Quant: return std::make_unique<RColumnElement<CppT, EColumnType::kReal32Quant>>();
default: R__ASSERT(false);
}
// never here
Expand Down Expand Up @@ -831,9 +832,6 @@ template <typename T>
class RColumnElementQuantized : public RColumnElementBase {
static_assert(std::is_floating_point_v<T>);

double fMin = std::numeric_limits<double>::min();
double fMax = std::numeric_limits<double>::max();

public:
static constexpr bool kIsMappable = false;
static constexpr std::size_t kSize = sizeof(T);
Expand All @@ -849,26 +847,29 @@ public:

/// Stores the value range for this element. Both bounds are checked with R__ASSERT to lie within
/// [numeric_limits<T>::lowest(), numeric_limits<T>::max()].
/// NOTE(review): there is no check here that min <= max - confirm that callers guarantee it.
void SetValueRange(double min, double max) final
{
fMin = min;
fMax = max;
R__ASSERT(min >= std::numeric_limits<T>::lowest());
R__ASSERT(max <= std::numeric_limits<T>::max());
fValueRange = {min, max};
}

bool IsMappable() const final { return kIsMappable; }

/// Quantizes `count` values from `src` into a temporary buffer using the value range and fBitsOnStorage,
/// then bit-packs the quantized integers into `dst` with fBitsOnStorage bits per value.
/// NOTE(review): `src` is cast to `const float *` although the class is templated on T - confirm this
/// element is only instantiated with T = float.
void Pack(void *dst, const void *src, std::size_t count) const final
{
// Temporary buffer holding one quantized integer per input value.
auto quantized = std::make_unique<Quantize::Quantized_t[]>(count);
Quantize::QuantizeReals(quantized.get(), reinterpret_cast<const float *>(src), count, fMin, fMax, fBitsOnStorage);
const auto [min, max] = fValueRange;
Quantize::QuantizeReals(quantized.get(), reinterpret_cast<const float *>(src), count, min, max, fBitsOnStorage);
ROOT::Experimental::Internal::BitPacking::PackBits(dst, quantized.get(), count, sizeof(Quantize::Quantized_t),
fBitsOnStorage);
}

/// Inverse of Pack(): unpacks `count` quantized integers of fBitsOnStorage bits each from `src` into a
/// temporary buffer, then converts them back to reals in `dst` using the value range.
/// NOTE(review): `dst` is cast to `float *` although the class is templated on T - confirm this element
/// is only instantiated with T = float.
void Unpack(void *dst, const void *src, std::size_t count) const final
{
// Temporary buffer receiving one quantized integer per stored value.
auto quantized = std::make_unique<Quantize::Quantized_t[]>(count);
const auto [min, max] = fValueRange;
ROOT::Experimental::Internal::BitPacking::UnpackBits(quantized.get(), src, count, sizeof(Quantize::Quantized_t),
fBitsOnStorage);
Quantize::UnquantizeReals(reinterpret_cast<float *>(dst), quantized.get(), count, fMin, fMax, fBitsOnStorage);
Quantize::UnquantizeReals(reinterpret_cast<float *>(dst), quantized.get(), count, min, max, fBitsOnStorage);
}
};

Expand Down
31 changes: 29 additions & 2 deletions tree/ntuple/v7/src/RField.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1387,8 +1387,12 @@ template class ROOT::Experimental::RSimpleField<float>;
/// Returns the column representations of RField<float>. The object is a function-local static built from
/// the listed single-column type sets (kSplitReal32, kReal32, kReal16, kReal32Trunc, kReal32Quant); the
/// second constructor argument is an empty list.
const ROOT::Experimental::RFieldBase::RColumnRepresentations &
ROOT::Experimental::RField<float>::GetColumnRepresentations() const
{
static RColumnRepresentations representations(
{{EColumnType::kSplitReal32}, {EColumnType::kReal32}, {EColumnType::kReal16}, {EColumnType::kReal32Trunc}}, {});
static RColumnRepresentations representations({{EColumnType::kSplitReal32},
{EColumnType::kReal32},
{EColumnType::kReal16},
{EColumnType::kReal32Trunc},
{EColumnType::kReal32Quant}},
{});
return representations;
}

Expand All @@ -1401,6 +1405,9 @@ void ROOT::Experimental::RField<float>::GenerateColumns()
auto &column = fAvailableColumns.emplace_back(Internal::RColumn::Create<float>(r[i][0], 0, i));
if (r[i][0] == EColumnType::kReal32Trunc) {
column->SetBitsOnStorage(fBitWidth);
} else if (r[i][0] == EColumnType::kReal32Quant) {
column->SetBitsOnStorage(fBitWidth);
column->SetValueRange(fValueMin, fValueMax);
}
}
fPrincipalColumn = fAvailableColumns[0].get();
Expand All @@ -1420,6 +1427,13 @@ void ROOT::Experimental::RField<float>::GenerateColumns(const RNTupleDescriptor
const auto &fdesc = desc.GetFieldDescriptor(GetOnDiskId());
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
} else if (onDiskTypes[0] == EColumnType::kReal32Quant) {
const auto &fdesc = desc.GetFieldDescriptor(GetOnDiskId());
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
assert(coldesc.GetValueRange().has_value());
const auto [valMin, valMax] = *coldesc.GetValueRange();
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
column->SetValueRange(valMin, valMax);
}
fColumnRepresentatives.emplace_back(onDiskTypes);
if (representationIndex > 0) {
Expand Down Expand Up @@ -1452,6 +1466,19 @@ void ROOT::Experimental::RField<float>::SetTruncated(std::size_t nBits)
fBitWidth = nBits;
}

/// Switches this field to the quantized on-disk representation (`kReal32Quant`).
///
/// \param minValue Lower bound of the values this field will hold.
/// \param maxValue Upper bound of the values this field will hold; must be strictly greater than `minValue`.
/// \param nBits Number of bits per value on storage; must lie in the valid bit range of `kReal32Quant`.
/// \throws RException if `nBits` is out of range or if `minValue < maxValue` does not hold (this includes
///         NaN bounds). All validation happens before any state of the field is modified.
void ROOT::Experimental::RField<float>::SetQuantized(float minValue, float maxValue, std::size_t nBits)
{
   const auto &[minBits, maxBits] = Internal::RColumnElementBase::GetValidBitRange(EColumnType::kReal32Quant);
   if (nBits < minBits || nBits > maxBits) {
      throw RException(R__FAIL("SetQuantized() argument nBits = " + std::to_string(nBits) + " is out of valid range [" +
                               std::to_string(minBits) + ", " + std::to_string(maxBits) + "])"));
   }
   // Written as a negated `<` so that NaN bounds are rejected too. An empty or inverted range cannot be
   // mapped onto the quantized integer interval, so fail early instead of storing it.
   if (!(minValue < maxValue)) {
      throw RException(R__FAIL("SetQuantized() requires minValue < maxValue, but got minValue = " +
                               std::to_string(minValue) + ", maxValue = " + std::to_string(maxValue)));
   }
   SetColumnRepresentatives({{EColumnType::kReal32Quant}});
   fBitWidth = nBits;
   fValueMin = minValue;
   fValueMax = maxValue;
}

//------------------------------------------------------------------------------

template class ROOT::Experimental::RSimpleField<double>;
Expand Down
4 changes: 3 additions & 1 deletion tree/ntuple/v7/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ bool ROOT::Experimental::RColumnDescriptor::operator==(const RColumnDescriptor &
{
return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex;
fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
fValueRange == other.fValueRange;
}

ROOT::Experimental::RColumnDescriptor ROOT::Experimental::RColumnDescriptor::Clone() const
Expand All @@ -143,6 +144,7 @@ ROOT::Experimental::RColumnDescriptor ROOT::Experimental::RColumnDescriptor::Clo
clone.fIndex = fIndex;
clone.fFirstElementIndex = fFirstElementIndex;
clone.fRepresentationIndex = fRepresentationIndex;
clone.fValueRange = fValueRange;
return clone;
}

Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/src/RNTupleFillContext.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ ROOT::Experimental::RNTupleFillContext::RNTupleFillContext(std::unique_ptr<RNTup
: fSink(std::move(sink)), fModel(std::move(model)), fMetrics("RNTupleFillContext")
{
fModel->Freeze();
fSink->Init(*fModel.get());
fSink->Init(*fModel);
fMetrics.ObserveMetrics(fSink->GetMetrics());

const auto &writeOpts = fSink->GetWriteOptions();
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/v7/src/RNTupleSerialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,7 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeSchemaDescription(co
columnBuilder.LogicalColumnId(aliasColumnIdRangeBegin + i).PhysicalColumnId(physicalId).FieldId(fieldId);
const auto &physicalColumnDesc = descBuilder.GetDescriptor().GetColumnDescriptor(physicalId);
columnBuilder.BitsOnStorage(physicalColumnDesc.GetBitsOnStorage());
columnBuilder.ValueRange(physicalColumnDesc.GetValueRange());
columnBuilder.Type(physicalColumnDesc.GetType());
columnBuilder.RepresentationIndex(physicalColumnDesc.GetRepresentationIndex());
columnBuilder.Index(fnNextColumnIndex(columnBuilder.GetFieldId(), columnBuilder.GetRepresentationIndex()));
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/v7/src/RPageSourceFriends.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void ROOT::Experimental::Internal::RPageSourceFriends::AddVirtualField(const RNT
.PhysicalColumnId(physicalId)
.FieldId(virtualFieldId)
.BitsOnStorage(c.GetBitsOnStorage())
.ValueRange(c.GetValueRange())
.Type(c.GetType())
.Index(c.GetIndex())
.RepresentationIndex(c.GetRepresentationIndex());
Expand Down
2 changes: 2 additions & 0 deletions tree/ntuple/v7/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,7 @@ ROOT::Experimental::Internal::RPagePersistentSink::AddColumn(DescriptorId_t fiel
.PhysicalColumnId(columnId)
.FieldId(fieldId)
.BitsOnStorage(column.GetBitsOnStorage())
.ValueRange(column.GetValueRange())
.Type(column.GetType())
.Index(column.GetIndex())
.RepresentationIndex(column.GetRepresentationIndex())
Expand Down Expand Up @@ -668,6 +669,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::UpdateSchema(const RNTup
.PhysicalColumnId(source.GetLogicalId())
.FieldId(fieldId)
.BitsOnStorage(source.GetBitsOnStorage())
.ValueRange(source.GetValueRange())
.Type(source.GetType())
.Index(source.GetIndex())
.RepresentationIndex(source.GetRepresentationIndex());
Expand Down
20 changes: 20 additions & 0 deletions tree/ntuple/v7/test/ntuple_packing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,15 @@ static void AddReal32TruncField(RNTupleModel &model, const std::string &fieldNam
model.AddField(std::move(fld));
}

/// Test helper: adds a float field named `fieldName` to `model`, configured as quantized with `nBits`
/// bits per value over the range [`min`, `max`].
static void
AddReal32QuantField(RNTupleModel &model, const std::string &fieldName, std::size_t nBits, double min, double max)
{
   auto fld = std::make_unique<RField<float>>(fieldName);
   // SetQuantized() already selects the kReal32Quant column representation, so no separate
   // SetColumnRepresentatives() call is needed. The bounds are narrowed to float explicitly because
   // SetQuantized() takes float arguments.
   fld->SetQuantized(static_cast<float>(min), static_cast<float>(max), nBits);
   model.AddField(std::move(fld));
}

TEST(Packing, OnDiskEncoding)
{
FileRaii fileGuard("test_ntuple_packing_ondiskencoding.root");
Expand All @@ -272,6 +281,7 @@ TEST(Packing, OnDiskEncoding)
AddField<ClusterSize_t, EColumnType::kSplitIndex32>(*model, "index32");
AddField<ClusterSize_t, EColumnType::kSplitIndex64>(*model, "index64");
AddReal32TruncField(*model, "float32Trunc", 11);
AddReal32QuantField(*model, "float32Quant", 7, 0.0, 1.0);
auto fldStr = std::make_unique<RField<std::string>>("str");
model->AddField(std::move(fldStr));
{
Expand All @@ -292,6 +302,7 @@ TEST(Packing, OnDiskEncoding)
*e->GetPtr<ClusterSize_t>("index32") = 39916801; // 0x0261 1501
*e->GetPtr<ClusterSize_t>("index64") = 0x0706050403020100L;
*e->GetPtr<float>("float32Trunc") = -3.75f; // 1 10000000 11100000000000000000000 == 0xC0700000
*e->GetPtr<float>("float32Quant") = 0.69f; // quantized to 87 == 0b1010111
e->GetPtr<std::string>("str")->assign("abc");

writer->Fill(*e);
Expand All @@ -308,6 +319,7 @@ TEST(Packing, OnDiskEncoding)
*e->GetPtr<ClusterSize_t>("index32") = 39916808; // d(previous) == 7
*e->GetPtr<ClusterSize_t>("index64") = 0x070605040302010DL; // d(previous) == 13
*e->GetPtr<float>("float32Trunc") = 1.875f; // 0 01111111 11100000000000000000000 == 0x3ff00000
*e->GetPtr<float>("float32Quant") = 0.875f; // quantized to 111: 0b1101111
e->GetPtr<std::string>("str")->assign("de");

writer->Fill(*e);
Expand Down Expand Up @@ -377,6 +389,11 @@ TEST(Packing, OnDiskEncoding)
unsigned char expF32Trunc[] = {0x03, 0xFE, 0x0F};
EXPECT_EQ(memcmp(sealedPage.GetBuffer(), expF32Trunc, sizeof(expF32Trunc)), 0);

source->LoadSealedPage(fnGetColumnId("float32Quant"), RClusterIndex(0, 0), sealedPage);
// Two tightly packed 7bit quantized ints: 0b1101111 + 0b1010111 = 0b11011111010111 = 0x37d7
unsigned char expF32Quant[] = {0xd7, 0x37};
EXPECT_EQ(memcmp(sealedPage.GetBuffer(), expF32Quant, sizeof(expF32Quant)), 0);

auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
EXPECT_EQ(EColumnType::kIndex64, reader->GetModel().GetField("str").GetColumnRepresentatives()[0][0]);
EXPECT_EQ(2u, reader->GetNEntries());
Expand All @@ -386,6 +403,9 @@ TEST(Packing, OnDiskEncoding)
auto viewFtrunc = reader->GetView<float>("float32Trunc");
EXPECT_EQ(-3.5f, viewFtrunc(0));
EXPECT_EQ(1.75f, viewFtrunc(1));
auto viewFquant = reader->GetView<float>("float32Quant");
EXPECT_NEAR(0.69, viewFquant(0), 0.005f);
EXPECT_NEAR(0.875f, viewFquant(1), 0.005f);
}

TEST(Packing, Real32Trunc)
Expand Down

0 comments on commit 4ec0cb2

Please sign in to comment.