Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 36 additions & 12 deletions contrib/pax_storage/src/cpp/comm/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,31 @@ struct BitmapRaw final {
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
return (index >> BM_WORD_SHIFTS) < size;
}
inline bool Empty() const {


// Returns true when every bit in the half-open range [0, end_index) is zero,
// or when no buffer is attached. Bits at or beyond `end_index` are ignored, so
// a bitmap whose unused tail was pre-filled with ones can still be tested.
// The range is clamped to the words actually held by the bitmap.
inline bool Empty(uint32 end_index) const {
  if (!bitmap) return true;

  uint32 end_word = BM_INDEX_WORD_OFF(end_index);
  uint32 end_bit_offset = BM_INDEX_BIT_OFF(end_index);

  // Whole words that lie entirely below end_index.
  for (uint32 i = 0; i < end_word && i < size; i++) {
    if (bitmap[i] != 0) return false;
  }

  // Trailing partial word: only its low `end_bit_offset` bits are in range.
  if (end_word < size && end_bit_offset > 0) {
    T mask = (T(1) << end_bit_offset) - 1;
    if (bitmap[end_word] & mask) return false;
  }

  return true;
}

inline bool Empty() const {
return Empty(size);
}

BitmapRaw() = default;
BitmapRaw(T *buffer, size_t size) : bitmap(buffer), size(size) {}
BitmapRaw(const BitmapRaw &) = delete;
Expand All @@ -160,13 +179,14 @@ struct BitmapRaw final {
template <typename T>
class BitmapTpl final {
public:
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32);
explicit BitmapTpl(uint32 initial_size = 16) {
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32, uint8);
explicit BitmapTpl(uint32 initial_size = 16, uint8 init_value = 0) {
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
sizeof(T) == 8);
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
policy_ = DefaultBitmapMemoryPolicy;
policy_(raw_, Max(initial_size, 16));
policy_(raw_, Max(initial_size, 16), init_value);
init_value_ = init_value;
}
explicit BitmapTpl(const BitmapRaw<T> &raw) {
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
Expand Down Expand Up @@ -205,11 +225,11 @@ class BitmapTpl final {

inline size_t WordBits() const { return BM_WORD_BITS; }
// Sets the bit at `index`. If `index` is beyond the current capacity the
// memory policy is invoked first, receiving the bitmap's fill byte; the
// policies visible in this class either grow the storage or raise.
inline void Set(uint32 index) {
  if (unlikely(!raw_.HasEnoughSpace(index))) {
    policy_(raw_, index, init_value_);
  }
  raw_.Set(index);
}
// Forwards to BitmapRaw::SetN(index) after making sure `index` is addressable
// (the memory policy is invoked with the bitmap's fill byte when it is not).
// NOTE(review): SetN presumably sets a run of leading bits up to `index` —
// confirm the exact range against BitmapRaw::SetN.
inline void SetN(uint32 index) {
  if (unlikely(!raw_.HasEnoughSpace(index))) {
    policy_(raw_, index, init_value_);
  }
  raw_.SetN(index);
}
inline void Clear(uint32 index) {
Expand All @@ -228,7 +248,7 @@ class BitmapTpl final {
}
// Inverts the bit at `index` and returns its previous value.
// If `index` is beyond the current capacity the memory policy is invoked
// first, receiving the bitmap's fill byte.
inline bool Toggle(uint32 index) {
  if (unlikely(!raw_.HasEnoughSpace(index))) {
    policy_(raw_, index, init_value_);
  }
  return raw_.Toggle(index);
}
// count bits in range [0, index]
Expand All @@ -247,24 +267,27 @@ class BitmapTpl final {
}

inline bool Empty() const { return raw_.Empty(); }
// Range-limited emptiness test: forwards `end_index` to the underlying raw
// bitmap and returns its result unchanged.
inline bool Empty(uint32 end_index) const {
return raw_.Empty(end_index);
}

BitmapMemoryPolicy Policy() const { return policy_; }

const BitmapRaw<T> &Raw() const { return raw_; }
BitmapRaw<T> &Raw() { return raw_; }

static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index) {
static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index, uint8 init_value = 0) {
auto old_bitmap = raw.bitmap;
auto old_size = raw.size;
auto size = Max(BM_INDEX_WORD_OFF(index) + 1, old_size * 2);
auto p = PAX_NEW_ARRAY<T>(size);
if (old_size > 0) memcpy(p, old_bitmap, sizeof(T) * old_size);
memset(&p[old_size], 0, sizeof(T) * (size - old_size));
memset(&p[old_size], init_value, sizeof(T) * (size - old_size));
raw.bitmap = p;
raw.size = size;
PAX_DELETE_ARRAY(old_bitmap);
}
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/) {
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/, uint8 /*init_value*/) {
// raise
CBDB_RAISE(cbdb::CException::kExTypeInvalidMemoryOperation);
}
Expand Down Expand Up @@ -315,6 +338,7 @@ class BitmapTpl final {

BitmapRaw<T> raw_;
BitmapMemoryPolicy policy_;
uint8 init_value_ = 0;
};

using Bitmap8 = BitmapTpl<uint8>;
Expand Down
15 changes: 11 additions & 4 deletions contrib/pax_storage/src/cpp/storage/columns/pax_column.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,21 @@ size_t PaxColumn::GetRangeNonNullRows(size_t start_pos, size_t len) {

void PaxColumn::CreateNulls(size_t cap) {
Assert(!null_bitmap_);
null_bitmap_ = std::make_unique<Bitmap8>(cap);
null_bitmap_->SetN(total_rows_);
// By default, initialize every bit in the null bitmap to 1.
// This is based on the assumption that null values are much less frequent
// than non-null values in most datasets. As a result, when appending non-null
// values, we can simply skip setting the bit to 1, since it is already set.
// Only when appending a null value do we need to explicitly clear the
// corresponding bit.
null_bitmap_ = std::make_unique<Bitmap8>(cap, 0xff);
}

void PaxColumn::AppendNull() {
if (!null_bitmap_) {
CreateNulls(DEFAULT_CAPACITY);
// Ensure that the capacity of null_bitmap_ is pax_max_tuples_per_group.
// This design allows the use of raw_bitmap in normal cases without
// incurring the overhead of checking the bitmap's capacity.
CreateNulls(pax::pax_max_tuples_per_group);
}
null_bitmap_->Clear(total_rows_);
++total_rows_;
Expand All @@ -111,7 +119,6 @@ void PaxColumn::AppendToast(char *buffer, size_t size) {
}

// Records one more non-null row. Both parameters are unused here; this
// function only maintains the row bookkeeping.
void PaxColumn::Append(char * /*buffer*/, size_t /*size*/) {
// When a null bitmap exists, mark the bit at the current row position as set
// before the counters advance.
if (null_bitmap_) null_bitmap_->Set(total_rows_);
++total_rows_;
++non_null_rows_;
}
Expand Down
12 changes: 10 additions & 2 deletions contrib/pax_storage/src/cpp/storage/columns/pax_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@

namespace pax {

#define DEFAULT_CAPACITY MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))
#define DEFAULT_CAPACITY \
MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))

// Used to mapping pg_type
enum PaxColumnTypeInMem {
Expand Down Expand Up @@ -230,7 +231,14 @@ class PaxColumn {
inline bool HasNull() { return null_bitmap_ != nullptr; }

// Are all values null?
// True when a null bitmap exists and every bit in [0, total_rows_) is zero.
// The check is limited to that range on purpose: to avoid capacity checks the
// null bitmap is allocated for pax_max_tuples_per_group rows, so a group that
// holds fewer tuples leaves trailing bits beyond total_rows_ that must not
// influence the result.
inline bool AllNull() const {
  return null_bitmap_ && null_bitmap_->Empty(total_rows_);
}

// Set the null bitmap
inline void SetBitmap(std::unique_ptr<Bitmap8> null_bitmap) {
Expand Down
Loading