Skip to content

Commit bc3ca05

Browse files
committed
Enhancement: simple code optimization
1. Explicit inline functions. 2. Instead of checking the physical size of the file every time a tuple is inserted, it is checked every 16 tuples. performance result ``` create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 6124.535 ms (00:06.125) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5993.682 ms (00:05.994) -- optimized with this commit create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5713.184 ms (00:05.713) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5430.221 ms (00:05.430) ```
1 parent 6e0c40b commit bc3ca05

File tree

7 files changed

+54
-18
lines changed

7 files changed

+54
-18
lines changed

contrib/pax_storage/src/cpp/access/pax_dml_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ void CPaxDmlStateLocal::Reset() { cbdb::pax_memory_context = nullptr; }
104104
CPaxDmlStateLocal::CPaxDmlStateLocal()
105105
: last_oid_(InvalidOid), cb_{.func = DmlStateResetCallback, .arg = NULL} {}
106106

107-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
107+
pg_attribute_always_inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
108108
CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
109109
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue> value;
110110

@@ -121,7 +121,7 @@ CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
121121
return value;
122122
}
123123

124-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
124+
pg_attribute_always_inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
125125
CPaxDmlStateLocal::FindDmlState(const Oid &oid) {
126126
Assert(OidIsValid(oid));
127127

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,6 @@ void cbdb::MemoryCtxRegisterResetCallback(MemoryContext context,
124124
CBDB_WRAP_END;
125125
}
126126

127-
Oid cbdb::RelationGetRelationId(Relation rel) {
128-
CBDB_WRAP_START;
129-
{ return RelationGetRelid(rel); }
130-
CBDB_WRAP_END;
131-
}
132-
133127
#ifdef RUN_GTEST
134128
Datum cbdb::DatumFromCString(const char *src, size_t length) {
135129
CBDB_WRAP_START;

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ void MemoryCtxDelete(MemoryContext memory_context);
114114
void MemoryCtxRegisterResetCallback(MemoryContext context,
115115
MemoryContextCallback *cb);
116116

117-
Oid RelationGetRelationId(Relation rel);
118-
119117
static inline void *DatumToPointer(Datum d) noexcept {
120118
return DatumGetPointer(d);
121119
}
@@ -164,6 +162,10 @@ static inline float8 DatumToFloat8(Datum d) noexcept {
164162
return DatumGetFloat8(d);
165163
}
166164

165+
static pg_attribute_always_inline Oid RelationGetRelationId(Relation rel) noexcept {
166+
return RelationGetRelid(rel);
167+
}
168+
167169
BpChar *BpcharInput(const char *s, size_t len, int32 atttypmod);
168170
VarChar *VarcharInput(const char *s, size_t len, int32 atttypmod);
169171
text *CstringToText(const char *s, size_t len);

contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,11 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
361361
// Numeric always need ensure that with the 4B header, otherwise it will
362362
// be converted twice in the vectorization path.
363363
if (required_stats_cols[i] || VARATT_IS_COMPRESSED(tts_value_vl) ||
364-
VARATT_IS_EXTERNAL(tts_value_vl) || attrs->atttypid == NUMERICOID) {
364+
VARATT_IS_EXTERNAL(tts_value_vl)
365+
#ifdef VEC_BUILD
366+
|| attrs->atttypid == NUMERICOID
367+
#endif
368+
) {
365369
// still detoast the origin toast
366370
detoast_vl = cbdb::PgDeToastDatum(tts_value_vl);
367371
Assert(detoast_vl != nullptr);

contrib/pax_storage/src/cpp/storage/pax.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#include "storage/vec/pax_vec_reader.h"
5050
#endif
5151

52+
#define PAX_SPLIT_CHECK_INTERVAL (16)
53+
5254
namespace paxc {
5355
class IndexUpdaterInternal {
5456
public:
@@ -280,14 +282,19 @@ void TableWriter::Open() {
280282
// insert tuple into the aux table before inserting any tuples.
281283
cbdb::InsertMicroPartitionPlaceHolder(RelationGetRelid(relation_),
282284
current_blockno_);
285+
cur_physical_size_ = 0;
283286
}
284287

285288
void TableWriter::WriteTuple(TupleTableSlot *slot) {
286289
Assert(writer_);
287290
Assert(strategy_);
288-
// should check split strategy before write tuple
289-
// otherwise, may got a empty file in the disk
290-
if (strategy_->ShouldSplit(writer_->PhysicalSize(), num_tuples_)) {
291+
// Sampled split check to reduce PhysicalSize() overhead
292+
// We first perform a sampled pre-write check to avoid empty files.
293+
if ((num_tuples_ % PAX_SPLIT_CHECK_INTERVAL) == 0) {
294+
cur_physical_size_ = writer_->PhysicalSize();
295+
}
296+
297+
if (strategy_->ShouldSplit(cur_physical_size_, num_tuples_)) {
291298
writer_->Close();
292299
writer_ = nullptr;
293300
Open();

contrib/pax_storage/src/cpp/storage/pax.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class TableWriter {
131131
std::vector<std::tuple<ColumnEncoding_Kind, int>> encoding_opts_;
132132

133133
bool is_dfs_table_space_;
134+
size_t cur_physical_size_ = 0;
134135
};
135136

136137
class TableReader final {

contrib/pax_storage/src/cpp/storage/vec/pax_porc_adpater.cc

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
#ifdef VEC_BUILD
3131

32+
#include <stdlib.h>
33+
3234
#include "comm/vec_numeric.h"
3335
#include "storage/columns/pax_column_traits.h"
3436
#include "storage/toast/pax_toast.h"
@@ -38,6 +40,22 @@
3840
#endif
3941
namespace pax {
4042

43+
static inline struct varlena *VarlenaShortTo4B(struct varlena *attr) {
44+
Assert(attr != nullptr);
45+
Assert(VARATT_IS_SHORT(attr));
46+
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
47+
Size new_size = data_size + VARHDRSZ;
48+
49+
struct varlena *new_attr =
50+
reinterpret_cast<struct varlena *>(malloc(new_size));
51+
52+
Assert(new_attr != nullptr);
53+
54+
SET_VARSIZE(new_attr, new_size);
55+
memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
56+
return new_attr;
57+
}
58+
4159
static void CopyFixedRawBufferWithNull(
4260
PaxColumn *column, std::shared_ptr<Bitmap8> visibility_map_bitset,
4361
size_t bitset_index_begin, size_t range_begin, size_t range_lens,
@@ -235,16 +253,22 @@ static void CopyDecimalBuffer(PaxColumn *column,
235253
out_data_buffer->Brush(type_len);
236254
} else {
237255
Numeric numeric;
256+
bool should_free = false;
238257
size_t num_len = 0;
239258
std::tie(buffer, buffer_len) =
240259
column->GetBuffer(data_index_begin + non_null_offset);
241260

242261
auto vl = (struct varlena *)DatumGetPointer(buffer);
243-
Assert(!(VARATT_IS_EXTERNAL(vl) || VARATT_IS_COMPRESSED(vl) ||
244-
VARATT_IS_SHORT(vl)));
262+
Assert(!(VARATT_IS_EXTERNAL(vl) || VARATT_IS_COMPRESSED(vl)));
245263
num_len = VARSIZE_ANY_EXHDR(vl);
246-
// direct cast
247-
numeric = (Numeric)(buffer);
264+
// it has been detoasted in OrcWriter::PrepareWriteTuple, except numeric
265+
// type with short header should be detoasted to 4B header
266+
if (unlikely(VARATT_IS_SHORT(vl))) {
267+
numeric = VarlenaShortTo4B(vl);
268+
should_free = true;
269+
} else { // direct cast
270+
numeric = (Numeric)(buffer);
271+
}
248272

249273
char *dest_buff = out_data_buffer->GetAvailableBuffer();
250274
Assert(out_data_buffer->Available() >= (size_t)type_len);
@@ -253,6 +277,10 @@ static void CopyDecimalBuffer(PaxColumn *column,
253277
(int64 *)(dest_buff + sizeof(int64)));
254278
out_data_buffer->Brush(type_len);
255279
non_null_offset++;
280+
281+
if (should_free) {
282+
free(numeric);
283+
}
256284
}
257285
}
258286

0 commit comments

Comments
 (0)