Skip to content

Commit

Permalink
[unify type system](remove unused type desc) remove some code (apache…
Browse files Browse the repository at this point in the history
…#17921)

There are many type definitions in the BE. We should unify the type system to simplify development.



---------

Co-authored-by: yiguolei <yiguolei@gmail.com>
  • Loading branch information
yiguolei and Doris-Extras authored Mar 19, 2023
1 parent a993ac9 commit dd53bc1
Show file tree
Hide file tree
Showing 27 changed files with 154 additions and 527 deletions.
1 change: 0 additions & 1 deletion be/src/exec/base_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "common/utils.h"
#include "exec/exec_node.h"
#include "runtime/descriptors.h"
#include "runtime/raw_value.h"
#include "runtime/runtime_state.h"
#include "vec/data_types/data_type_factory.hpp"

Expand Down
74 changes: 0 additions & 74 deletions be/src/exec/schema_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ Status SchemaScanner::init(SchemaScannerParam* param, ObjectPool* pool) {
return Status::InternalError("invalid parameter");
}

RETURN_IF_ERROR(create_tuple_desc(pool));

_param = param;
_is_init = true;

Expand Down Expand Up @@ -302,76 +300,4 @@ Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
return Status::OK();
}

// Builds the tuple descriptor for this scanner's output row from _columns and
// stores it in _tuple_desc.
//
// One SlotDescriptor is created per column, in column order. Slot byte offsets
// start right after the null-indicator bytes (one bit per nullable column,
// rounded up to whole bytes) and advance by each column's size. Every
// descriptor is registered with 'pool' through pool->add().
//
// Returns Status::InternalError if a descriptor pointer comes back as nullptr,
// Status::OK() otherwise.
Status SchemaScanner::create_tuple_desc(ObjectPool* pool) {
    int nullable_count = 0;
    for (const auto& column : _columns) {
        if (column.is_null) {
            ++nullable_count;
        }
    }

    // Column data begins after the bytes reserved for the null indicators.
    int byte_offset = (nullable_count + 7) / 8;
    int null_byte = 0;
    int null_bit = 0;
    std::vector<SlotDescriptor*> slot_descs;

    for (int col_idx = 0; col_idx < _columns.size(); ++col_idx) {
        const auto& column = _columns[col_idx];

        TSlotDescriptor t_slot;
        t_slot.id = col_idx;
        t_slot.__set_slotIdx(col_idx);
        t_slot.__set_columnPos(col_idx);
        t_slot.__set_colName(column.name);
        t_slot.__set_byteOffset(byte_offset);
        t_slot.__set_isMaterialized(true);

        if (column.type != TYPE_DECIMALV2) {
            TypeDescriptor type_desc(column.type);
            // Precision/scale are only carried over when both are non-negative.
            const bool has_precision_and_scale = column.precision >= 0 && column.scale >= 0;
            if (has_precision_and_scale) {
                type_desc.precision = column.precision;
                type_desc.scale = column.scale;
            }
            t_slot.__set_slotType(type_desc.to_thrift());
        } else {
            // DECIMALV2 columns always use the fixed (27, 9) decimal type.
            t_slot.__set_slotType(TypeDescriptor::create_decimalv2_type(27, 9).to_thrift());
        }

        if (column.is_null) {
            t_slot.__set_nullIndicatorByte(null_byte);
            t_slot.__set_nullIndicatorBit(null_bit);
            // Move to the next indicator bit; after 8 bits roll over to the next byte.
            if (++null_bit == 8) {
                null_bit = 0;
                ++null_byte;
            }
        } else {
            // Non-nullable column: bit -1 marks "no null indicator".
            t_slot.__set_nullIndicatorByte(0);
            t_slot.__set_nullIndicatorBit(-1);
        }

        SlotDescriptor* slot_desc = pool->add(new (std::nothrow) SlotDescriptor(t_slot));
        if (slot_desc == nullptr) {
            return Status::InternalError("no memory for _tuple_desc.");
        }
        slot_descs.push_back(slot_desc);

        byte_offset += column.size;
    }

    TTupleDescriptor t_tuple;
    t_tuple.__set_byteSize(byte_offset);
    t_tuple.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);

    _tuple_desc = pool->add(new (std::nothrow) TupleDescriptor(t_tuple));
    if (_tuple_desc == nullptr) {
        return Status::InternalError("no memory for _tuple_desc.");
    }

    for (SlotDescriptor* slot_desc : slot_descs) {
        _tuple_desc->add_slot(slot_desc);
    }
    return Status::OK();
}

} // namespace doris
3 changes: 0 additions & 3 deletions be/src/exec/schema_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,19 @@ class SchemaScanner {
const std::vector<ColumnDesc>& get_column_desc() const { return _columns; }
// factory function
static SchemaScanner* create(TSchemaTableType::type type);
const TupleDescriptor* tuple_desc() const { return _tuple_desc; }
TSchemaTableType::type type() const { return _schema_table_type; }

static void set_doris_server(DorisServer* doris_server) { _s_doris_server = doris_server; }

protected:
Status fill_dest_column_for_range(vectorized::Block* block, size_t pos,
const std::vector<void*>& datas);
Status create_tuple_desc(ObjectPool* pool);

bool _is_init;
// this is used for sub class
SchemaScannerParam* _param;
// schema table's column desc
std::vector<ColumnDesc> _columns;
TupleDescriptor* _tuple_desc;

static DorisServer* _s_doris_server;

Expand Down
1 change: 1 addition & 0 deletions be/src/exec/tablet_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "exec/tablet_info.h"

#include "runtime/large_int_value.h"
#include "runtime/raw_value.h"
#include "util/string_parser.hpp"

namespace doris {
Expand Down
1 change: 0 additions & 1 deletion be/src/exec/tablet_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "gen_cpp/descriptors.pb.h"
#include "olap/tablet_schema.h"
#include "runtime/descriptors.h"
#include "runtime/raw_value.h"
#include "vec/core/block.h"

namespace doris {
Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/buffer_control_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "gen_cpp/PaloInternalService_types.h"
#include "gen_cpp/internal_service.pb.h"
#include "runtime/exec_env.h"
#include "runtime/raw_value.h"
#include "runtime/thread_context.h"
#include "service/brpc.h"
#include "util/thrift_util.h"
Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/collection_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "common/object_pool.h"
#include "common/utils.h"
#include "runtime/mem_pool.h"
#include "runtime/raw_value.h"
#include "runtime/types.h"
#include "vec/common/string_ref.h"

Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/collection_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ struct ArrayIteratorFunctionsBase;
class ArrayIterator;
class Status;
class ObjectPool;
struct TypeDescriptor;

template <PrimitiveType type>
struct ArrayIteratorFunctions;
Expand Down
183 changes: 3 additions & 180 deletions be/src/runtime/raw_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,199 +36,22 @@ class SlotDescriptor;
// Useful utility functions for runtime values (which are passed around as void*).
// Every member is static; the class only groups hashing and comparison helpers
// that interpret an untyped pointer according to a type argument.
class RawValue {
public:
// Ascii output precision for double/float
static const int ASCII_PRECISION;

// Convenience overload: hashes 'value' interpreted as 'type' with a seed of 0.
static uint32_t get_hash_value(const void* value, const PrimitiveType& type) {
return get_hash_value(value, type, 0);
}

// Hashes 'value' interpreted as the primitive 'type', combined with 'seed'.
static uint32_t get_hash_value(const void* value, const PrimitiveType& type, uint32_t seed);

// Returns hash value for 'value' interpreted as 'type'. The resulting hash value
// is combined with the seed value.
// Forwards to the PrimitiveType overload using type.type.
static uint32_t get_hash_value(const void* value, const TypeDescriptor& type, uint32_t seed) {
return get_hash_value(value, type.type, seed);
}

// Same as above with a seed of 0.
static uint32_t get_hash_value(const void* value, const TypeDescriptor& type) {
return get_hash_value(value, type.type, 0);
}

// Get the hash value using the FNV hash function. Using different seeds with FNV
// results in different hash functions. get_hash_value() does not have this property
// and cannot be safely used as the first step in data repartitioning.
// However, get_hash_value() can be significantly faster.
// TODO: fix get_hash_value
// NOTE(review): the text above talks about an FNV hash, but the declaration it
// sits on is zlib_crc32 -- the comment looks stale; confirm before relying on it.
static uint32_t zlib_crc32(const void* value, const TypeDescriptor& type, uint32_t seed);

// Same as the function above, but takes an explicit length 'len'; only used in
// the vectorized execution engine.
// NOTE(review): the next two lines are identical except for the type of 'type'
// (TypeDescriptor vs PrimitiveType) and the first has no terminator -- they look
// like the before/after versions of one declaration; confirm which one applies.
static uint32_t zlib_crc32(const void* value, size_t len, const TypeDescriptor& type,
static uint32_t zlib_crc32(const void* value, size_t len, const PrimitiveType& type,
uint32_t seed);

// Compares both values.
// Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
static int compare(const void* v1, const void* v2, const TypeDescriptor& type);

// Returns true if v1 == v2.
// This is more performant than compare() == 0 for string equality, mostly because of
// the length comparison check.
static bool eq(const void* v1, const void* v2, const TypeDescriptor& type);

// Presumably returns true if v1 < v2 (definition not visible here -- TODO confirm).
static bool lt(const void* v1, const void* v2, const TypeDescriptor& type);
};

// Use boost::hash_combine for corner cases. boost::hash_combine is reimplemented
// here to use int32t's (instead of size_t)
// boost::hash_combine does:
// seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
inline uint32_t RawValue::get_hash_value(const void* v, const PrimitiveType& type, uint32_t seed) {
// Hash_combine with v = 0
if (v == nullptr) {
uint32_t value = 0x9e3779b9;
return seed ^ (value + (seed << 6) + (seed >> 2));
}

switch (type) {
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_HLL:
case TYPE_STRING: {
const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
return HashUtil::hash(string_value->data, string_value->size, seed);
}

case TYPE_BOOLEAN: {
uint32_t value = *reinterpret_cast<const bool*>(v) + 0x9e3779b9;
return seed ^ (value + (seed << 6) + (seed >> 2));
}

case TYPE_TINYINT:
return HashUtil::hash(v, 1, seed);

case TYPE_SMALLINT:
return HashUtil::hash(v, 2, seed);

case TYPE_INT:
return HashUtil::hash(v, 4, seed);

case TYPE_BIGINT:
return HashUtil::hash(v, 8, seed);

case TYPE_FLOAT:
return HashUtil::hash(v, 4, seed);

case TYPE_DOUBLE:
return HashUtil::hash(v, 8, seed);

case TYPE_DATE:
case TYPE_DATETIME:
return HashUtil::hash(v, 16, seed);

case TYPE_DATEV2:
return HashUtil::hash(v, 4, seed);

case TYPE_DATETIMEV2:
return HashUtil::hash(v, 8, seed);

case TYPE_DECIMALV2:
return HashUtil::hash(v, 16, seed);
case TYPE_DECIMAL32:
return HashUtil::hash(v, 4, seed);
case TYPE_DECIMAL64:
return HashUtil::hash(v, 8, seed);
case TYPE_DECIMAL128I:
return HashUtil::hash(v, 16, seed);

case TYPE_LARGEINT:
return HashUtil::hash(v, 16, seed);

default:
DCHECK(false) << "invalid type: " << type;
return 0;
}
}

// NOTE: this is just for split data, decimal use old doris hash function
// Because crc32 hardware is not equal with zlib crc32
inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, uint32_t seed) {
// Hash_combine with v = 0
if (v == nullptr) {
uint32_t value = 0x9e3779b9;
return seed ^ (value + (seed << 6) + (seed >> 2));
}

switch (type.type) {
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_STRING: {
const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
return HashUtil::zlib_crc_hash(string_value->data, string_value->size, seed);
}
case TYPE_CHAR: {
// TODO(zc): ugly, use actual value to compute hash value
const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
int len = 0;
while (len < string_value->size) {
if (string_value->data[len] == '\0') {
break;
}
len++;
}
return HashUtil::zlib_crc_hash(string_value->data, len, seed);
}
case TYPE_BOOLEAN:
case TYPE_TINYINT:
return HashUtil::zlib_crc_hash(v, 1, seed);
case TYPE_SMALLINT:
return HashUtil::zlib_crc_hash(v, 2, seed);
case TYPE_INT:
case TYPE_DATEV2:
case TYPE_DECIMAL32:
return HashUtil::zlib_crc_hash(v, 4, seed);
case TYPE_BIGINT:
case TYPE_DATETIMEV2:
case TYPE_DECIMAL64:
return HashUtil::zlib_crc_hash(v, 8, seed);
case TYPE_LARGEINT:
case TYPE_DECIMAL128I:
return HashUtil::zlib_crc_hash(v, 16, seed);
case TYPE_FLOAT:
return HashUtil::zlib_crc_hash(v, 4, seed);
case TYPE_DOUBLE:
return HashUtil::zlib_crc_hash(v, 8, seed);
case TYPE_DATE:
case TYPE_DATETIME: {
const DateTimeValue* date_val = (const DateTimeValue*)v;
char buf[64];
int len = date_val->to_buffer(buf);
return HashUtil::zlib_crc_hash(buf, len, seed);
}

case TYPE_DECIMALV2: {
const DecimalV2Value* dec_val = (const DecimalV2Value*)v;
int64_t int_val = dec_val->int_value();
int32_t frac_val = dec_val->frac_value();
seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed);
return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
}
default:
DCHECK(false) << "invalid type: " << type;
return 0;
}
}

// NOTE: this is just for split data, decimal use old doris hash function
// Because crc32 hardware is not equal with zlib crc32
inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const TypeDescriptor& type,
inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const PrimitiveType& type,
uint32_t seed) {
// Hash_combine with v = 0
if (v == nullptr) {
uint32_t value = 0x9e3779b9;
return seed ^ (value + (seed << 6) + (seed >> 2));
}

switch (type.type) {
switch (type) {
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_STRING:
Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/result_buffer_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "gen_cpp/PaloInternalService_types.h"
#include "gen_cpp/types.pb.h"
#include "runtime/buffer_control_block.h"
#include "runtime/raw_value.h"
#include "util/doris_metrics.h"

namespace doris {
Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/result_queue_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

#include "common/status.h"
#include "runtime/primitive_type.h"
#include "runtime/raw_value.h"
#include "runtime/record_batch_queue.h"
#include "util/hash_util.hpp"

Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/result_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ namespace doris {

class Status;
class RuntimeState;
struct TypeDescriptor;

namespace vectorized {
class Block;
Expand Down
2 changes: 0 additions & 2 deletions be/src/runtime/struct_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ class StructValue {

void shallow_copy(const StructValue* other);

// size_t get_byte_size(const TypeDescriptor& type) const;

const void** values() const { return const_cast<const void**>(_values); }
void** mutable_values() { return _values; }
void set_values(void** values) { _values = values; }
Expand Down
Loading

0 comments on commit dd53bc1

Please sign in to comment.