Skip to content

Commit

Permalink
[Schema change] Support More column type in schema change (apache#4938)
Browse files Browse the repository at this point in the history
1. Support modify column type CHAR to TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE/DATE
and TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE convert to a wider range of numeric types (apache#4937)

2. Use template to refactor code of types.h and schema_change.cpp to delete redundant code.
  • Loading branch information
HappenLee authored Nov 28, 2020
1 parent 3b56b60 commit 55ce88d
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 134 deletions.
54 changes: 17 additions & 37 deletions be/src/olap/schema_change.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index)

#define CONVERT_FROM_TYPE(from_type) \
{ \
switch (mutable_block->tablet_schema().column(i).type()) { \
switch (newtype) { \
case OLAP_FIELD_TYPE_TINYINT: \
TYPE_REINTERPRET_CAST(from_type, int8_t); \
case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: \
Expand All @@ -168,6 +168,8 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index)
TYPE_REINTERPRET_CAST(from_type, uint64_t); \
case OLAP_FIELD_TYPE_LARGEINT: \
LARGEINT_REINTERPRET_CAST(from_type, int128_t); \
case OLAP_FIELD_TYPE_FLOAT: \
TYPE_REINTERPRET_CAST(from_type, float); \
case OLAP_FIELD_TYPE_DOUBLE: \
TYPE_REINTERPRET_CAST(from_type, double); \
default: \
Expand Down Expand Up @@ -217,6 +219,16 @@ class ConvertTypeResolver {
};

ConvertTypeResolver::ConvertTypeResolver() {
// from char type
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_TINYINT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_SMALLINT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_INT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_BIGINT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_LARGEINT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_FLOAT>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_DOUBLE>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_DATE>();

// supported type convert should annotate in doc:
// http://doris.incubator.apache.org/master/zh-CN/sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html#description
// If type convert is supported here, you should check fe/src/main/java/org/apache/doris/catalog/ColumnType.java to supported it either
Expand All @@ -239,6 +251,7 @@ ConvertTypeResolver::ConvertTypeResolver() {
add_convert_type_mapping<OLAP_FIELD_TYPE_FLOAT, OLAP_FIELD_TYPE_VARCHAR>();
add_convert_type_mapping<OLAP_FIELD_TYPE_DOUBLE, OLAP_FIELD_TYPE_VARCHAR>();
add_convert_type_mapping<OLAP_FIELD_TYPE_DECIMAL, OLAP_FIELD_TYPE_VARCHAR>();
add_convert_type_mapping<OLAP_FIELD_TYPE_CHAR, OLAP_FIELD_TYPE_VARCHAR>();

add_convert_type_mapping<OLAP_FIELD_TYPE_DATE, OLAP_FIELD_TYPE_DATETIME>();

Expand Down Expand Up @@ -452,7 +465,6 @@ OLAPStatus RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t
for (size_t i = 0, len = mutable_block->tablet_schema().num_columns(); !filter_all && i < len;
++i) {
int32_t ref_column = _schema_mapping[i].ref_column;

if (_schema_mapping[i].ref_column >= 0) {
if (!_schema_mapping[i].materialized_function.empty()) {
bool (*_do_materialized_transform) (RowCursor*, RowCursor*, const TabletColumn&, int, int, MemPool* );
Expand Down Expand Up @@ -523,38 +535,6 @@ OLAPStatus RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t
}
}
}

// 从ref_column 写入 i列。
} else if (newtype == OLAP_FIELD_TYPE_VARCHAR && reftype == OLAP_FIELD_TYPE_CHAR) {
// 效率低下,也可以直接计算变长域拷贝,但仍然会破坏封装
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num; ++row_index) {
// 不需要的row,每次处理到这个row时就跳过
if (need_filter_data && is_data_left_vec[row_index] == 0) {
continue;
}

// 指定新的要写入的row index(不同于读的row_index)
mutable_block->get_row(new_row_index++, &write_helper);

ref_block->get_row(row_index, &read_helper);

if (true == read_helper.is_null(ref_column)) {
write_helper.set_null(i);
} else {
// 要写入的

write_helper.set_not_null(i);
int p = ref_block->tablet_schema().column(ref_column).length() - 1;
Slice* slice = reinterpret_cast<Slice*>(read_helper.cell_ptr(ref_column));
char* buf = slice->data;
while (p >= 0 && buf[p] == '\0') {
p--;
}
slice->size = p + 1;
write_helper.set_field_content(i, reinterpret_cast<char*>(slice), mem_pool);
}
}
} else if (ConvertTypeResolver::instance()->get_convert_type_info(reftype, newtype)) {
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num; ++row_index) {
Expand Down Expand Up @@ -602,6 +582,8 @@ OLAPStatus RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t
CONVERT_FROM_TYPE(int64_t);
case OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
CONVERT_FROM_TYPE(uint64_t);
case OLAP_FIELD_TYPE_LARGEINT:
CONVERT_FROM_TYPE(int128_t);
default:
LOG(WARNING) << "the column type which was altered from was unsupported."
<< " from_type="
Expand Down Expand Up @@ -736,10 +718,8 @@ bool RowBlockSorter::sort(RowBlock** row_block) {
// Constructs an allocator for row blocks laid out according to `tablet_schema`.
//   tablet_schema:     schema kept in _tablet_schema; its row_size() is cached in _row_len.
//   memory_limitation: value stored in _memory_limitation.
// _memory_allocated starts at 0.
RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation)
        : _tablet_schema(tablet_schema),
          _memory_allocated(0),
          _row_len(tablet_schema.row_size()),
          _memory_limitation(memory_limitation) {
    // _row_len is fully set by the initializer list above, so the body does not
    // assign it again (the former "= 0" followed by "= row_size()" were dead stores).
    VLOG(3) << "RowBlockAllocator(). row_len=" << _row_len;
}

Expand Down
152 changes: 61 additions & 91 deletions be/src/olap/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB
#include "olap/collection.h"
#include "olap/decimal12.h"

#include "olap/olap_common.h"
#include "olap/olap_define.h"
#include "olap/tablet_schema.h" // for TabletColumn
Expand Down Expand Up @@ -495,12 +496,32 @@ struct BaseFieldtypeTraits : public CppTypeTraits<field_type> {
}
};

// Strips trailing '\0' bytes from the Slice that `src` points to by shrinking
// slice->size in place; the bytes in slice->data are left untouched.
//
// Note: `src` is taken as const void* but the Slice it refers to IS modified
// (constness is cast away), so callers must pass a mutable Slice.
//
// An empty slice, or one made up entirely of '\0' bytes, ends up with size 0.
static void prepare_char_before_convert(const void* src) {
    Slice* slice = const_cast<Slice*>(reinterpret_cast<const Slice*>(src));
    const char* buf = slice->data;
    // Track the remaining length instead of a "last index": if Slice::size is an
    // unsigned type (presumably size_t - confirm against Slice), `size - 1` wraps
    // around for an empty slice and an `index >= 0` test can never stop the loop,
    // which would read before the start of the buffer.
    auto len = slice->size;
    while (len > 0 && buf[len - 1] == '\0') {
        --len;
    }
    slice->size = len;
}

template <typename T>
OLAPStatus convert_int_from_varchar(void* dest, const void* src) {
using SrcType = typename CppTypeTraits<OLAP_FIELD_TYPE_VARCHAR>::CppType;
auto src_value = reinterpret_cast<const SrcType*>(src);
T convert_from_varchar(const Slice* src_value, StringParser::ParseResult& parse_res, std::true_type) {
return StringParser::string_to_int<T>(src_value->get_data(), src_value->get_size(), &parse_res);
}

template <typename T>
T convert_from_varchar(const Slice* src_value, StringParser::ParseResult& parse_res, std::false_type) {
return StringParser::string_to_float<T>(src_value->get_data(), src_value->get_size(), &parse_res);
}

template <typename T>
OLAPStatus arithmetic_convert_from_varchar(void* dest, const void* src) {
auto src_value = reinterpret_cast<const Slice*>(src);
StringParser::ParseResult parse_res;
T result = StringParser::string_to_int<T>(src_value->get_data(), src_value->get_size(), &parse_res);
//TODO: use C++17 if constexpr to replace label assignment
auto result = convert_from_varchar<T>(src_value, parse_res, std::is_integral<T>());
if (UNLIKELY(parse_res != StringParser::PARSE_SUCCESS)) {
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
Expand All @@ -509,98 +530,55 @@ OLAPStatus convert_int_from_varchar(void* dest, const void* src) {
}

template <typename T>
OLAPStatus convert_float_from_varchar(void* dest, const void* src) {
using SrcType = typename CppTypeTraits<OLAP_FIELD_TYPE_VARCHAR>::CppType;
auto src_value = reinterpret_cast<const SrcType *>(src);
StringParser::ParseResult parse_res;
T result = StringParser::string_to_float<T>(src_value->get_data(), src_value->get_size(), &parse_res);
if (UNLIKELY(parse_res != StringParser::PARSE_SUCCESS)) {
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
*reinterpret_cast<T*>(dest) = result;
return OLAPStatus::OLAP_SUCCESS;
OLAPStatus numeric_convert_from_char(void *dest, const void *src) {
prepare_char_before_convert(src);
return arithmetic_convert_from_varchar<T>(dest, src);
}

template<FieldType field_type>
struct FieldTypeTraits : public BaseFieldtypeTraits<field_type> { };
// Using NumericFieldtypeTraits to Derived code for OLAP_FIELD_TYPE_XXXINT, OLAP_FIELD_TYPE_FLOAT,
// OLAP_FIELD_TYPE_DOUBLE, to reduce redundant code
template <FieldType fieldType, bool isArithmetic>
struct NumericFieldtypeTraits : public BaseFieldtypeTraits<fieldType> {
using CppType = typename CppTypeTraits<fieldType>::CppType;

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_BOOL> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_BOOL> {
static std::string to_string(const void* src) {
char buf[1024] = {'\0'};
snprintf(buf, sizeof(buf), "%d", *reinterpret_cast<const bool*>(src));
return std::string(buf);
return std::to_string(*reinterpret_cast<const CppType*>(src));
}
static void set_to_max(void* buf) {
(*(bool*)buf) = true;
}
static void set_to_min(void* buf) {
(*(bool*)buf) = false;
}
};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_TINYINT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_TINYINT> {
static std::string to_string(const void* src) {
char buf[1024] = {'\0'};
snprintf(buf, sizeof(buf), "%d", *reinterpret_cast<const int8_t*>(src));
return std::string(buf);
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_int_from_varchar<CppType>(dest, src);
return arithmetic_convert_from_varchar<CppType>(dest, src);
} else if (src_type->type() == OLAP_FIELD_TYPE_CHAR) {
return numeric_convert_from_char<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_SMALLINT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_SMALLINT> {
static std::string to_string(const void* src) {
char buf[1024] = {'\0'};
snprintf(buf, sizeof(buf), "%d", *reinterpret_cast<const int16_t*>(src));
return std::string(buf);
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_int_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
};
// Partial specialization used when the second template argument is `false`:
// it declares no members of its own, so the field type exposes exactly what
// BaseFieldtypeTraits<fieldType> provides.
template <FieldType fieldType>
struct NumericFieldtypeTraits<fieldType, false> : public BaseFieldtypeTraits<fieldType> {};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_INT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_INT> {
static std::string to_string(const void* src) {
char buf[1024] = {'\0'};
snprintf(buf, sizeof(buf), "%d", *reinterpret_cast<const int32_t *>(src));
return std::string(buf);
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_int_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
};
// Generic FieldTypeTraits: derives from NumericFieldtypeTraits and passes `true`
// as the second template argument only when the field's CppType is both
// arithmetic and signed (std::is_arithmetic && std::is_signed); any other
// CppType passes `false`.
template <FieldType fieldType>
struct FieldTypeTraits : public NumericFieldtypeTraits<fieldType,
std::is_arithmetic<typename BaseFieldtypeTraits<fieldType>::CppType>::value && std::is_signed<typename BaseFieldtypeTraits<fieldType>::CppType>::value> {};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_BIGINT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_BIGINT> {
struct FieldTypeTraits<OLAP_FIELD_TYPE_BOOL> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_BOOL> {
static std::string to_string(const void* src) {
char buf[1024] = {'\0'};
snprintf(buf, sizeof(buf), "%ld", *reinterpret_cast<const int64_t*>(src));
snprintf(buf, sizeof(buf), "%d", *reinterpret_cast<const bool*>(src));
return std::string(buf);
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_int_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
static void set_to_max(void* buf) {
(*(bool*)buf) = true;
}
static void set_to_min(void* buf) {
(*(bool*)buf) = false;
}
};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_LARGEINT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_LARGEINT> {
struct FieldTypeTraits<OLAP_FIELD_TYPE_LARGEINT> : public NumericFieldtypeTraits<OLAP_FIELD_TYPE_LARGEINT, true> {
static OLAPStatus from_string(void* buf, const std::string& scan_key) {
int128_t value = 0;

Expand Down Expand Up @@ -699,16 +677,10 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_LARGEINT> : public BaseFieldtypeTraits<OL
static void set_to_min(void* buf) {
*reinterpret_cast<PackedInt128*>(buf) = (int128_t)(1) << 127;
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_int_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_FLOAT> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_FLOAT> {
struct FieldTypeTraits<OLAP_FIELD_TYPE_FLOAT> : public NumericFieldtypeTraits<OLAP_FIELD_TYPE_FLOAT, true> {
static OLAPStatus from_string(void* buf, const std::string& scan_key) {
CppType value = 0.0f;
if (scan_key.length() > 0) {
Expand All @@ -723,16 +695,10 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_FLOAT> : public BaseFieldtypeTraits<OLAP_
DCHECK(length >= 0) << "gcvt float failed, float value=" << *reinterpret_cast<const CppType *>(src);
return std::string(buf);
}
static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_float_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;
}
};

template<>
struct FieldTypeTraits<OLAP_FIELD_TYPE_DOUBLE> : public BaseFieldtypeTraits<OLAP_FIELD_TYPE_DOUBLE> {
struct FieldTypeTraits<OLAP_FIELD_TYPE_DOUBLE> : public NumericFieldtypeTraits<OLAP_FIELD_TYPE_DOUBLE, true> {
static OLAPStatus from_string(void* buf, const std::string& scan_key) {
CppType value = 0.0;
if (scan_key.length() > 0) {
Expand Down Expand Up @@ -767,10 +733,8 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_DOUBLE> : public BaseFieldtypeTraits<OLAP
*reinterpret_cast<CppType*>(dest) = strtod(buf,&tg);
return OLAPStatus::OLAP_SUCCESS;
}
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) {
return convert_float_from_varchar<CppType>(dest, src);
}
return OLAPStatus::OLAP_ERR_INVALID_SCHEMA;

return NumericFieldtypeTraits<OLAP_FIELD_TYPE_DOUBLE, true>::convert_from(dest, src, src_type, mem_pool);
}
};

Expand Down Expand Up @@ -844,7 +808,10 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_DATE> : public BaseFieldtypeTraits<OLAP_F
return OLAPStatus::OLAP_SUCCESS;
}

if (src_type->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR) {
if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR || src_type->type() == OLAP_FIELD_TYPE_CHAR) {
if (src_type->type() == OLAP_FIELD_TYPE_CHAR) {
prepare_char_before_convert(src);
}
using SrcType = typename CppTypeTraits<OLAP_FIELD_TYPE_VARCHAR>::CppType;
auto src_value = *reinterpret_cast<const SrcType*>(src);
DateTimeValue dt;
Expand Down Expand Up @@ -1032,6 +999,9 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_VARCHAR> : public FieldTypeTraits<OLAP_FI
memcpy(slice->data, result.c_str(), result.size());
slice->size = result.size();
return OLAP_SUCCESS;
} else if (src_type->type() == OLAP_FIELD_TYPE_CHAR) {
prepare_char_before_convert(src);
deep_copy(dest, src, mem_pool);
}
return OLAP_ERR_INVALID_SCHEMA;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,11 @@ under the License.
3) Only the type of the column can be modified. The other attributes of the column remain as they are (ie other attributes need to be explicitly written in the statement according to the original attribute, see example 8)
4) The partition column cannot be modified
5) The following types of conversions are currently supported (any loss of precision is the user's responsibility)
TINYINT/SMALLINT/INT/BIGINT is converted to TINYINT/SMALLINT/INT/BIGINT/DOUBLE.
TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE convert to a wider range of numeric types
TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE/DECIMAL is converted to VARCHAR
VARCHAR supports modification of maximum length
Convert VARCHAR to TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE.
Convert VARCHAR to DATE (currently support six formats: "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d", "%y/%m/%d")
Convert VARCHAR/CHAR to TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE.
Convert VARCHAR/CHAR to DATE (currently support six formats: "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d", "%y/%m/%d")
Convert DATETIME to DATE(Only year-month-day information is retained, For example: `2019-12-09 21:47:05` <--> `2019-12-09`)
Convert DATE to DATETIME(Set hour, minute, second to zero, For example: `2019-12-09` <--> `2019-12-09 00:00:00`)
Convert FLOAT to DOUBLE
Expand Down
Loading

0 comments on commit 55ce88d

Please sign in to comment.