diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 509c8c168e6578..ce208d8abc6e4b 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -319,16 +319,14 @@ check_function_exists(sched_getcpu HAVE_SCHED_GETCPU) # -Wall: Enable all warnings. # -Wno-sign-compare: suppress warnings for comparison between signed and unsigned # integers -# -fno-strict-aliasing: disable optimizations that assume strict aliasing. This -# is unsafe to do if the code uses casts (which we obviously do). # -Wno-unknown-pragmas: suppress warnings for unknown (compiler specific) pragmas # -Wno-deprecated: gutil contains deprecated headers # -Wno-vla: we use C99-style variable-length arrays # -pthread: enable multithreaded malloc # -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG: enable nanosecond precision for boost # -fno-omit-frame-pointers: Keep frame pointer for functions in register -set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-pragmas -pthread") -set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-strict-aliasing -fno-omit-frame-pointer") +set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-pragmas -pthread -Werror=strict-aliasing") +set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-omit-frame-pointer") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=gnu++17 -D__STDC_FORMAT_MACROS") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated -Wno-vla") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG") diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 9ab9e0a520a52c..f9ecb2aa03e92d 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -1145,12 +1145,6 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, } switch (slot->type().type) { - case TYPE_TINYINT: { - int32_t v = *reinterpret_cast(value); - range->add_range(to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast(&v)); - break; - } case TYPE_DATE: { DateTimeValue date_value = *reinterpret_cast(value); @@ -1165,6 +1159,7 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, *reinterpret_cast(&date_value)); break; } + case TYPE_TINYINT: case TYPE_DECIMAL: case TYPE_DECIMALV2: case TYPE_CHAR: @@ -1174,15 +1169,10 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, case TYPE_SMALLINT: case TYPE_INT: case TYPE_BIGINT: - case TYPE_LARGEINT: { - range->add_range(to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast(value)); - break; - } + case TYPE_LARGEINT: case TYPE_BOOLEAN: { - bool v = *reinterpret_cast(value); range->add_range(to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast(&v)); + *reinterpret_cast(value)); break; } diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp index 1f2b1dce8ca447..5afdad36880961 100644 --- a/be/src/exec/text_converter.hpp +++ b/be/src/exec/text_converter.hpp @@ -18,32 +18,28 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP #define DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP -#include "text_converter.h" - #include +#include "olap/utils.h" +#include "runtime/datetime_value.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" -#include "runtime/datetime_value.h" #include "runtime/tuple.h" +#include "text_converter.h" +#include "util/binary_cast.hpp" #include "util/string_parser.hpp" #include "util/types.h" -#include "olap/utils.h" namespace doris { // Note: this function has a codegen'd version. Changing this function requires // corresponding changes to CodegenWriteSlot. -inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, - Tuple* tuple, - const char* data, - int len, - bool copy_string, - bool need_escape, +inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, + const char* data, int len, bool copy_string, bool need_escape, MemPool* pool) { //小批量导入只有\N被认为是NULL,没有批量导入的replace_value函数 if (true == slot_desc->is_nullable()) { @@ -83,28 +79,27 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, } case TYPE_BOOLEAN: - *reinterpret_cast(slot) = - StringParser::string_to_bool(data, len, &parse_result); + *reinterpret_cast(slot) = StringParser::string_to_bool(data, len, &parse_result); break; case TYPE_TINYINT: *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); + StringParser::string_to_int(data, len, &parse_result); break; case TYPE_SMALLINT: *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); + StringParser::string_to_int(data, len, &parse_result); break; case TYPE_INT: *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); + StringParser::string_to_int(data, len, &parse_result); break; case TYPE_BIGINT: *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); + StringParser::string_to_int(data, len, &parse_result); break; case TYPE_LARGEINT: { @@ -115,12 +110,12 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, case TYPE_FLOAT: *reinterpret_cast(slot) = - StringParser::string_to_float(data, len, &parse_result); + StringParser::string_to_float(data, len, &parse_result); break; case TYPE_DOUBLE: *reinterpret_cast(slot) = - StringParser::string_to_float(data, len, &parse_result); + StringParser::string_to_float(data, len, &parse_result); break; case TYPE_DATE: { @@ -161,9 +156,8 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, parse_result = StringParser::PARSE_FAILURE; } - *reinterpret_cast(slot) = - *reinterpret_cast(&decimal_slot); - + *reinterpret_cast(slot) = + binary_cast(decimal_slot); break; } @@ -181,6 +175,6 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, return true; } -} +} // namespace doris #endif diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp index b27637f8c971b8..2f7132a10b0050 100644 --- a/be/src/exprs/aggregate_functions.cpp +++ b/be/src/exprs/aggregate_functions.cpp @@ -1650,7 +1650,7 @@ class MultiDistinctDecimalV2State { BigIntVal count_finalize() { return BigIntVal(_set.size()); } DecimalV2Val sum_finalize() { - DecimalV2Value sum; + DecimalV2Value sum(0); for (auto& value : _set) { sum += value; } diff --git a/be/src/exprs/decimalv2_operators.cpp b/be/src/exprs/decimalv2_operators.cpp index d641092dc26d57..2c99c200ee1267 100644 --- a/be/src/exprs/decimalv2_operators.cpp +++ b/be/src/exprs/decimalv2_operators.cpp @@ -57,7 +57,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, if (val.is_null) { return DecimalV2Val::null(); } - DecimalV2Value dv; + DecimalV2Value dv(0); dv.assign_from_float(val.val); DecimalV2Val result; dv.to_decimal_val(&result); @@ -69,7 +69,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, if (val.is_null) { return DecimalV2Val::null(); } - DecimalV2Value dv; + DecimalV2Value dv(0); dv.assign_from_double(val.val); DecimalV2Val result; dv.to_decimal_val(&result); @@ -94,7 +94,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, if (val.is_null) { return DecimalV2Val::null(); } - DecimalV2Value dv; + DecimalV2Value dv(0); if (dv.parse_from_str((const char*)val.ptr, val.len)) { return DecimalV2Val::null(); } diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h index 849df3e2e904d9..0b528828f8ba1b 100644 --- a/be/src/exprs/expr_value.h +++ b/be/src/exprs/expr_value.h @@ -60,7 +60,7 @@ struct ExprValue { string_val(NULL, 0), datetime_val(), decimal_val(), - decimalv2_val() {} + decimalv2_val(0) {} ExprValue(bool v) : bool_val(v) {} ExprValue(int8_t v) : tinyint_val(v) {} diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 63c874b68251b2..8f69797be59212 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -907,7 +907,7 @@ StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV return StringVal::null(); } - DecimalV2Value rounded; + DecimalV2Value rounded(0); DecimalV2Value::from_decimal_val(v).round(&rounded, 2, HALF_UP); DecimalV2Value tmp(std::string_view("100")); DecimalV2Value result = rounded * tmp; diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h index d6dccf25e41e6b..8a790557fef862 100644 --- a/be/src/runtime/decimalv2_value.h +++ b/be/src/runtime/decimalv2_value.h @@ -60,7 +60,7 @@ class DecimalV2Value { static const int128_t MAX_DECIMAL_VALUE = static_cast(MAX_INT64) * ONE_BILLION + MAX_FRAC_VALUE; - DecimalV2Value() : _value(0) {} + DecimalV2Value() = default; inline const int128_t& value() const { return _value; } inline int128_t& value() { return _value; } diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp new file mode 100644 index 00000000000000..82395805705391 --- /dev/null +++ b/be/src/util/binary_cast.hpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once +#include +#include +#include + +#include "runtime/decimalv2_value.h" +#include "util/types.h" + +namespace doris { +union TypeConverter { + uint64_t u64; + int64_t i64; + uint32_t u32[2]; + int32_t i32[2]; + uint8_t u8[8]; + float flt[2]; + double dbl; +}; + +template +inline constexpr bool match_v = std::is_same_v&& std::is_same_v; + +union DecimalInt128Union { + DecimalV2Value decimal; + PackedInt128 packed128; +}; + +static_assert(sizeof(DecimalV2Value) == sizeof(PackedInt128)); + +// similar to reinterpret_cast but won't break strict-aliasing rules +template +To binary_cast(From from) { + constexpr bool from_u64_to_db = match_v; + constexpr bool from_i64_to_db = match_v; + constexpr bool from_db_to_i64 = match_v; + constexpr bool from_db_to_u64 = match_v; + constexpr bool from_decv2_to_packed128 = match_v; + + static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 || from_db_to_u64 || + from_decv2_to_packed128); + + if constexpr (from_u64_to_db) { + TypeConverter conv; + conv.u64 = from; + return conv.dbl; + } else if constexpr (from_i64_to_db) { + TypeConverter conv; + conv.i64 = from; + return conv.dbl; + } else if constexpr (from_db_to_i64) { + TypeConverter conv; + conv.dbl = from; + return conv.i64; + } else if constexpr (from_db_to_u64) { + TypeConverter conv; + conv.dbl = from; + return conv.u64; + } else if constexpr (from_decv2_to_packed128) { + DecimalInt128Union conv; + conv.decimal = from; + return conv.packed128; + } else { + __builtin_unreachable(); + } +} + +} // namespace doris diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index 7d053a17094078..9667b51c25842e 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -97,12 +97,18 @@ class HashUtil { (bytes & 1) ? (h1 = _mm_crc32_u8(h1, *s)) : (h2 = _mm_crc32_u8(h2, *s)); ++s; } + union { + uint64_t u64; + uint32_t u32[2]; + } converter; + converter.u64 = hash; h1 = (h1 << 16) | (h1 >> 16); h2 = (h2 << 16) | (h2 >> 16); - ((uint32_t*)(&hash))[0] = h1; - ((uint32_t*)(&hash))[1] = h2; - return hash; + converter.u32[0] = h1; + converter.u32[1] = h2; + + return converter.u64; } #else static uint32_t crc_hash(const void* data, int32_t bytes, uint32_t hash) { diff --git a/be/src/util/pretty_printer.h b/be/src/util/pretty_printer.h index a7502d31b7972c..0986b75822505a 100644 --- a/be/src/util/pretty_printer.h +++ b/be/src/util/pretty_printer.h @@ -26,6 +26,7 @@ #include "gen_cpp/RuntimeProfile_types.h" #include "util/cpu_info.h" #include "util/template_util.h" +#include "util/binary_cast.hpp" /// Truncate a double to offset decimal places. #define DOUBLE_TRUNCATE(val, offset) floor(val* pow(10, offset)) / pow(10, offset) @@ -140,7 +141,7 @@ class PrettyPrinter { /// TODO: Remove DOUBLE_VALUE. IMPALA-1649 case TUnit::DOUBLE_VALUE: { - double output = *reinterpret_cast(&value); + double output = binary_cast(value); ss << std::setprecision(PRECISION) << output << " "; break; } diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h index 5b1cb512b7b7a3..a0b01d0d88637f 100644 --- a/be/src/util/runtime_profile.h +++ b/be/src/util/runtime_profile.h @@ -30,8 +30,10 @@ #include "common/logging.h" #include "common/object_pool.h" #include "gen_cpp/RuntimeProfile_types.h" +#include "util/binary_cast.hpp" #include "util/stopwatch.hpp" + namespace doris { // Define macros for updating counters. The macros make it very easy to disable @@ -110,14 +112,14 @@ class RuntimeProfile { virtual void set(double value) { DCHECK_EQ(sizeof(value), sizeof(int64_t)); - _value.store(*reinterpret_cast(&value)); + _value.store(binary_cast(value)); } virtual int64_t value() const { return _value.load(); } virtual double double_value() const { int64_t v = _value.load(); - return *reinterpret_cast(&v); + return binary_cast(v); } TUnit::type type() const { return _type; } diff --git a/be/src/util/types.h b/be/src/util/types.h index 77410d0b644f53..7d45141badc6ae 100644 --- a/be/src/util/types.h +++ b/be/src/util/types.h @@ -22,7 +22,8 @@ namespace doris { // Because __int128 in memory is not aligned, but GCC7 will generate SSE instruction // for __int128 load/store. This will cause segment fault. struct PackedInt128 { - PackedInt128() : value(0) {} + // PackedInt128() : value(0) {} + PackedInt128() = default; PackedInt128(const __int128& value_) { value = value_; } #pragma GCC diagnostic push diff --git a/be/test/runtime/decimalv2_value_test.cpp b/be/test/runtime/decimalv2_value_test.cpp index 03a9ef5a387b8d..bcf6dde10fd14a 100644 --- a/be/test/runtime/decimalv2_value_test.cpp +++ b/be/test/runtime/decimalv2_value_test.cpp @@ -112,7 +112,7 @@ TEST_F(DecimalV2ValueTest, negative_zero) { } TEST_F(DecimalV2ValueTest, int_to_decimal) { - DecimalV2Value value1; + DecimalV2Value value1(0); ASSERT_EQ("0", value1.to_string(3)); DecimalV2Value value2(111111111, 0); // 9 digits @@ -308,7 +308,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { // less than 5 DecimalV2Value value(std::string("1.249")); { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, -1, HALF_UP); ASSERT_EQ("0", dst.to_string()); @@ -322,7 +322,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { ASSERT_EQ("0", dst.to_string()); } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 0, HALF_UP); ASSERT_EQ("1", dst.to_string()); @@ -337,7 +337,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 1, HALF_UP); ASSERT_EQ("1.2", dst.to_string()); @@ -352,7 +352,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 2, HALF_UP); ASSERT_EQ("1.25", dst.to_string()); @@ -367,7 +367,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 3, HALF_UP); ASSERT_EQ("1.249", dst.to_string()); @@ -382,7 +382,7 @@ TEST_F(DecimalV2ValueTest, round_ops) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 4, HALF_UP); ASSERT_EQ("1.249", dst.to_string()); @@ -402,7 +402,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { // less than 5 DecimalV2Value value(std::string("-1.249")); { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, -1, HALF_UP); ASSERT_EQ("0", dst.to_string()); @@ -416,7 +416,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { ASSERT_EQ("0", dst.to_string()); } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 0, HALF_UP); ASSERT_EQ("-1", dst.to_string()); @@ -431,7 +431,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 1, HALF_UP); ASSERT_EQ("-1.2", dst.to_string()); @@ -446,7 +446,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 2, HALF_UP); ASSERT_EQ("-1.25", dst.to_string()); @@ -461,7 +461,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 3, HALF_UP); ASSERT_EQ("-1.249", dst.to_string()); @@ -476,7 +476,7 @@ TEST_F(DecimalV2ValueTest, round_minus) { } { - DecimalV2Value dst; + DecimalV2Value dst(0); value.round(&dst, 4, HALF_UP); ASSERT_EQ("-1.249", dst.to_string());