[optimization] open gcc strict-aliasing optimization (apache#6034)

* open gcc strict-aliasing optimization * use -Werror=strick-alias
eldenmoon · Jun 18, 2021 · 1999a0c · 1999a0c
1 parent 48bd680
commit 1999a0c
Show file tree

Hide file tree

Showing 14 changed files with 142 additions and 66 deletions.
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
@@ -319,16 +319,14 @@ check_function_exists(sched_getcpu HAVE_SCHED_GETCPU)
 #  -Wall: Enable all warnings.
 #  -Wno-sign-compare: suppress warnings for comparison between signed and unsigned
 #    integers
-#   -fno-strict-aliasing: disable optimizations that assume strict aliasing. This
-#       is unsafe to do if the code uses casts (which we obviously do).
 #  -Wno-unknown-pragmas: suppress warnings for unknown (compiler specific) pragmas
 #  -Wno-deprecated: gutil contains deprecated headers
 #  -Wno-vla: we use C99-style variable-length arrays
 #  -pthread: enable multithreaded malloc
 #  -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG: enable nanosecond precision for boost
 #  -fno-omit-frame-pointers: Keep frame pointer for functions in register
-set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-pragmas -pthread")
-set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-strict-aliasing -fno-omit-frame-pointer")
+set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-pragmas -pthread -Werror=strict-aliasing")
+set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-omit-frame-pointer")
 set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=gnu++17 -D__STDC_FORMAT_MACROS")
 set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated -Wno-vla")
 set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG")

diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
@@ -1145,12 +1145,6 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
                 }
 
                 switch (slot->type().type) {
-                case TYPE_TINYINT: {
-                    int32_t v = *reinterpret_cast<int8_t*>(value);
-                    range->add_range(to_olap_filter_type(pred->op(), child_idx),
-                                     *reinterpret_cast<T*>(&v));
-                    break;
-                }
 
                 case TYPE_DATE: {
                     DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
@@ -1165,6 +1159,7 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
                                      *reinterpret_cast<T*>(&date_value));
                     break;
                 }
+                case TYPE_TINYINT:
                 case TYPE_DECIMAL:
                 case TYPE_DECIMALV2:
                 case TYPE_CHAR:
@@ -1174,15 +1169,10 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
                 case TYPE_SMALLINT:
                 case TYPE_INT:
                 case TYPE_BIGINT:
-                case TYPE_LARGEINT: {
-                    range->add_range(to_olap_filter_type(pred->op(), child_idx),
-                                     *reinterpret_cast<T*>(value));
-                    break;
-                }
+                case TYPE_LARGEINT:
                 case TYPE_BOOLEAN: {
-                    bool v = *reinterpret_cast<bool*>(value);
                     range->add_range(to_olap_filter_type(pred->op(), child_idx),
-                                     *reinterpret_cast<T*>(&v));
+                                     *reinterpret_cast<T*>(value));
                     break;
                 }
 

diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
@@ -18,32 +18,28 @@
 #ifndef DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP
 #define DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP
 
-#include "text_converter.h"
-
 #include <boost/algorithm/string.hpp>
 
+#include "olap/utils.h"
+#include "runtime/datetime_value.h"
 #include "runtime/decimal_value.h"
 #include "runtime/decimalv2_value.h"
 #include "runtime/descriptors.h"
 #include "runtime/mem_pool.h"
 #include "runtime/runtime_state.h"
 #include "runtime/string_value.h"
-#include "runtime/datetime_value.h"
 #include "runtime/tuple.h"
+#include "text_converter.h"
+#include "util/binary_cast.hpp"
 #include "util/string_parser.hpp"
 #include "util/types.h"
-#include "olap/utils.h"
 
 namespace doris {
 
 // Note: this function has a codegen'd version.  Changing this function requires
 // corresponding changes to CodegenWriteSlot.
-inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc,
-                                      Tuple* tuple,
-                                      const char* data,
-                                      int len,
-                                      bool copy_string,
-                                      bool need_escape,
+inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tuple,
+                                      const char* data, int len, bool copy_string, bool need_escape,
                                       MemPool* pool) {
     //小批量导入只有\N被认为是NULL,没有批量导入的replace_value函数
     if (true == slot_desc->is_nullable()) {
@@ -83,28 +79,27 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc,
     }
 
     case TYPE_BOOLEAN:
-        *reinterpret_cast<bool*>(slot) =
-            StringParser::string_to_bool(data, len, &parse_result);
+        *reinterpret_cast<bool*>(slot) = StringParser::string_to_bool(data, len, &parse_result);
         break;
 
     case TYPE_TINYINT:
         *reinterpret_cast<int8_t*>(slot) =
-            StringParser::string_to_int<int8_t>(data, len, &parse_result);
+                StringParser::string_to_int<int8_t>(data, len, &parse_result);
         break;
 
     case TYPE_SMALLINT:
         *reinterpret_cast<int16_t*>(slot) =
-            StringParser::string_to_int<int16_t>(data, len, &parse_result);
+                StringParser::string_to_int<int16_t>(data, len, &parse_result);
         break;
 
     case TYPE_INT:
         *reinterpret_cast<int32_t*>(slot) =
-            StringParser::string_to_int<int32_t>(data, len, &parse_result);
+                StringParser::string_to_int<int32_t>(data, len, &parse_result);
         break;
 
     case TYPE_BIGINT:
         *reinterpret_cast<int64_t*>(slot) =
-            StringParser::string_to_int<int64_t>(data, len, &parse_result);
+                StringParser::string_to_int<int64_t>(data, len, &parse_result);
         break;
 
     case TYPE_LARGEINT: {
@@ -115,12 +110,12 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc,
 
     case TYPE_FLOAT:
         *reinterpret_cast<float*>(slot) =
-            StringParser::string_to_float<float>(data, len, &parse_result);
+                StringParser::string_to_float<float>(data, len, &parse_result);
         break;
 
     case TYPE_DOUBLE:
         *reinterpret_cast<double*>(slot) =
-            StringParser::string_to_float<double>(data, len, &parse_result);
+                StringParser::string_to_float<double>(data, len, &parse_result);
         break;
 
     case TYPE_DATE: {
@@ -161,9 +156,8 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc,
             parse_result = StringParser::PARSE_FAILURE;
         }
 
-        *reinterpret_cast<PackedInt128*>(slot) = 
-            *reinterpret_cast<const PackedInt128*>(&decimal_slot);
-
+        *reinterpret_cast<PackedInt128*>(slot) =
+                binary_cast<DecimalV2Value, PackedInt128>(decimal_slot);
         break;
     }
 
@@ -181,6 +175,6 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc,
     return true;
 }
 
-}
+} // namespace doris
 
 #endif
diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp
@@ -1650,7 +1650,7 @@ class MultiDistinctDecimalV2State {
     BigIntVal count_finalize() { return BigIntVal(_set.size()); }
 
     DecimalV2Val sum_finalize() {
-        DecimalV2Value sum;
+        DecimalV2Value sum(0);
         for (auto& value : _set) {
             sum += value;
         }

diff --git a/be/src/exprs/decimalv2_operators.cpp b/be/src/exprs/decimalv2_operators.cpp
@@ -57,7 +57,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context,
     if (val.is_null) {
         return DecimalV2Val::null();
     }
-    DecimalV2Value dv;
+    DecimalV2Value dv(0);
     dv.assign_from_float(val.val);
     DecimalV2Val result;
     dv.to_decimal_val(&result);
@@ -69,7 +69,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context,
     if (val.is_null) {
         return DecimalV2Val::null();
     }
-    DecimalV2Value dv;
+    DecimalV2Value dv(0);
     dv.assign_from_double(val.val);
     DecimalV2Val result;
     dv.to_decimal_val(&result);
@@ -94,7 +94,7 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context,
     if (val.is_null) {
         return DecimalV2Val::null();
     }
-    DecimalV2Value dv;
+    DecimalV2Value dv(0);
     if (dv.parse_from_str((const char*)val.ptr, val.len)) {
         return DecimalV2Val::null();
     }

diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h
@@ -60,7 +60,7 @@ struct ExprValue {
               string_val(NULL, 0),
               datetime_val(),
               decimal_val(),
-              decimalv2_val() {}
+              decimalv2_val(0) {}
 
     ExprValue(bool v) : bool_val(v) {}
     ExprValue(int8_t v) : tinyint_val(v) {}

diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp
@@ -907,7 +907,7 @@ StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV
         return StringVal::null();
     }
 
-    DecimalV2Value rounded;
+    DecimalV2Value rounded(0);
     DecimalV2Value::from_decimal_val(v).round(&rounded, 2, HALF_UP);
     DecimalV2Value tmp(std::string_view("100"));
     DecimalV2Value result = rounded * tmp;

diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h
@@ -60,7 +60,7 @@ class DecimalV2Value {
     static const int128_t MAX_DECIMAL_VALUE =
             static_cast<int128_t>(MAX_INT64) * ONE_BILLION + MAX_FRAC_VALUE;
 
-    DecimalV2Value() : _value(0) {}
+    DecimalV2Value() = default;
     inline const int128_t& value() const { return _value; }
     inline int128_t& value() { return _value; }
 

diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "runtime/decimalv2_value.h"
+#include "util/types.h"
+
+namespace doris {
+union TypeConverter {
+    uint64_t u64;
+    int64_t i64;
+    uint32_t u32[2];
+    int32_t i32[2];
+    uint8_t u8[8];
+    float flt[2];
+    double dbl;
+};
+
+template <typename C0, typename C1, typename T0, typename T1>
+inline constexpr bool match_v = std::is_same_v<C0, C1>&& std::is_same_v<T0, T1>;
+
+union DecimalInt128Union {
+    DecimalV2Value decimal;
+    PackedInt128 packed128;
+};
+
+static_assert(sizeof(DecimalV2Value) == sizeof(PackedInt128));
+
+// similar to reinterpret_cast but won't break strict-aliasing rules
+template <typename From, typename To>
+To binary_cast(From from) {
+    constexpr bool from_u64_to_db = match_v<From, uint64_t, To, double>;
+    constexpr bool from_i64_to_db = match_v<From, int64_t, To, double>;
+    constexpr bool from_db_to_i64 = match_v<From, double, To, int64_t>;
+    constexpr bool from_db_to_u64 = match_v<From, double, To, uint64_t>;
+    constexpr bool from_decv2_to_packed128 = match_v<From, DecimalV2Value, To, PackedInt128>;
+
+    static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 || from_db_to_u64 ||
+                  from_decv2_to_packed128);
+
+    if constexpr (from_u64_to_db) {
+        TypeConverter conv;
+        conv.u64 = from;
+        return conv.dbl;
+    } else if constexpr (from_i64_to_db) {
+        TypeConverter conv;
+        conv.i64 = from;
+        return conv.dbl;
+    } else if constexpr (from_db_to_i64) {
+        TypeConverter conv;
+        conv.dbl = from;
+        return conv.i64;
+    } else if constexpr (from_db_to_u64) {
+        TypeConverter conv;
+        conv.dbl = from;
+        return conv.u64;
+    } else if constexpr (from_decv2_to_packed128) {
+        DecimalInt128Union conv;
+        conv.decimal = from;
+        return conv.packed128;
+    } else {
+        __builtin_unreachable();
+    }
+}
+
+} // namespace doris
diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
@@ -97,12 +97,18 @@ class HashUtil {
             (bytes & 1) ? (h1 = _mm_crc32_u8(h1, *s)) : (h2 = _mm_crc32_u8(h2, *s));
             ++s;
         }
+        union {
+            uint64_t u64;
+            uint32_t u32[2];
+        } converter;
+        converter.u64 = hash;
 
         h1 = (h1 << 16) | (h1 >> 16);
         h2 = (h2 << 16) | (h2 >> 16);
-        ((uint32_t*)(&hash))[0] = h1;
-        ((uint32_t*)(&hash))[1] = h2;
-        return hash;
+        converter.u32[0] = h1;
+        converter.u32[1] = h2;
+
+        return converter.u64;
     }
 #else
     static uint32_t crc_hash(const void* data, int32_t bytes, uint32_t hash) {

diff --git a/be/src/util/pretty_printer.h b/be/src/util/pretty_printer.h
@@ -26,6 +26,7 @@
 #include "gen_cpp/RuntimeProfile_types.h"
 #include "util/cpu_info.h"
 #include "util/template_util.h"
+#include "util/binary_cast.hpp"
 
 /// Truncate a double to offset decimal places.
 #define DOUBLE_TRUNCATE(val, offset) floor(val* pow(10, offset)) / pow(10, offset)
@@ -140,7 +141,7 @@ class PrettyPrinter {
 
         /// TODO: Remove DOUBLE_VALUE. IMPALA-1649
         case TUnit::DOUBLE_VALUE: {
-            double output = *reinterpret_cast<double*>(&value);
+            double output = binary_cast<T, double>(value);
             ss << std::setprecision(PRECISION) << output << " ";
             break;
         }

diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
@@ -30,8 +30,10 @@
 #include "common/logging.h"
 #include "common/object_pool.h"
 #include "gen_cpp/RuntimeProfile_types.h"
+#include "util/binary_cast.hpp"
 #include "util/stopwatch.hpp"
 
+
 namespace doris {
 
 // Define macros for updating counters.  The macros make it very easy to disable
@@ -110,14 +112,14 @@ class RuntimeProfile {
 
         virtual void set(double value) {
             DCHECK_EQ(sizeof(value), sizeof(int64_t));
-            _value.store(*reinterpret_cast<int64_t*>(&value));
+            _value.store(binary_cast<double,int64_t>(value));
         }
 
         virtual int64_t value() const { return _value.load(); }
 
         virtual double double_value() const {
             int64_t v = _value.load();
-            return *reinterpret_cast<const double*>(&v);
+            return binary_cast<int64_t, double>(v);
         }
 
         TUnit::type type() const { return _type; }

diff --git a/be/src/util/types.h b/be/src/util/types.h
@@ -22,7 +22,8 @@ namespace doris {
 // Because __int128 in memory is not aligned, but GCC7 will generate SSE instruction
 // for __int128 load/store. This will cause segment fault.
 struct PackedInt128 {
-    PackedInt128() : value(0) {}
+    // PackedInt128() : value(0) {}
+    PackedInt128() = default;
 
     PackedInt128(const __int128& value_) { value = value_; }
 #pragma GCC diagnostic push