Skip to content

Commit

Permalink
[Bug] fix violating C/C++ aliasing rules cause a error hash value in …
Browse files Browse the repository at this point in the history
…decimal value (apache#6348)

In RuntimeFilter BloomFilter, decimal column will got a wrong hash value because violating  aliasing rules
decimal12_t decimal = { 12, 12 };
murmurhash3(decimal) in bloom filter: 2167721464
expect: 4203026776
  • Loading branch information
stdpain authored Aug 3, 2021
1 parent 2c208e9 commit 16bc5fa
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
17 changes: 9 additions & 8 deletions be/src/exec/olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
#include <string>

#include "gen_cpp/PaloInternalService_types.h"
#include "olap/decimal12.h"
#include "olap/field.h"
#include "olap/uint24.h"
#include "olap_scan_node.h"
#include "olap_utils.h"
#include "runtime/descriptors.h"
Expand Down Expand Up @@ -451,9 +453,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
}
case TYPE_DECIMALV2: {
DecimalV2Value* slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset());
auto packed_decimal = *reinterpret_cast<const decimal12_t*>(ptr);

int64_t int_value = *(int64_t*)(ptr);
int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
int64_t int_value = packed_decimal.integer;
int32_t frac_value = packed_decimal.fraction;
if (!slot->from_olap_decimal(int_value, frac_value)) {
tuple->set_null(slot_desc->null_indicator_offset());
}
Expand All @@ -469,12 +472,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
}
case TYPE_DATE: {
DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset());
uint64_t value = 0;
value = *(unsigned char*)(ptr + 2);
value <<= 8;
value |= *(unsigned char*)(ptr + 1);
value <<= 8;
value |= *(unsigned char*)(ptr);

uint24_t date = *reinterpret_cast<const uint24_t*>(ptr);
uint64_t value = uint32_t(date);

if (!slot->from_olap_date(value)) {
tuple->set_null(slot_desc->null_indicator_offset());
}
Expand Down
32 changes: 21 additions & 11 deletions be/src/exprs/bloomfilter_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#include "exprs/block_bloom_filter.hpp"
#include "exprs/predicate.h"
#include "olap/bloom_filter.hpp"
#include "olap/decimal12.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/uint24.h"
#include "runtime/mem_tracker.h"
#include "runtime/raw_value.h"

Expand Down Expand Up @@ -223,30 +225,38 @@ struct DateTimeFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
}
};

// avoid violating C/C++ aliasing rules.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101684

template <class BloomFilterAdaptor>
struct DateFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
uint64_t value = 0;
value = *(unsigned char*)((char*)data + 2);
value <<= 8;
value |= *(unsigned char*)((char*)data + 1);
value <<= 8;
value |= *(unsigned char*)((char*)data);
uint24_t date = *static_cast<const uint24_t*>(data);
uint64_t value = uint32_t(date);

DateTimeValue date_value;
date_value.from_olap_date(value);
date_value.to_datetime();
return bloom_filter.test_bytes((char*)&date_value, sizeof(DateTimeValue));

char data_bytes[sizeof(date_value)];
memcpy(&data_bytes, &date_value, sizeof(date_value));
return bloom_filter.test_bytes(data_bytes, sizeof(DateTimeValue));
}
};

template <class BloomFilterAdaptor>
struct DecimalV2FindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
struct DecimalV2FindOp : public CommonFindOp<DecimalV2Value, BloomFilterAdaptor> {
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
auto packed_decimal = *static_cast<const decimal12_t*>(data);
DecimalV2Value value;
int64_t int_value = *(int64_t*)(data);
int32_t frac_value = *(int32_t*)((char*)data + sizeof(int64_t));
int64_t int_value = packed_decimal.integer;
int32_t frac_value = packed_decimal.fraction;
value.from_olap_decimal(int_value, frac_value);
return bloom_filter.test_bytes((char*)&value, sizeof(DecimalV2Value));

constexpr int decimal_value_sz = sizeof(DecimalV2Value);
char data_bytes[decimal_value_sz];
memcpy(&data_bytes, &value, decimal_value_sz);
return bloom_filter.test_bytes(data_bytes, decimal_value_sz);
}
};

Expand Down
12 changes: 5 additions & 7 deletions be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,12 @@ struct BloomFilterTraits<Slice> {
};

struct Int128Comparator {
bool operator()(const PackedInt128& a, const PackedInt128& b) const {
return a.value < b.value;
}
bool operator()(const int128_t& a, const int128_t& b) const { return a < b; }
};

template <>
struct BloomFilterTraits<int128_t> {
using ValueDict = std::set<PackedInt128, Int128Comparator>;
using ValueDict = std::set<int128_t, Int128Comparator>;
};

// Builder for bloom filter. In doris, bloom filter index is used in
Expand Down Expand Up @@ -90,9 +88,9 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
_typeinfo->deep_copy(&new_value, v, &_pool);
_values.insert(new_value);
} else if constexpr (_is_int128()) {
PackedInt128 new_value;
memcpy(&new_value.value, v, sizeof(PackedInt128));
_values.insert((*reinterpret_cast<CppType*>(&new_value)));
int128_t new_value;
memcpy(&new_value, v, sizeof(PackedInt128));
_values.insert(new_value);
} else {
_values.insert(*v);
}
Expand Down

0 comments on commit 16bc5fa

Please sign in to comment.