Skip to content

Commit

Permalink
[Fix] avoid core dump cause by malformed bitmap type data (apache#10458)
Browse files Browse the repository at this point in the history
  • Loading branch information
yangzhg authored Jun 30, 2022
1 parent c62c2e3 commit d259770
Show file tree
Hide file tree
Showing 13 changed files with 66 additions and 26 deletions.
8 changes: 4 additions & 4 deletions be/src/olap/rowset/segment_v2/binary_dict_page.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
_data_page_builder.reset(new BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options));
PageBuilderOptions dict_builder_options;
dict_builder_options.data_page_size = _options.dict_page_size;
_dict_builder.reset(new BinaryPlainPageBuilder(dict_builder_options));
_dict_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
reset();
}

Expand Down Expand Up @@ -135,7 +135,7 @@ void BinaryDictPageBuilder::reset() {
_buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);

if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
_data_page_builder.reset(new BinaryPlainPageBuilder(_options));
_data_page_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(_options));
_encoding_type = PLAIN_ENCODING;
} else {
_data_page_builder->reset();
Expand Down Expand Up @@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::init() {
_bit_shuffle_ptr = new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else if (_encoding_type == PLAIN_ENCODING) {
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
_data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
_data_page_decoder.reset(new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else {
LOG(WARNING) << "invalid encoding type:" << _encoding_type;
return Status::Corruption(strings::Substitute("invalid encoding type:$0", _encoding_type));
Expand All @@ -228,7 +228,7 @@ bool BinaryDictPageDecoder::is_dict_encoding() const {
}

void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder, StringRef* dict_word_info) {
_dict_decoder = (BinaryPlainPageDecoder*)dict_decoder;
_dict_decoder = (BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)dict_decoder;
_dict_word_info = dict_word_info;
};

Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/binary_dict_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class BinaryDictPageBuilder : public PageBuilder {

std::unique_ptr<PageBuilder> _data_page_builder;

std::unique_ptr<BinaryPlainPageBuilder> _dict_builder;
std::unique_ptr<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>> _dict_builder;

EncodingTypePB _encoding_type;
struct HashOfSlice {
Expand Down Expand Up @@ -122,7 +122,7 @@ class BinaryDictPageDecoder : public PageDecoder {
Slice _data;
PageDecoderOptions _options;
std::unique_ptr<PageDecoder> _data_page_decoder;
BinaryPlainPageDecoder* _dict_decoder = nullptr;
BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>* _dict_decoder = nullptr;
BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>* _bit_shuffle_ptr = nullptr;
bool _parsed;
EncodingTypePB _encoding_type;
Expand Down
20 changes: 19 additions & 1 deletion be/src/olap/rowset/segment_v2/binary_plain_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
namespace doris {
namespace segment_v2 {

template <FieldType Type>
class BinaryPlainPageBuilder : public PageBuilder {
public:
BinaryPlainPageBuilder(const PageBuilderOptions& options)
Expand All @@ -64,6 +65,11 @@ class BinaryPlainPageBuilder : public PageBuilder {
// If the page is full, should stop adding more items.
while (!is_page_full() && i < *count) {
auto src = reinterpret_cast<const Slice*>(vals);
if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
if (_options.need_check_bitmap) {
RETURN_IF_ERROR(BitmapTypeCode::validate(*(src->data)));
}
}
size_t offset = _buffer.size();
_offsets.push_back(offset);
_buffer.append(src->data, src->size);
Expand Down Expand Up @@ -154,6 +160,7 @@ class BinaryPlainPageBuilder : public PageBuilder {
faststring _last_value;
};

template <FieldType Type>
class BinaryPlainPageDecoder : public PageDecoder {
public:
BinaryPlainPageDecoder(Slice data) : BinaryPlainPageDecoder(data, PageDecoderOptions()) {}
Expand Down Expand Up @@ -204,6 +211,11 @@ class BinaryPlainPageDecoder : public PageDecoder {
size_t mem_len[max_fetch];
for (size_t i = 0; i < max_fetch; i++, out++, _cur_idx++) {
*out = string_at_index(_cur_idx);
if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
if (_options.need_check_bitmap) {
RETURN_IF_ERROR(BitmapTypeCode::validate(*(out->data)));
}
}
mem_len[i] = out->size;
}

Expand Down Expand Up @@ -246,6 +258,11 @@ class BinaryPlainPageDecoder : public PageDecoder {
uint32_t len = offset(_cur_idx + 1) - start_offset;
len_array[i] = len;
start_offset_array[i] = start_offset;
if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
if (_options.need_check_bitmap) {
RETURN_IF_ERROR(BitmapTypeCode::validate(*(_data.data + start_offset)));
}
}
}
dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array,
max_fetch);
Expand All @@ -271,8 +288,9 @@ class BinaryPlainPageDecoder : public PageDecoder {
}

void get_dict_word_info(StringRef* dict_word_info) {
if (_num_elems <= 0) [[unlikely]]
if (UNLIKELY(_num_elems <= 0)) {
return;
}

char* data_begin = (char*)&_data[0];
char* offset_ptr = (char*)&_data[_offsets_pos];
Expand Down
6 changes: 4 additions & 2 deletions be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,10 +747,12 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter)
_compress_codec.get()));
// ignore dict_footer.dict_page_footer().encoding() due to only
// PLAIN_ENCODING is supported for dict page right now
_dict_decoder = std::make_unique<BinaryPlainPageDecoder>(dict_data);
_dict_decoder = std::make_unique<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>(
dict_data);
RETURN_IF_ERROR(_dict_decoder->init());

auto* pd_decoder = (BinaryPlainPageDecoder*)_dict_decoder.get();
auto* pd_decoder =
(BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)_dict_decoder.get();
_dict_word_info.reset(new StringRef[pd_decoder->_num_elems]);
pd_decoder->get_dict_word_info(_dict_word_info.get());
}
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/encoding_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
template <FieldType type>
struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryPlainPageBuilder(opts);
*builder = new BinaryPlainPageBuilder<type>(opts);
return Status::OK();
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
*decoder = new BinaryPlainPageDecoder(data, opts);
*decoder = new BinaryPlainPageDecoder<type>(data, opts);
return Status::OK();
}
};
Expand Down
4 changes: 3 additions & 1 deletion be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,10 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
_compress_codec.get()));
// parse data page
// note that page_index is not used in IndexedColumnIterator, so we pass 0
PageDecoderOptions opts;
opts.need_check_bitmap = false;
return ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
_reader->encoding_info(), pp, 0, &_data_page);
_reader->encoding_info(), pp, 0, &_data_page, opts);
}

Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) {
Expand Down
6 changes: 4 additions & 2 deletions be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ Status IndexedColumnWriter::init() {
// because the default encoding of a data type can be changed in the future
DCHECK_NE(_options.encoding, DEFAULT_ENCODING);

PageBuilder* data_page_builder;
RETURN_IF_ERROR(encoding_info->create_page_builder(PageBuilderOptions(), &data_page_builder));
PageBuilder* data_page_builder = nullptr;
PageBuilderOptions builder_option;
builder_option.need_check_bitmap = false;
RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder));
_data_page_builder.reset(data_page_builder);

if (_options.write_ordinal_index) {
Expand Down
8 changes: 5 additions & 3 deletions be/src/olap/rowset/segment_v2/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,19 @@
namespace doris {
namespace segment_v2 {

class BinaryPlainPageDecoder;

static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M

struct PageBuilderOptions {
size_t data_page_size = DEFAULT_PAGE_SIZE;

size_t dict_page_size = DEFAULT_PAGE_SIZE;

bool need_check_bitmap = true;
};

struct PageDecoderOptions {};
struct PageDecoderOptions {
bool need_check_bitmap = true;
};

} // namespace segment_v2
} // namespace doris
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/parsed_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ namespace segment_v2 {
struct ParsedPage {
static Status create(PageHandle handle, const Slice& body, const DataPageFooterPB& footer,
const EncodingInfo* encoding, const PagePointer& page_pointer,
uint32_t page_index, ParsedPage* result) {
uint32_t page_index, ParsedPage* result,
PageDecoderOptions opts = PageDecoderOptions()) {
result->~ParsedPage();
ParsedPage* page = new (result)(ParsedPage);
page->page_handle = std::move(handle);
Expand All @@ -53,7 +54,6 @@ struct ParsedPage {
}

Slice data_slice(body.data, body.size - null_size);
PageDecoderOptions opts;
RETURN_IF_ERROR(encoding->create_page_decoder(data_slice, opts, &page->data_decoder));
RETURN_IF_ERROR(page->data_decoder->init());

Expand Down
1 change: 0 additions & 1 deletion be/src/tools/meta_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ using doris::RandomAccessFile;
using strings::Substitute;
using doris::segment_v2::SegmentFooterPB;
using doris::segment_v2::ColumnReader;
using doris::segment_v2::BinaryPlainPageDecoder;
using doris::segment_v2::PageHandle;
using doris::segment_v2::PagePointer;
using doris::segment_v2::ColumnReaderOptions;
Expand Down
14 changes: 13 additions & 1 deletion be/src/util/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,16 @@ struct BitmapTypeCode {
// added in 0.12
BITMAP64 = 4
};
Status static inline validate(int bitmap_type) {
if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type > type::BITMAP64)) {
std::string err_msg =
fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
LOG(ERROR) << err_msg;
return Status::IOError(err_msg);
}
return Status::OK();
}
};

namespace detail {
Expand Down Expand Up @@ -1536,7 +1546,6 @@ class BitmapValue {
// Deserialize a bitmap value from `src`.
// Return false if `src` begins with unknown type code, true otherwise.
bool deserialize(const char* src) {
DCHECK(*src >= BitmapTypeCode::EMPTY && *src <= BitmapTypeCode::BITMAP64);
switch (*src) {
case BitmapTypeCode::EMPTY:
_type = EMPTY;
Expand All @@ -1555,6 +1564,9 @@ class BitmapValue {
_bitmap = detail::Roaring64Map::read(src);
break;
default:
LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
<< " and " << BitmapTypeCode::BITMAP64 << " actrual is "
<< static_cast<int>(*src);
return false;
}
return true;
Expand Down
10 changes: 6 additions & 4 deletions be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ class BinaryDictPageTest : public testing::Test {
Status status = page_builder.get_dictionary_page(&dict_slice);
EXPECT_TRUE(status.ok());
PageDecoderOptions dict_decoder_options;
std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder(
new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options));
std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder(
new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(),
dict_decoder_options));
status = dict_page_decoder->init();
EXPECT_TRUE(status.ok());
// because every slice is unique
Expand Down Expand Up @@ -175,8 +176,9 @@ class BinaryDictPageTest : public testing::Test {
int slice_index = random() % results.size();
//int slice_index = 1;
PageDecoderOptions dict_decoder_options;
std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder(
new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options));
std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder(
new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(),
dict_decoder_options));
status = dict_page_decoder->init();
EXPECT_TRUE(status.ok());

Expand Down
3 changes: 2 additions & 1 deletion be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ class BinaryPlainPageTest : public testing::Test {
};

TEST_F(BinaryPlainPageTest, TestBinaryPlainPageBuilderSeekByValueSmallPage) {
TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder, BinaryPlainPageDecoder>();
TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>,
BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>();
}

} // namespace segment_v2
Expand Down

0 comments on commit d259770

Please sign in to comment.