Skip to content

Commit

Permalink
[fix](ip) fix datatype serde for ipv6 with rowstore (apache#43065)
Browse files Browse the repository at this point in the history
Before this PR:
if a table that has `store_row_column` enabled contains an IPv6 column,
and we insert some data into it
and then run an UPDATE statement, the BE crashes with a core dump:
```
*** Aborted at 1730367188 (unix time) try "date -d @1730367188" if you are using GNU date ***
*** Current BE git commitID: face753 ***
*** SIGSEGV invalid permissions for mapped object (@0x60c000a339ae) received by PID 4176451 (TID 4187168 OR 0x7ff774f5a700) from PID 10697134; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk1/wangqiannan/amory/doris/be/src/common/signal_handler.h:421
 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
 2# JVM_handle_linux_signal in /mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
 3# 0x00007FFFF63AEB50 in /lib64/libc.so.6
 4# doris::IPv6Value::from_string(unsigned __int128&, char const*, unsigned long) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/runtime/ipv6_value.h:55
 5# bool doris::vectorized::read_ipv6_text_impl<unsigned __int128>(unsigned __int128&, doris::vectorized::ReadBuffer&) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/io/io_helper.h:316
 6# doris::vectorized::DataTypeIPv6SerDe::read_one_cell_from_jsonb(doris::vectorized::IColumn&, doris::JsonbValue const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp:76
 7# doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector<std::shared_ptr<doris::vectorized::DataTypeSerDe>, std::allocator<std::shared_ptr<doris::vectorized::DataTypeSerDe> > > const&, char const*, unsigned long, std::unordered_map<unsigned int, unsigned int, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, unsigned int> > > const&, doris::vectorized::Block&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::unordered_set<int, std::hash<int>, std::equal_to<int>, std::allocator<int> > const&) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/jsonb/serialize.cpp:104
 8# doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector<std::shared_ptr<doris::vectorized::DataTypeSerDe>, std::allocator<std::shared_ptr<doris::vectorized::DataTypeSerDe> > > const&, doris::vectorized::ColumnStr<unsigned int> const&, std::unordered_map<unsigned int, unsigned int, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, unsigned int> > > const&, doris::vectorized::Block&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::unordered_set<int, std::hash<int>, std::equal_to<int>, std::allocator<int> > const&) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/jsonb/serialize.cpp:83
 9# doris::BaseTablet::fetch_value_through_row_column(std::shared_ptr<doris::Rowset>, doris::TabletSchema const&, unsigned int, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, doris::vectorized::Block&) at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/base_tablet.cpp:885
10# doris::FixedReadPlan::read_columns_by_plan(doris::TabletSchema const&, std::vector<unsigned int, std::allocator<unsigned int> >, std::map<doris::RowsetId, std::shared_ptr<doris::Rowset>, std::less<doris::RowsetId>, std::allocator<std::pair<doris::RowsetId const, std::shared_ptr<doris::Rowset> > > > const&, doris::vectorized::Block&, std::map<unsigned int, unsigned int, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, unsigned int> > >*, signed char const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/partial_update_info.cpp:295
11# doris::FixedReadPlan::fill_missing_columns(doris::RowsetWriterContext*, std::map<doris::RowsetId, std::shared_ptr<doris::Rowset>, std::less<doris::RowsetId>, std::allocator<std::pair<doris::RowsetId const, std::shared_ptr<doris::Rowset> > > > const&, doris::TabletSchema const&, doris::vectorized::Block&, std::vector<bool, std::allocator<bool> > const&, bool, unsigned long const&, doris::vectorized::Block const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/partial_update_info.cpp:332
12# doris::segment_v2::VerticalSegmentWriter::_append_block_with_partial_content(doris::segment_v2::RowsInBlock&, doris::vectorized::Block&) at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:502
```
  • Loading branch information
amorynan authored Nov 5, 2024
1 parent cc7e275 commit 9e29566
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 13 deletions.
22 changes: 9 additions & 13 deletions be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,21 @@ Status DataTypeIPv6SerDe::write_column_to_mysql(const IColumn& column,
}

void DataTypeIPv6SerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const {
IPv6 val = 0;
const auto* str_value = static_cast<const JsonbStringVal*>(arg);
ReadBuffer rb(reinterpret_cast<const unsigned char*>(str_value->getBlob()),
str_value->getBlobLen());
if (!read_ipv6_text_impl(val, rb)) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "parse ipv6 fail, string: '{}'",
rb.to_string());
}
assert_cast<ColumnIPv6&>(column).insert_value(val);
const auto* str_value = static_cast<const JsonbBinaryVal*>(arg);
column.deserialize_and_insert_from_arena(str_value->getBlob());
}

void DataTypeIPv6SerDe::write_one_cell_to_jsonb(const IColumn& column,
JsonbWriterT<JsonbOutStream>& result,
Arena* mem_pool, int col_id, int row_num) const {
// we make ipv6 as string in jsonb
// we make ipv6 as BinaryValue in jsonb
result.writeKey(col_id);
IPv6 data = assert_cast<const ColumnIPv6&>(column).get_element(row_num);
IPv6Value ipv6_value(data);
result.writeString(ipv6_value.to_string());
const char* begin = nullptr;
// maybe serialize_value_into_arena should move to here later.
StringRef value = column.serialize_value_into_arena(row_num, *mem_pool, begin);
result.writeStartBinary();
result.writeBinary(value.data, value.size);
result.writeEndBinary();
}

Status DataTypeIPv6SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
Expand Down
62 changes: 62 additions & 0 deletions be/test/vec/data_types/serde/data_type_serde_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,66 @@ TEST(DataTypeSerDeTest, DataTypeScalaSerDeTest) {
serialize_and_deserialize_pb_test();
}

// Round-trip test for the row-store (jsonb) serde path of the IP types:
// one cell is written with write_one_cell_to_jsonb, the resulting buffer is
// parsed back as a JsonbDocument, and every value in it is read back with
// read_one_cell_from_jsonb. The value read back must render to the same text
// as the original address.
TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
// ipv6
{
std::string ip = "5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b";
auto vec = vectorized::ColumnVector<IPv6>::create();
IPv6Value ipv6;
EXPECT_TRUE(ipv6.from_string(ip));
// Row 0 of the column holds the parsed source address.
vec->insert(ipv6.value());

vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeIPv6>());
auto serde = data_type->get_serde(0);
JsonbWriterT<JsonbOutStream> jsonb_writer;
Arena pool;
// Serialize row 0 under key (col_id) 0 inside a single jsonb object.
jsonb_writer.writeStartObject();
serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0);
jsonb_writer.writeEndObject();
// Copy the writer output into a string column and re-parse it as a jsonb document.
auto jsonb_column = ColumnString::create();
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
JsonbDocument& doc = *pdoc;
// Deserialize every entry of the object back into the same column.
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
}
// One original row plus one deserialized row.
EXPECT_TRUE(vec->size() == 2);
// Row 1 is the value that went through the jsonb round trip.
IPv6 data = vec->get_element(1);
IPv6Value ipv6_value(data);
EXPECT_EQ(ipv6_value.to_string(), ip);
}

// ipv4
// Same round trip as the ipv6 case above, using the IPv4 column, value and data type.
{
std::string ip = "192.0.0.1";
auto vec = vectorized::ColumnVector<IPv4>::create();
IPv4Value ipv4;
EXPECT_TRUE(ipv4.from_string(ip));
// Row 0 of the column holds the parsed source address.
vec->insert(ipv4.value());

vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeIPv4>());
auto serde = data_type->get_serde(0);
JsonbWriterT<JsonbOutStream> jsonb_writer;
Arena pool;
// Serialize row 0 under key (col_id) 0 inside a single jsonb object.
jsonb_writer.writeStartObject();
serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0);
jsonb_writer.writeEndObject();
// Copy the writer output into a string column and re-parse it as a jsonb document.
auto jsonb_column = ColumnString::create();
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
JsonbDocument& doc = *pdoc;
// Deserialize every entry of the object back into the same column.
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
}
// One original row plus one deserialized row.
EXPECT_TRUE(vec->size() == 2);
// Row 1 is the value that went through the jsonb round trip.
IPv4 data = vec->get_element(1);
IPv4Value ipv4_value(data);
EXPECT_EQ(ipv4_value.to_string(), ip);
}
}

} // namespace doris::vectorized
12 changes: 12 additions & 0 deletions regression-test/data/datatype_p0/ip/test_ip_basic.out
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,15 @@ ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4
121.25.82.29 2620:44:a000::1
121.25.160.80 2001:418:0:5000::c2d

-- !sql --
1 true 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b

-- !sql --
1 false 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b

-- !sql --
1 false 127.0.0.1 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b

-- !sql --
1 false 127.0.0.1 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff

14 changes: 14 additions & 0 deletions regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,18 @@ suite("test_ip_basic") {
sql "DROP TABLE t0"
sql "DROP TABLE t1"
sql "DROP TABLE t2"

// test ip with rowstore
sql """ SET enable_nereids_planner=true """
sql """ SET enable_fallback_to_original_planner=false """
sql """ DROP TABLE IF EXISTS table_ip """
sql """ CREATE TABLE IF NOT EXISTS `table_ip` (`col0` bigint NOT NULL,`col1` boolean NOT NULL, `col24` ipv4 NOT NULL, `col25` ipv6 NOT NULL,INDEX col1 (`col1`) USING INVERTED, INDEX col25 (`col25`) USING INVERTED ) ENGINE=OLAP UNIQUE KEY(`col0`) DISTRIBUTED BY HASH(`col0`) BUCKETS 4 PROPERTIES ("replication_allocation" = "tag.location.default: 1", "store_row_column" = "true") """
sql """ insert into table_ip values (1, true, '255.255.255.255', "5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b") """
qt_sql """ select * from table_ip """
sql """ Update table_ip set col1 = false where col0 = 1 """
qt_sql """ select * from table_ip """
sql """ Update table_ip set col24 = '127.0.0.1' where col0 = 1 """
qt_sql """ select * from table_ip where col0 = 1"""
sql """ Update table_ip set col25 = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' where col0 = 1 """
qt_sql """ select * from table_ip where col0 = 1"""
}

0 comments on commit 9e29566

Please sign in to comment.