From 9e2956601bb48e2fd64649f1c258f015b199a0b7 Mon Sep 17 00:00:00 2001 From: amory Date: Tue, 5 Nov 2024 14:28:45 +0800 Subject: [PATCH] [fix](ip) fix datatype serde for ipv6 with rowstore (#43065) Before this PR: if a table that has store_row_column enabled contains an IPv6 column, inserting some data and then running an UPDATE statement makes the BE crash with a core dump: ``` *** Aborted at 1730367188 (unix time) try "date -d @1730367188" if you are using GNU date *** *** Current BE git commitID: face753ded *** *** SIGSEGV invalid permissions for mapped object (@0x60c000a339ae) received by PID 4176451 (TID 4187168 OR 0x7ff774f5a700) from PID 10697134; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk1/wangqiannan/amory/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so 2# JVM_handle_linux_signal in /mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so 3# 0x00007FFFF63AEB50 in /lib64/libc.so.6 4# doris::IPv6Value::from_string(unsigned __int128&, char const*, unsigned long) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/runtime/ipv6_value.h:55 5# bool doris::vectorized::read_ipv6_text_impl(unsigned __int128&, doris::vectorized::ReadBuffer&) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/io/io_helper.h:316 6# doris::vectorized::DataTypeIPv6SerDe::read_one_cell_from_jsonb(doris::vectorized::IColumn&, doris::JsonbValue const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp:76 7# doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector, std::allocator > > const&, char const*, unsigned long, std::unordered_map, std::equal_to, std::allocator > > const&, doris::vectorized::Block&, std::vector, std::allocator >, std::allocator, std::allocator > > > const&, std::unordered_set, std::equal_to, std::allocator > const&) at 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/jsonb/serialize.cpp:104 8# doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector, std::allocator > > const&, doris::vectorized::ColumnStr const&, std::unordered_map, std::equal_to, std::allocator > > const&, doris::vectorized::Block&, std::vector, std::allocator >, std::allocator, std::allocator > > > const&, std::unordered_set, std::equal_to, std::allocator > const&) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/jsonb/serialize.cpp:83 9# doris::BaseTablet::fetch_value_through_row_column(std::shared_ptr, doris::TabletSchema const&, unsigned int, std::vector > const&, std::vector > const&, doris::vectorized::Block&) at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/base_tablet.cpp:885 10# doris::FixedReadPlan::read_columns_by_plan(doris::TabletSchema const&, std::vector >, std::map, std::less, std::allocator > > > const&, doris::vectorized::Block&, std::map, std::allocator > >*, signed char const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/partial_update_info.cpp:295 11# doris::FixedReadPlan::fill_missing_columns(doris::RowsetWriterContext*, std::map, std::less, std::allocator > > > const&, doris::TabletSchema const&, doris::vectorized::Block&, std::vector > const&, bool, unsigned long const&, doris::vectorized::Block const*) const at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/partial_update_info.cpp:332 12# doris::segment_v2::VerticalSegmentWriter::_append_block_with_partial_content(doris::segment_v2::RowsInBlock&, doris::vectorized::Block&) at /mnt/disk1/wangqiannan/amory/doris/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:502 ``` --- .../data_types/serde/data_type_ipv6_serde.cpp | 22 +++---- .../data_types/serde/data_type_serde_test.cpp | 62 +++++++++++++++++++ .../data/datatype_p0/ip/test_ip_basic.out | 12 ++++ .../datatype_p0/ip/test_ip_basic.groovy | 14 +++++ 4 files changed, 97 insertions(+), 13 deletions(-) diff --git 
a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp index f09b6feb4a25ed..612c9ce42227dd 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp @@ -69,25 +69,21 @@ Status DataTypeIPv6SerDe::write_column_to_mysql(const IColumn& column, } void DataTypeIPv6SerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { - IPv6 val = 0; - const auto* str_value = static_cast(arg); - ReadBuffer rb(reinterpret_cast(str_value->getBlob()), - str_value->getBlobLen()); - if (!read_ipv6_text_impl(val, rb)) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "parse ipv6 fail, string: '{}'", - rb.to_string()); - } - assert_cast(column).insert_value(val); + const auto* str_value = static_cast(arg); + column.deserialize_and_insert_from_arena(str_value->getBlob()); } void DataTypeIPv6SerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT& result, Arena* mem_pool, int col_id, int row_num) const { - // we make ipv6 as string in jsonb + // we make ipv6 as BinaryValue in jsonb result.writeKey(col_id); - IPv6 data = assert_cast(column).get_element(row_num); - IPv6Value ipv6_value(data); - result.writeString(ipv6_value.to_string()); + const char* begin = nullptr; + // maybe serialize_value_into_arena should move to here later. 
+ StringRef value = column.serialize_value_into_arena(row_num, *mem_pool, begin); + result.writeStartBinary(); + result.writeBinary(value.data, value.size); + result.writeEndBinary(); } Status DataTypeIPv6SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index ef235254db5136..82674b0aa44762 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -220,4 +220,66 @@ TEST(DataTypeSerDeTest, DataTypeScalaSerDeTest) { serialize_and_deserialize_pb_test(); } +TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) { + // ipv6 + { + std::string ip = "5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b"; + auto vec = vectorized::ColumnVector::create(); + IPv6Value ipv6; + EXPECT_TRUE(ipv6.from_string(ip)); + vec->insert(ipv6.value()); + + vectorized::DataTypePtr data_type(std::make_shared()); + auto serde = data_type->get_serde(0); + JsonbWriterT jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0); + jsonb_writer.writeEndObject(); + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + serde->read_one_cell_from_jsonb(*vec, it->value()); + } + EXPECT_TRUE(vec->size() == 2); + IPv6 data = vec->get_element(1); + IPv6Value ipv6_value(data); + EXPECT_EQ(ipv6_value.to_string(), ip); + } + + // ipv4 + { + std::string ip = "192.0.0.1"; + auto vec = vectorized::ColumnVector::create(); + IPv4Value ipv4; + EXPECT_TRUE(ipv4.from_string(ip)); + vec->insert(ipv4.value()); + + vectorized::DataTypePtr 
data_type(std::make_shared()); + auto serde = data_type->get_serde(0); + JsonbWriterT jsonb_writer; + Arena pool; + jsonb_writer.writeStartObject(); + serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0); + jsonb_writer.writeEndObject(); + auto jsonb_column = ColumnString::create(); + jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(), + jsonb_writer.getOutput()->getSize()); + StringRef jsonb_data = jsonb_column->get_data_at(0); + auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size); + JsonbDocument& doc = *pdoc; + for (auto it = doc->begin(); it != doc->end(); ++it) { + serde->read_one_cell_from_jsonb(*vec, it->value()); + } + EXPECT_TRUE(vec->size() == 2); + IPv4 data = vec->get_element(1); + IPv4Value ipv4_value(data); + EXPECT_EQ(ipv4_value.to_string(), ip); + } +} + } // namespace doris::vectorized diff --git a/regression-test/data/datatype_p0/ip/test_ip_basic.out b/regression-test/data/datatype_p0/ip/test_ip_basic.out index 14ecbd47a46984..b69f9708a1b1fc 100644 --- a/regression-test/data/datatype_p0/ip/test_ip_basic.out +++ b/regression-test/data/datatype_p0/ip/test_ip_basic.out @@ -361,3 +361,15 @@ ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4 121.25.82.29 2620:44:a000::1 121.25.160.80 2001:418:0:5000::c2d +-- !sql -- +1 true 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b + +-- !sql -- +1 false 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b + +-- !sql -- +1 false 127.0.0.1 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b + +-- !sql -- +1 false 127.0.0.1 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff + diff --git a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy index 650dc86ad4e354..468b6f6f146e14 100644 --- a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy +++ b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy @@ -146,4 +146,18 @@ suite("test_ip_basic") { sql "DROP TABLE t0" sql "DROP TABLE t1" sql "DROP TABLE t2" + + // 
test ip with rowstore + sql """ SET enable_nereids_planner=true """ + sql """ SET enable_fallback_to_original_planner=false """ + sql """ DROP TABLE IF EXISTS table_ip """ + sql """ CREATE TABLE IF NOT EXISTS `table_ip` (`col0` bigint NOT NULL,`col1` boolean NOT NULL, `col24` ipv4 NOT NULL, `col25` ipv6 NOT NULL,INDEX col1 (`col1`) USING INVERTED, INDEX col25 (`col25`) USING INVERTED ) ENGINE=OLAP UNIQUE KEY(`col0`) DISTRIBUTED BY HASH(`col0`) BUCKETS 4 PROPERTIES ("replication_allocation" = "tag.location.default: 1", "store_row_column" = "true") """ + sql """ insert into table_ip values (1, true, '255.255.255.255', "5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b") """ + qt_sql """ select * from table_ip """ + sql """ Update table_ip set col1 = false where col0 = 1 """ + qt_sql """ select * from table_ip """ + sql """ Update table_ip set col24 = '127.0.0.1' where col0 = 1 """ + qt_sql """ select * from table_ip where col0 = 1""" + sql """ Update table_ip set col25 = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' where col0 = 1 """ + qt_sql """ select * from table_ip where col0 = 1""" }