diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp
index acb91412d1180..ac4be413eb9bc 100644
--- a/be/src/exec/json_scanner.cpp
+++ b/be/src/exec/json_scanner.cpp
@@ -121,6 +121,46 @@ void JsonScanner::close() {
     FileScanner::close();
 }
 
+// Maps a slot type to the type used while parsing JSON; nested types are mapped recursively.
+static TypeDescriptor construct_json_type(const TypeDescriptor& src_type) {
+    const auto logical_type = src_type.type;
+    if (logical_type == TYPE_ARRAY) {
+        TypeDescriptor array_type(TYPE_ARRAY);
+        array_type.children.emplace_back(construct_json_type(src_type.children[0]));
+        return array_type;
+    }
+    if (logical_type == TYPE_STRUCT) {
+        TypeDescriptor struct_type(TYPE_STRUCT);
+        struct_type.field_names = src_type.field_names;
+        for (const auto& field_type : src_type.children) {
+            struct_type.children.emplace_back(construct_json_type(field_type));
+        }
+        return struct_type;
+    }
+    if (logical_type == TYPE_MAP) {
+        // children[0] is the key type and children[1] the value type.
+        TypeDescriptor map_type(TYPE_MAP);
+        map_type.children.emplace_back(construct_json_type(src_type.children[0]));
+        map_type.children.emplace_back(construct_json_type(src_type.children[1]));
+        return map_type;
+    }
+    switch (logical_type) {
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_BIGINT:
+    case TYPE_INT:
+    case TYPE_SMALLINT:
+    case TYPE_TINYINT:
+    case TYPE_VARCHAR:
+    case TYPE_JSON:
+        // These types are parsed as what they are.
+        return src_type;
+    default:
+        // Treat other types as VARCHAR.
+        return TypeDescriptor::create_varchar_type(TypeDescriptor::MAX_VARCHAR_LENGTH);
+    }
+}
+
 Status JsonScanner::_construct_json_types() {
     size_t slot_size = _src_slot_descriptors.size();
     _json_types.resize(slot_size);
@@ -130,77 +170,7 @@ Status JsonScanner::_construct_json_types() {
             continue;
         }
 
-        switch (slot_desc->type().type) {
-        case TYPE_ARRAY: {
-            TypeDescriptor json_type(TYPE_ARRAY);
-            TypeDescriptor* child_type = &json_type;
-
-            const TypeDescriptor* slot_type = &(slot_desc->type().children[0]);
-            while (slot_type->type == TYPE_ARRAY) {
-                slot_type = &(slot_type->children[0]);
-
-                child_type->children.emplace_back(TYPE_ARRAY);
-                child_type = &(child_type->children[0]);
-            }
-
-            // the json lib don't support get_int128_t(), so we load with BinaryColumn and then convert to LargeIntColumn
-            if (slot_type->type == TYPE_FLOAT || slot_type->type == TYPE_DOUBLE || slot_type->type == TYPE_BIGINT ||
-                slot_type->type == TYPE_INT || slot_type->type == TYPE_SMALLINT || slot_type->type == TYPE_TINYINT) {
-                // Treat these types as what they are.
-                child_type->children.emplace_back(slot_type->type);
-            } else if (slot_type->type == TYPE_VARCHAR) {
-                auto varchar_type = TypeDescriptor::create_varchar_type(slot_type->len);
-                child_type->children.emplace_back(varchar_type);
-            } else if (slot_type->type == TYPE_CHAR) {
-                auto char_type = TypeDescriptor::create_char_type(slot_type->len);
-                child_type->children.emplace_back(char_type);
-            } else if (slot_type->type == TYPE_JSON) {
-                child_type->children.emplace_back(TypeDescriptor::create_json_type());
-            } else {
-                // Treat other types as VARCHAR.
-                auto varchar_type = TypeDescriptor::create_varchar_type(TypeDescriptor::MAX_VARCHAR_LENGTH);
-                child_type->children.emplace_back(varchar_type);
-            }
-
-            _json_types[column_pos] = std::move(json_type);
-            break;
-        }
-
-        // Treat these types as what they are.
- case TYPE_FLOAT: - case TYPE_DOUBLE: - case TYPE_BIGINT: - case TYPE_INT: - case TYPE_SMALLINT: - case TYPE_TINYINT: { - _json_types[column_pos] = TypeDescriptor{slot_desc->type().type}; - break; - } - - case TYPE_CHAR: { - auto char_type = TypeDescriptor::create_char_type(slot_desc->type().len); - _json_types[column_pos] = std::move(char_type); - break; - } - - case TYPE_VARCHAR: { - auto varchar_type = TypeDescriptor::create_varchar_type(slot_desc->type().len); - _json_types[column_pos] = std::move(varchar_type); - break; - } - - case TYPE_JSON: { - _json_types[column_pos] = TypeDescriptor::create_json_type(); - break; - } - - // Treat other types as VARCHAR. - default: { - auto varchar_type = TypeDescriptor::create_varchar_type(TypeDescriptor::MAX_VARCHAR_LENGTH); - _json_types[column_pos] = std::move(varchar_type); - break; - } - } + _json_types[column_pos] = construct_json_type(slot_desc->type()); } return Status::OK(); } @@ -306,8 +276,8 @@ Status JsonScanner::_open_next_reader() { LOG(WARNING) << "Failed to create sequential files: " << st.to_string(); return st; } - _cur_file_reader = - std::make_unique(_state, _counter, this, file, _strict_mode, _src_slot_descriptors, range_desc); + _cur_file_reader = std::make_unique(_state, _counter, this, file, _strict_mode, _src_slot_descriptors, + _json_types, range_desc); RETURN_IF_ERROR(_cur_file_reader->open()); _next_range++; return Status::OK(); @@ -334,17 +304,19 @@ StatusOr JsonScanner::_cast_chunk(const starrocks::ChunkPtr& src_chunk JsonReader::JsonReader(starrocks::RuntimeState* state, starrocks::ScannerCounter* counter, JsonScanner* scanner, std::shared_ptr file, bool strict_mode, std::vector slot_descs, - const TBrokerRangeDesc& range_desc) + std::vector type_descs, const TBrokerRangeDesc& range_desc) : _state(state), _counter(counter), _scanner(scanner), _strict_mode(strict_mode), _file(std::move(file)), _slot_descs(std::move(slot_descs)), + _type_descs(type_descs), _op_col_index(-1), 
_range_desc(range_desc) { int index = 0; - for (const auto& desc : _slot_descs) { + for (size_t i = 0; i < _slot_descs.size(); ++i) { + const auto& desc = _slot_descs[i]; if (desc == nullptr) { continue; } @@ -353,6 +325,7 @@ JsonReader::JsonReader(starrocks::RuntimeState* state, starrocks::ScannerCounter } index++; _slot_desc_dict.emplace(desc->col_name(), desc); + _type_desc_dict.emplace(desc->col_name(), _type_descs[i]); } } @@ -565,15 +538,16 @@ Status JsonReader::_construct_row_without_jsonpath(simdjson::ondemand::object* r } auto slot_desc = itr->second; + auto type_desc = _type_desc_dict[key]; // update the prev parsed position column_index = chunk->get_index_by_slot_id(slot_desc->id()); if (_prev_parsed_position.size() <= key_index) { - _prev_parsed_position.emplace_back(key, column_index, slot_desc->type()); + _prev_parsed_position.emplace_back(key, column_index, type_desc); } else { _prev_parsed_position[key_index].key = key; _prev_parsed_position[key_index].column_index = column_index; - _prev_parsed_position[key_index].type = slot_desc->type(); + _prev_parsed_position[key_index].type = type_desc; } } diff --git a/be/src/exec/json_scanner.h b/be/src/exec/json_scanner.h index 8fda00ac87153..04e73ffa83fc0 100644 --- a/be/src/exec/json_scanner.h +++ b/be/src/exec/json_scanner.h @@ -83,7 +83,8 @@ class JsonScanner : public FileScanner { class JsonReader { public: JsonReader(RuntimeState* state, ScannerCounter* counter, JsonScanner* scanner, std::shared_ptr file, - bool strict_mode, std::vector slot_descs, const TBrokerRangeDesc& range_desc); + bool strict_mode, std::vector slot_descs, std::vector types, + const TBrokerRangeDesc& range_desc); ~JsonReader(); @@ -131,9 +132,11 @@ class JsonReader { std::shared_ptr _file; bool _closed = false; std::vector _slot_descs; + std::vector _type_descs; //Attention: _slot_desc_dict's key is the string_view of the column of _slot_descs, // so the lifecycle of _slot_descs should be longer than _slot_desc_dict; 
std::unordered_map _slot_desc_dict; + std::unordered_map _type_desc_dict; // For performance reason, the simdjson parser should be reused over several files. //https://github.com/simdjson/simdjson/blob/master/doc/performance.md diff --git a/be/src/formats/CMakeLists.txt b/be/src/formats/CMakeLists.txt index 7554e2c4d5914..38d9833a55360 100644 --- a/be/src/formats/CMakeLists.txt +++ b/be/src/formats/CMakeLists.txt @@ -36,6 +36,8 @@ add_library(Formats STATIC json/nullable_column.cpp json/numeric_column.cpp json/binary_column.cpp + json/struct_column.cpp + json/map_column.cpp avro/nullable_column.cpp avro/numeric_column.cpp avro/binary_column.cpp diff --git a/be/src/formats/json/map_column.cpp b/be/src/formats/json/map_column.cpp new file mode 100644 index 0000000000000..9ce94306dfb56 --- /dev/null +++ b/be/src/formats/json/map_column.cpp @@ -0,0 +1,66 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "column/map_column.h" + +#include "fmt/format.h" +#include "formats/json/map_column.h" +#include "formats/json/nullable_column.h" +#include "gutil/strings/substitute.h" + +namespace starrocks { + +Status add_map_column(Column* column, const TypeDescriptor& type_desc, const std::string& name, + simdjson::ondemand::value* value) { + auto map_column = down_cast(column); + + try { + if (value->type() != simdjson::ondemand::json_type::object) { + std::ostringstream ss; + ss << "Expected value type [object], got [" << value->type() << "]"; + return Status::DataQualityError(ss.str()); + } + simdjson::ondemand::object obj = value->get_object(); + simdjson::ondemand::parser parser; + size_t field_count = 0; + for (auto field : obj) { + { + // This is a tricky way to transform a std::string to simdjson:ondemand:value + std::string_view field_name_str = field.unescaped_key(); + auto dummy_json = simdjson::padded_string(R"({"dummy_key": ")" + std::string(field_name_str) + R"("})"); + simdjson::ondemand::document doc = parser.iterate(dummy_json); + simdjson::ondemand::object obj = doc.get_object(); + simdjson::ondemand::value field_key = obj.find_field("dummy_key"); + + RETURN_IF_ERROR(add_nullable_column(map_column->keys_column().get(), type_desc.children[0], name, + &field_key, true)); + } + + { + simdjson::ondemand::value field_value = field.value(); + RETURN_IF_ERROR(add_nullable_column(map_column->values_column().get(), type_desc.children[1], name, + &field_value, true)); + } + ++field_count; + } + map_column->offsets_column()->append(map_column->offsets_column()->get_data().back() + field_count); + + return Status::OK(); + } catch (simdjson::simdjson_error& e) { + auto err_msg = strings::Substitute("Failed to parse value as object, column=$0, error=$1", name, + simdjson::error_message(e.error())); + return Status::DataQualityError(err_msg); + } +} +} // namespace starrocks \ No newline at end of file diff --git a/be/src/formats/json/map_column.h 
b/be/src/formats/json/map_column.h
new file mode 100644
index 0000000000000..8e442ff3ec04c
--- /dev/null
+++ b/be/src/formats/json/map_column.h
@@ -0,0 +1,27 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include 
+
+#include "column/column.h"
+#include "common/status.h"
+#include "runtime/types.h"
+#include "simdjson.h"
+
+namespace starrocks {
+Status add_map_column(Column* column, const TypeDescriptor& type_desc, const std::string& name,
+                      simdjson::ondemand::value* value);
+} // namespace starrocks
\ No newline at end of file
diff --git a/be/src/formats/json/nullable_column.cpp b/be/src/formats/json/nullable_column.cpp
index 0cb93717b9830..051fc18beab01 100644
--- a/be/src/formats/json/nullable_column.cpp
+++ b/be/src/formats/json/nullable_column.cpp
@@ -18,6 +18,8 @@
 #include "column/array_column.h"
 #include "column/nullable_column.h"
 #include "formats/json/binary_column.h"
+#include "formats/json/map_column.h"
+#include "formats/json/struct_column.h"
 #include "gutil/strings/substitute.h"
 #include "types/logical_type.h"
 
@@ -203,6 +205,78 @@ static Status add_nullable_native_json_column(Column* column, const TypeDescript
     }
 }
 
+// Appends one JSON value to a nullable STRUCT column: a JSON null is stored as a NULL row, any
+// other value is handed to add_struct_column().
+static Status add_nullable_struct_column(Column* column, const TypeDescriptor& type_desc, const std::string& name,
+                                         simdjson::ondemand::value* value) {
+    auto nullable = down_cast(column);
+
+    if (value->is_null()) {
+        nullable->append_nulls(1);
+        return Status::OK();
+    }
+
+    // Load the struct into the data column first; the not-null flag (0) is appended only after
+    // that succeeded.
+    RETURN_IF_ERROR(add_struct_column(nullable->data_column().get(), type_desc, name, value));
+    nullable->null_column()->append(0);
+    return Status::OK();
+}
+
+// Appends one JSON value to a nullable MAP column: a JSON null is stored as a NULL row, any
+// other value is handed to add_map_column().
+static Status add_nullable_map_column(Column* column, const TypeDescriptor& type_desc, const std::string& name,
+                                      simdjson::ondemand::value* value) {
+    auto nullable = down_cast(column);
+
+    if (value->is_null()) {
+        nullable->append_nulls(1);
+        return Status::OK();
+    }
+
+    // Load the map into the data column first; the not-null flag (0) is appended only after
+    // that succeeded.
+    RETURN_IF_ERROR(add_map_column(nullable->data_column().get(), type_desc, name, value));
+    nullable->null_column()->append(0);
+    return Status::OK();
+}
+
+// Same contract as add_nullable_struct_column(), for the column type that exposes
+// begin_append_not_default_value() / finish_append_one_not_default_value().
+static Status add_adaptive_nullable_struct_column(Column* column, const TypeDescriptor& type_desc,
+                                                  const std::string& name, simdjson::ondemand::value* value) {
+    auto adaptive = down_cast(column);
+
+    if (value->is_null()) {
+        adaptive->append_nulls(1);
+        return Status::OK();
+    }
+
+    // Obtain the data column, append the parsed struct, then finish the row.
+    auto& payload = adaptive->begin_append_not_default_value();
+    RETURN_IF_ERROR(add_struct_column(payload.get(), type_desc, name, value));
+    adaptive->finish_append_one_not_default_value();
+    return Status::OK();
+}
+
+// Same contract as add_nullable_map_column(), for the column type that exposes
+// begin_append_not_default_value() / finish_append_one_not_default_value().
+static Status add_adaptive_nullable_map_column(Column* column, const TypeDescriptor& type_desc, const std::string& name,
+                                               simdjson::ondemand::value* value) {
+    auto adaptive = down_cast(column);
+
+    if (value->is_null()) {
+        adaptive->append_nulls(1);
+        return Status::OK();
+    }
+
+    // Obtain the data column, append the parsed map, then finish the row.
+    auto& payload = adaptive->begin_append_not_default_value();
+    RETURN_IF_ERROR(add_map_column(payload.get(), type_desc, name, value));
+    adaptive->finish_append_one_not_default_value();
+    return Status::OK();
+}
+
 static Status add_nullable_column(Column* column, const TypeDescriptor&
type_desc, const std::string& name,
                                   simdjson::ondemand::value* value) {
     // The type mappint should be in accord with JsonScanner::_construct_json_types();
@@ -257,6 +331,13 @@ static Status add_nullable_column(Column* column, const TypeDescriptor& type_des
             return Status::DataQualityError(err_msg);
         }
     }
+    case TYPE_STRUCT: {
+        return add_nullable_struct_column(column, type_desc, name, value);
+    }
+
+    case TYPE_MAP: {
+        return add_nullable_map_column(column, type_desc, name, value);
+    }
 
     default:
         return add_nullable_binary_column(column, type_desc, name, value);
@@ -319,6 +400,13 @@ static Status add_adpative_nullable_column(Column* column, const TypeDescriptor&
             return Status::DataQualityError(err_msg);
         }
     }
+    case TYPE_STRUCT: {
+        return add_adaptive_nullable_struct_column(column, type_desc, name, value);
+    }
+
+    case TYPE_MAP: {
+        return add_adaptive_nullable_map_column(column, type_desc, name, value);
+    }
 
     default:
         return add_adpative_nullable_binary_column(column, type_desc, name, value);
diff --git a/be/src/formats/json/struct_column.cpp b/be/src/formats/json/struct_column.cpp
new file mode 100644
index 0000000000000..34bd3bcbc6925
--- /dev/null
+++ b/be/src/formats/json/struct_column.cpp
@@ -0,0 +1,70 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "formats/json/struct_column.h"
+
+#include "column/struct_column.h"
+#include "formats/json/nullable_column.h"
+#include "gutil/strings/substitute.h"
+
+namespace starrocks {
+
+// Appends the JSON object in `value` to `column` as one struct row.
+//
+// Each field declared in `type_desc` is looked up by name in the object (in any order) and loaded
+// into the matching field column. A declared field that is missing from the object is stored as
+// NULL; keys of the object that are not declared in `type_desc` are ignored.
+//
+// Returns DataQualityError when `value` is not an object or simdjson fails to parse it, and
+// propagates any error returned by add_nullable_column(). `name` is the column name reported in
+// error messages.
+Status add_struct_column(Column* column, const TypeDescriptor& type_desc, const std::string& name,
+                         simdjson::ondemand::value* value) {
+    auto struct_column = down_cast<StructColumn*>(column);
+
+    try {
+        if (value->type() != simdjson::ondemand::json_type::object) {
+            std::ostringstream ss;
+            ss << "Expected value type [object], got [" << value->type() << "]";
+            return Status::DataQualityError(ss.str());
+        }
+        simdjson::ondemand::object obj = value->get_object();
+
+        for (size_t i = 0; i < type_desc.children.size(); i++) {
+            const auto& field_name = type_desc.field_names[i];
+            const auto& field_type_desc = type_desc.children[i];
+            auto field_column = struct_column->field_column(field_name);
+
+            simdjson::ondemand::value field_value;
+            const auto error = obj.find_field_unordered(field_name).get(field_value);
+            if (error == simdjson::NO_SUCH_FIELD) {
+                // A missing field must not reject the whole row: store NULL so that every field
+                // column still receives exactly one entry for this row.
+                field_column->append_nulls(1);
+                continue;
+            }
+            if (error != simdjson::SUCCESS) {
+                // Any other lookup failure is reported through the catch block below.
+                throw simdjson::simdjson_error(error);
+            }
+            RETURN_IF_ERROR(add_nullable_column(field_column.get(), field_type_desc, name, &field_value, true));
+        }
+        return Status::OK();
+    } catch (const simdjson::simdjson_error& e) {
+        auto err_msg = strings::Substitute("Failed to parse value as object, column=$0, error=$1", name,
+                                           simdjson::error_message(e.error()));
+        return Status::DataQualityError(err_msg);
+    }
+}
+} // namespace starrocks
\ No newline at end of file
diff --git a/be/src/formats/json/struct_column.h b/be/src/formats/json/struct_column.h
new file mode 100644
index 0000000000000..3d64a15c17fbc
--- /dev/null
+++ b/be/src/formats/json/struct_column.h
@@ -0,0 +1,27 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "column/column.h" +#include "common/status.h" +#include "runtime/types.h" +#include "simdjson.h" + +namespace starrocks { +Status add_struct_column(Column* column, const TypeDescriptor& type_desc, const std::string& name, + simdjson::ondemand::value* value); +} // namespace starrocks \ No newline at end of file diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 277bc2f5697ab..eb0ddd807ce01 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -160,6 +160,8 @@ set(EXEC_FILES ./formats/json/binary_column_test.cpp ./formats/json/numeric_column_test.cpp ./formats/json/nullable_column_test.cpp + ./formats/json/struct_column_test.cpp + ./formats/json/map_column_test.cpp ./formats/avro/binary_column_test.cpp ./formats/avro/numeric_column_test.cpp ./formats/avro/nullable_column_test.cpp diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp index 75b288d1705e7..c3816c5478ee4 100644 --- a/be/test/exec/json_scanner_test.cpp +++ b/be/test/exec/json_scanner_test.cpp @@ -795,11 +795,15 @@ TEST_F(JsonScannerTest, test_multi_type) { types.emplace_back(TypeDescriptor::create_varchar_type(20)); types.emplace_back(TYPE_DATE); types.emplace_back(TYPE_DATETIME); - types.emplace_back(TypeDescriptor::create_varchar_type(20)); + types.emplace_back(TypeDescriptor::create_array_type(TypeDescriptor(TYPE_INT))); types.emplace_back(TypeDescriptor::create_decimalv3_type(TYPE_DECIMAL128, 27, 9)); types.emplace_back(TypeDescriptor::create_char_type(20)); 
types.emplace_back(TYPE_TIME); + types.emplace_back(TypeDescriptor::create_struct_type( + {"f_int", "f_string"}, {TypeDescriptor(TYPE_INT), TypeDescriptor::create_varchar_type(20)})); + types.emplace_back( + TypeDescriptor::create_map_type(TypeDescriptor::create_varchar_type(20), TypeDescriptor(TYPE_DOUBLE))); std::vector ranges; TBrokerRangeDesc range; @@ -812,22 +816,23 @@ TEST_F(JsonScannerTest, test_multi_type) { range.__set_path("./be/test/exec/test_data/json_scanner/test_multi_type.json"); ranges.emplace_back(range); - auto scanner = - create_json_scanner(types, ranges, - {"f_bool", "f_tinyint", "f_smallint", "f_int", "f_bigint", "f_float", "f_double", - "f_varchar", "f_date", "f_datetime", "f_array", "f_decimal", "f_char", "f_time"}); + auto scanner = create_json_scanner( + types, ranges, + {"f_bool", "f_tinyint", "f_smallint", "f_int", "f_bigint", "f_float", "f_double", "f_varchar", "f_date", + "f_datetime", "f_array", "f_decimal", "f_char", "f_time", "f_struct", "f_map"}); Status st; st = scanner->open(); ASSERT_TRUE(st.ok()); ChunkPtr chunk = scanner->get_next().value(); - EXPECT_EQ(14, chunk->num_columns()); + EXPECT_EQ(16, chunk->num_columns()); EXPECT_EQ(1, chunk->num_rows()); auto expected = "[1, 127, 32767, 2147483647, 9223372036854775807, 3.14, 3.14, 'starrocks', 2021-12-09, 2021-12-09 " - "10:00:00, '[1,3,5]', 1234565789012345678901234567.123456789, 'starrocks', 36000]"; + "10:00:00, [1,3,5], 1234565789012345678901234567.123456789, 'starrocks', 36000, {f_int:1,f_string:'a'}, " + "{'f_double1':3.14,'f_double2':3.141}]"; EXPECT_EQ(expected, chunk->debug_row(0)); } diff --git a/be/test/exec/test_data/json_scanner/test_multi_type.json b/be/test/exec/test_data/json_scanner/test_multi_type.json index 52efe0b96d0c6..e3d6ee975bef9 100644 --- a/be/test/exec/test_data/json_scanner/test_multi_type.json +++ b/be/test/exec/test_data/json_scanner/test_multi_type.json @@ -16,5 +16,13 @@ ], "f_decimal": "1234565789012345678901234567.123456789", "f_char": 
"starrocks", - "f_time": "10:00:00" + "f_time": "10:00:00", + "f_struct": { + "f_int": 1, + "f_string": "a" + }, + "f_map": { + "f_double1": 3.14, + "f_double2": 3.141 + } } \ No newline at end of file diff --git a/be/test/formats/json/map_column_test.cpp b/be/test/formats/json/map_column_test.cpp new file mode 100644 index 0000000000000..a47db8a32b504 --- /dev/null +++ b/be/test/formats/json/map_column_test.cpp @@ -0,0 +1,57 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "formats/json/map_column.h"
+
+#include 
+
+#include "column/column_helper.h"
+#include "runtime/types.h"
+#include "testutil/assert.h"
+
+namespace starrocks {
+
+class AddMapColumnTest : public ::testing::Test {};
+
+// A well-formed JSON object is loaded as one map row holding every key/value pair.
+TEST_F(AddMapColumnTest, test_good_json) {
+    const TypeDescriptor map_type = TypeDescriptor::create_map_type(TypeDescriptor::create_varchar_type(10),
+                                                                    TypeDescriptor::create_varchar_type(10));
+    auto map_column = ColumnHelper::create_column(map_type, false);
+
+    auto input = R"( { "key1": "foo", "key2": "bar", "key3": "baz" } )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value root = document.get_value();
+
+    EXPECT_OK(add_map_column(map_column.get(), map_type, "root_key", &root));
+
+    EXPECT_EQ("{'key1':'foo','key2':'bar','key3':'baz'}", map_column->debug_string());
+}
+
+// An object that lacks its closing brace is rejected with a DataQualityError.
+TEST_F(AddMapColumnTest, test_bad_json) {
+    const TypeDescriptor map_type = TypeDescriptor::create_map_type(TypeDescriptor::create_varchar_type(10),
+                                                                    TypeDescriptor::create_varchar_type(10));
+    auto map_column = ColumnHelper::create_column(map_type, false);
+
+    auto input = R"( { "key1": "foo", "key2": "bar", "key3": "baz" )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value root = document.get_value();
+
+    EXPECT_STATUS(Status::DataQualityError(""), add_map_column(map_column.get(), map_type, "root_key", &root));
+}
+
+} // namespace starrocks
diff --git a/be/test/formats/json/nullable_column_test.cpp b/be/test/formats/json/nullable_column_test.cpp
index 18dc3fbed37f2..327f8787bb711 100644
--- a/be/test/formats/json/nullable_column_test.cpp
+++ b/be/test/formats/json/nullable_column_test.cpp
@@ -19,6 +19,7 @@
 #include "column/column_helper.h"
 #include "runtime/types.h"
 #include "simdjson.h"
+#include "testutil/assert.h"
 
 namespace starrocks {
 
@@ -111,4 +112,34 @@ TEST_F(AddNullableColumnTest, add_null_numeric_array) {
     column->check_or_die();
 }
 
+// A nested object is loaded into a nullable STRUCT column; the undeclared "key3" is not stored.
+TEST_F(AddNullableColumnTest, test_add_struct) {
+    const TypeDescriptor struct_type = TypeDescriptor::create_struct_type(
+            {"key1", "key2"}, {TypeDescriptor::create_varchar_type(10), TypeDescriptor::create_varchar_type(10)});
+    auto struct_column = ColumnHelper::create_column(struct_type, true);
+
+    auto input = R"( { "key0": {"key1": "foo", "key2": "bar", "key3": "baz" }} )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value nested = document.find_field_unordered("key0");
+
+    ASSERT_OK(add_nullable_column(struct_column.get(), struct_type, "root_key", &nested, true));
+    ASSERT_EQ("[{key1:'foo',key2:'bar'}]", struct_column->debug_string());
+}
+
+// A nested object is loaded into a nullable MAP column with all three entries.
+TEST_F(AddNullableColumnTest, test_add_map) {
+    const TypeDescriptor map_type = TypeDescriptor::create_map_type(TypeDescriptor::create_varchar_type(10),
+                                                                    TypeDescriptor::create_varchar_type(10));
+    auto map_column = ColumnHelper::create_column(map_type, true);
+
+    auto input = R"( { "key0": {"key1": "foo", "key2": "bar", "key3": "baz" }} )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value nested = document.find_field_unordered("key0");
+
+    ASSERT_OK(add_nullable_column(map_column.get(), map_type, "root_key", &nested, true));
+
+    ASSERT_EQ("[{'key1':'foo','key2':'bar','key3':'baz'}]", map_column->debug_string());
+}
 } // namespace starrocks
diff --git a/be/test/formats/json/struct_column_test.cpp b/be/test/formats/json/struct_column_test.cpp
new file mode 100644
index 0000000000000..b3cee037c8265
--- /dev/null
+++ b/be/test/formats/json/struct_column_test.cpp
@@ -0,0 +1,56 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "formats/json/struct_column.h"
+
+#include 
+
+#include "column/column_helper.h"
+#include "runtime/types.h"
+#include "testutil/assert.h"
+
+namespace starrocks {
+
+class AddStructColumnTest : public ::testing::Test {};
+
+// Declared fields are loaded by name; "key3" is not part of the struct type and is not stored.
+TEST_F(AddStructColumnTest, test_good_json) {
+    const TypeDescriptor struct_type = TypeDescriptor::create_struct_type(
+            {"key1", "key2"}, {TypeDescriptor::create_varchar_type(10), TypeDescriptor::create_varchar_type(10)});
+    auto struct_column = ColumnHelper::create_column(struct_type, false);
+
+    auto input = R"( { "key1": "foo", "key2": "bar", "key3": "baz" } )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value root = document.get_value();
+    EXPECT_OK(add_struct_column(struct_column.get(), struct_type, "root_key", &root));
+
+    EXPECT_EQ("{key1:'foo',key2:'bar'}", struct_column->debug_string());
+}
+
+// A missing comma between two fields makes the object malformed and yields a DataQualityError.
+TEST_F(AddStructColumnTest, test_bad_json) {
+    const TypeDescriptor struct_type = TypeDescriptor::create_struct_type(
+            {"key1", "key2", "key3"}, {TypeDescriptor::create_varchar_type(10), TypeDescriptor::create_varchar_type(10),
+                                       TypeDescriptor::create_varchar_type(10)});
+    auto struct_column = ColumnHelper::create_column(struct_type, false);
+
+    auto input = R"( { "key1": "foo", "key2": "bar" "key3": "baz"} )"_padded;
+    simdjson::ondemand::parser json_parser;
+    auto document = json_parser.iterate(input);
+    simdjson::ondemand::value root = document.get_value();
+
+    EXPECT_STATUS(Status::DataQualityError(""), add_struct_column(struct_column.get(), struct_type, "root_key", &root));
+}
+
+} // namespace starrocks