Skip to content

Commit

Permalink
Bump simdjson to 3.9.4
Browse files Browse the repository at this point in the history
Signed-off-by: wyb <wybb86@gmail.com>
  • Loading branch information
wyb committed Jul 4, 2024
1 parent eb62f0e commit cf93183
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 11 deletions.
19 changes: 19 additions & 0 deletions be/src/formats/json/numeric_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,25 @@ static Status add_column_with_numeric_value(FixedLengthColumn<T>* column, const
return Status::OK();
}

case simdjson::ondemand::number_type::big_integer: {
auto s = value->raw_json_token();
StringParser::ParseResult r;
auto in = StringParser::string_to_int<int128_t>(s.data(), s.size(), &r);
if (r != StringParser::PARSE_SUCCESS) {
auto err_msg = strings::Substitute("Fail to convert big integer. column=$0, value=$1", name, s);
return Status::InvalidArgument(err_msg);
}

T out{};
if (!checked_cast(in, &out)) {
column->append_numbers(&out, sizeof(out));
} else {
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in);
return Status::InvalidArgument(err_msg);
}
return Status::OK();
}

case simdjson::ondemand::number_type::floating_point_number: {
double in = value->get_double();
T out{};
Expand Down
26 changes: 20 additions & 6 deletions be/test/formats/json/numeric_column_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ TEST_F(AddNumericColumnTest, test_add_int64_overflow) {
auto doc = parser.iterate(json);
simdjson::ondemand::value val = doc.find_field("f_int64");
auto st = add_numeric_column<int64_t>(column.get(), t, "f_int64", &val);
ASSERT_TRUE(st.is_data_quality_error());
ASSERT_TRUE(st.is_invalid_argument());
}

TEST_F(AddNumericColumnTest, test_add_int64_overflow2) {
Expand All @@ -119,6 +119,19 @@ TEST_F(AddNumericColumnTest, test_add_int64_overflow2) {
ASSERT_TRUE(st.is_invalid_argument());
}

TEST_F(AddNumericColumnTest, test_add_int64_overflow3) {
auto column = FixedLengthColumn<int64_t>::create();
TypeDescriptor t(TYPE_BIGINT);

simdjson::ondemand::parser parser;
auto json = R"( { "f_int64": 18446744073709551616} )"_padded;
auto doc = parser.iterate(json);
simdjson::ondemand::value val = doc.find_field("f_int64");

auto st = add_numeric_column<int64_t>(column.get(), t, "f_int64", &val);
ASSERT_TRUE(st.is_invalid_argument());
}

TEST_F(AddNumericColumnTest, test_add_int128) {
auto column = FixedLengthColumn<int128_t>::create();
TypeDescriptor t(TYPE_LARGEINT);
Expand All @@ -134,9 +147,7 @@ TEST_F(AddNumericColumnTest, test_add_int128) {
ASSERT_EQ("[9223372036854775808]", column->debug_string());
}

// Currently simdjson can not parse number < -9223372036854775808 (lower bound of int64_t)
// or > 18446744073709551615 (upper bound of uint64_t)
TEST_F(AddNumericColumnTest, test_add_int128_invalid) {
TEST_F(AddNumericColumnTest, test_add_int128_big_integer) {
auto column = FixedLengthColumn<int128_t>::create();
TypeDescriptor t(TYPE_LARGEINT);

Expand All @@ -146,14 +157,17 @@ TEST_F(AddNumericColumnTest, test_add_int128_invalid) {
simdjson::ondemand::value val = doc.find_field("f_int128");

auto st = add_numeric_column<int128_t>(column.get(), t, "f_int128", &val);
ASSERT_TRUE(st.is_data_quality_error());
ASSERT_TRUE(st.ok());
ASSERT_EQ("[-9223372036854775809]", column->debug_string());

column->reset_column();
json = R"( { "f_int128": 18446744073709551616} )"_padded;
doc = parser.iterate(json);
val = doc.find_field("f_int128");

st = add_numeric_column<int128_t>(column.get(), t, "f_int128", &val);
ASSERT_TRUE(st.is_data_quality_error());
ASSERT_TRUE(st.ok());
ASSERT_EQ("[18446744073709551616]", column->debug_string());
}

} // namespace starrocks
16 changes: 16 additions & 0 deletions be/test/formats/json/struct_column_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,22 @@ TEST_F(AddStructColumnTest, test_bad_json) {
EXPECT_EQ("{key1:'foo',key2:'bar',key3:NULL}", column->debug_string());
}

TEST_F(AddStructColumnTest, test_bad_json2) {
TypeDescriptor type_desc = TypeDescriptor::create_struct_type(
{"key1", "key2", "key3"},{TypeDescriptor::create_varchar_type(10), TypeDescriptor::create_json_type(),
TypeDescriptor::create_varchar_type(10)});
auto column = ColumnHelper::create_column(type_desc, false);

simdjson::ondemand::parser parser;
auto json = R"( { "key1": "foo", "key2": {a:1,b:2}, "key3": "bar"} )"_padded;
auto doc = parser.iterate(json);
simdjson::ondemand::value val = doc.get_value();

EXPECT_OK(add_struct_column(column.get(), type_desc, "root_key", &val));

EXPECT_EQ("{key1:'foo',key2:NULL,key3:NULL}", column->debug_string());
}

TEST_F(AddStructColumnTest, test_field_not_found) {
TypeDescriptor type_desc = TypeDescriptor::create_struct_type(
{"key1", "key2"}, {TypeDescriptor::create_varchar_type(10), TypeDescriptor::create_varchar_type(10)});
Expand Down
10 changes: 5 additions & 5 deletions thirdparty/vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export TP_JAR_DIR=$TP_INSTALL_DIR/lib/jar
# Definitions for architecture-related thirdparty
MACHINE_TYPE=$(uname -m)
# handle mac m1 platform, change arm64 to aarch64
if [[ "${MACHINE_TYPE}" == "arm64" ]]; then
if [[ "${MACHINE_TYPE}" == "arm64" ]]; then
MACHINE_TYPE="aarch64"
fi

Expand Down Expand Up @@ -163,10 +163,10 @@ RAPIDJSON_SOURCE=rapidjson-1.1.0
RAPIDJSON_MD5SUM="badd12c511e081fec6c89c43a7027bce"

# simdjson
SIMDJSON_DOWNLOAD="https://github.com/simdjson/simdjson/archive/refs/tags/v2.2.0.tar.gz"
SIMDJSON_NAME=simdjson-v2.2.0.tar.gz
SIMDJSON_SOURCE=simdjson-2.2.0
SIMDJSON_MD5SUM="9bd0ced53281484d8842a9429065943d"
SIMDJSON_DOWNLOAD="https://github.com/simdjson/simdjson/archive/refs/tags/v3.9.4.tar.gz"
SIMDJSON_NAME=simdjson-v3.9.4.tar.gz
SIMDJSON_SOURCE=simdjson-3.9.4
SIMDJSON_MD5SUM="bdc1dfcb2a89dc0c09e8370808a946f5"

# curl
CURL_DOWNLOAD="https://curl.se/download/curl-8.4.0.tar.gz"
Expand Down

0 comments on commit cf93183

Please sign in to comment.