Skip to content

Commit

Permalink
[fix](decimal) fix undefined behaviour of divide by zero when cast st…
Browse files Browse the repository at this point in the history
…ring to decimal (apache#26792)
  • Loading branch information
jacktengg authored and gnehil committed Dec 4, 2023
1 parent 7cdd9c9 commit fd78443
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 37 deletions.
45 changes: 14 additions & 31 deletions be/src/util/string_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,9 @@ inline int StringParser::StringParseTraits<__int128>::max_ascii_len() {
template <PrimitiveType P, typename T>
T StringParser::string_to_decimal(const char* s, int len, int type_precision, int type_scale,
ParseResult* result) {
static_assert(
std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || std::is_same_v<T, __int128>,
"Cast string to decimal only support target type int32_t, int64_t or __int128.");
// Special cases:
// 1) '' == Fail, an empty string fails to parse.
// 2) ' # ' == #, leading and trailing white space is ignored.
Expand Down Expand Up @@ -670,11 +673,7 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
return 0;
}
*result = StringParser::PARSE_SUCCESS;
if constexpr (std::is_same_v<T, vectorized::Int128I>) {
value *= get_scale_multiplier<__int128>(type_scale - scale);
} else {
value *= get_scale_multiplier<T>(type_scale - scale);
}
value *= get_scale_multiplier<T>(type_scale - scale);

return is_negative ? T(-value) : T(value);
}
Expand Down Expand Up @@ -762,11 +761,7 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
// scale must be set to 0 and the value set to 100 which means a precision of 3.
precision += exponent - scale;

if constexpr (std::is_same_v<T, vectorized::Int128I>) {
value *= get_scale_multiplier<__int128>(exponent - scale);
} else {
value *= get_scale_multiplier<T>(exponent - scale);
}
value *= get_scale_multiplier<T>(exponent - scale);
scale = 0;
} else {
// Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
Expand Down Expand Up @@ -795,22 +790,14 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
} else if (UNLIKELY(scale > type_scale)) {
*result = StringParser::PARSE_UNDERFLOW;
int shift = scale - type_scale;
if (shift > 0) {
T divisor;
if constexpr (std::is_same_v<T, vectorized::Int128I>) {
divisor = get_scale_multiplier<__int128>(shift);
} else {
divisor = get_scale_multiplier<T>(shift);
}
if (LIKELY(divisor > 0)) {
T remainder = value % divisor;
value /= divisor;
if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
value += 1;
}
} else {
DCHECK(divisor == -1 || divisor == 0); // //DCHECK_EQ doesn't work with __int128.
value = 0;
T divisor = get_scale_multiplier<T>(shift);
if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
value = 0;
} else {
T remainder = value % divisor;
value /= divisor;
if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
value += 1;
}
}
DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
Expand All @@ -819,11 +806,7 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
}

if (type_scale > scale) {
if constexpr (std::is_same_v<T, vectorized::Int128I>) {
value *= get_scale_multiplier<__int128>(type_scale - scale);
} else {
value *= get_scale_multiplier<T>(type_scale - scale);
}
value *= get_scale_multiplier<T>(type_scale - scale);
}

return is_negative ? T(-value) : T(value);
Expand Down
82 changes: 76 additions & 6 deletions be/src/vec/common/int_exp.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,86 @@ inline uint64_t int_exp10(int x) {

namespace common {

inline constexpr std::int32_t exp10_i32(int x) {
return exp_details::get_exp<std::int32_t, 10, 10>(x);
constexpr inline int exp10_i32(int x) {
if (x < 0) return 0;
if (x > 9) return std::numeric_limits<int>::max();

constexpr int values[] = {1, 10, 100, 1000, 10000,
100000, 1000000, 10000000, 100000000, 1000000000};
return values[x];
}

inline constexpr std::int64_t exp10_i64(int x) {
return exp_details::get_exp<std::int64_t, 10, 19>(x);
constexpr inline int64_t exp10_i64(int x) {
if (x < 0) return 0;
if (x > 18) return std::numeric_limits<int64_t>::max();

constexpr int64_t values[] = {1LL,
10LL,
100LL,
1000LL,
10000LL,
100000LL,
1000000LL,
10000000LL,
100000000LL,
1000000000LL,
10000000000LL,
100000000000LL,
1000000000000LL,
10000000000000LL,
100000000000000LL,
1000000000000000LL,
10000000000000000LL,
100000000000000000LL,
1000000000000000000LL};
return values[x];
}

inline constexpr __int128 exp10_i128(int x) {
return exp_details::get_exp<__int128, 10, 39>(x);
constexpr inline __int128 exp10_i128(int x) {
if (x < 0) return 0;
if (x > 38) return std::numeric_limits<__int128>::max();

constexpr __int128 values[] = {
static_cast<__int128>(1LL),
static_cast<__int128>(10LL),
static_cast<__int128>(100LL),
static_cast<__int128>(1000LL),
static_cast<__int128>(10000LL),
static_cast<__int128>(100000LL),
static_cast<__int128>(1000000LL),
static_cast<__int128>(10000000LL),
static_cast<__int128>(100000000LL),
static_cast<__int128>(1000000000LL),
static_cast<__int128>(10000000000LL),
static_cast<__int128>(100000000000LL),
static_cast<__int128>(1000000000000LL),
static_cast<__int128>(10000000000000LL),
static_cast<__int128>(100000000000000LL),
static_cast<__int128>(1000000000000000LL),
static_cast<__int128>(10000000000000000LL),
static_cast<__int128>(100000000000000000LL),
static_cast<__int128>(1000000000000000000LL),
static_cast<__int128>(1000000000000000000LL) * 10LL,
static_cast<__int128>(1000000000000000000LL) * 100LL,
static_cast<__int128>(1000000000000000000LL) * 1000LL,
static_cast<__int128>(1000000000000000000LL) * 10000LL,
static_cast<__int128>(1000000000000000000LL) * 100000LL,
static_cast<__int128>(1000000000000000000LL) * 1000000LL,
static_cast<__int128>(1000000000000000000LL) * 10000000LL,
static_cast<__int128>(1000000000000000000LL) * 100000000LL,
static_cast<__int128>(1000000000000000000LL) * 1000000000LL,
static_cast<__int128>(1000000000000000000LL) * 10000000000LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000LL,
static_cast<__int128>(1000000000000000000LL) * 1000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 10000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 1000000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 10000000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000000000LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000000000LL * 10LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000000000LL * 100LL,
static_cast<__int128>(1000000000000000000LL) * 100000000000000000LL * 1000LL};
return values[x];
}

} // namespace common
16 changes: 16 additions & 0 deletions regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !cast0 --
0

-- !cast1 --
0

-- !cast2 --
0

-- !cast3 --
0

-- !cast4 --
0

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_decimalv3_cast") {
qt_cast0 """ select cast('0.00164999999999998' as decimalv3(9,0)); """
qt_cast1 """ select cast('0.00164999999999998' as decimalv3(10,0)); """
qt_cast2 """ select cast('0.000000000001234567890' as decimalv3(18,0)); """
qt_cast3 """ select cast('0.000000000001234567890' as decimalv3(19,0)); """
qt_cast4 """ select cast('0.00000000000000000000000000000012345678901' as decimalv3(38,0)); """
}

0 comments on commit fd78443

Please sign in to comment.