Skip to content

Commit 97d2829

Browse files
authored
Fix semantic issues in cast function (#280)
1 parent 23c0569 commit 97d2829

File tree

5 files changed

+62
-15
lines changed

5 files changed

+62
-15
lines changed

velox/expression/tests/CastExprTest.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,16 @@ TEST_F(CastExprTest, date) {
421421

422422
setCastIntByTruncate(true);
423423
testCast<std::string, Date>("date", input, result);
424+
425+
// Wrong date format case.
426+
std::vector<std::optional<std::string>> inputWrongFormat{
427+
"1970-01/01", "2023/05/10", "2023-/05-/10", "20150318"};
428+
std::vector<std::optional<Date>> nullResult{
429+
std::nullopt, std::nullopt, std::nullopt, std::nullopt};
430+
testCast<std::string, Date>(
431+
"date", inputWrongFormat, nullResult, false, true);
432+
testCast<std::string, Date>(
433+
"date", inputWrongFormat, nullResult, true, false);
424434
}
425435

426436
TEST_F(CastExprTest, invalidDate) {
@@ -552,6 +562,13 @@ TEST_F(CastExprTest, allowDecimal) {
552562
"int", {"-.", "0.0", "125.5", "-128.3"}, {0, 0, 125, -128}, false, true);
553563
}
554564

565+
TEST_F(CastExprTest, sparkSemantic) {
566+
// Allow decimal. Any non-zero float, negative or fractional, must cast to true.
567+
setCastIntAllowDecimalAndByTruncate(true);
568+
testCast<float, bool>(
569+
"bool", {0.5, -0.5, 1, 0}, {true, true, true, false}, false, true);
570+
}
571+
555572
constexpr vector_size_t kVectorSize = 1'000;
556573

557574
TEST_F(CastExprTest, mapCast) {

velox/functions/lib/tests/DateTimeFormatterTest.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -547,11 +547,13 @@ TEST_F(JodaDateTimeFormatterTest, parseYear) {
547547
EXPECT_THROW(parseJoda("++100", "y"), VeloxUserError);
548548

549549
// Probe the year range
550-
EXPECT_THROW(parseJoda("-292275056", "y"), VeloxUserError);
551-
EXPECT_THROW(parseJoda("292278995", "y"), VeloxUserError);
552-
EXPECT_EQ(
553-
util::fromTimestampString("292278994-01-01"),
554-
parseJoda("292278994", "y").timestamp);
550+
// Temporarily disabled to match Spark semantics (years with more than 7
551+
// digits are rejected).
552+
// EXPECT_THROW(parseJoda("-292275056", "y"), VeloxUserError);
553+
// EXPECT_THROW(parseJoda("292278995", "y"), VeloxUserError);
554+
// EXPECT_EQ(
555+
// util::fromTimestampString("292278994-01-01"),
556+
// parseJoda("292278994", "y").timestamp);
555557
}
556558

557559
TEST_F(JodaDateTimeFormatterTest, parseWeekYear) {
@@ -626,9 +628,11 @@ TEST_F(JodaDateTimeFormatterTest, parseWeekYear) {
626628

627629
TEST_F(JodaDateTimeFormatterTest, parseCenturyOfEra) {
628630
// Probe century range
629-
EXPECT_EQ(
630-
util::fromTimestampString("292278900-01-01 00:00:00"),
631-
parseJoda("2922789", "CCCCCCC").timestamp);
631+
// Temporarily disabled to match Spark semantics (years with more than 7
632+
// digits are rejected).
633+
// EXPECT_EQ(
634+
// util::fromTimestampString("292278900-01-01 00:00:00"),
635+
// parseJoda("2922789", "CCCCCCC").timestamp);
632636
EXPECT_EQ(
633637
util::fromTimestampString("00-01-01 00:00:00"),
634638
parseJoda("0", "C").timestamp);

velox/type/Conversions.h

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,9 @@ struct Converter<
301301
if (v > LimitType::maxLimit()) {
302302
return LimitType::max();
303303
}
304-
if (v < LimitType::minLimit()) {
304+
// bool type's min is 0, but spark expects true for casting negative float
305+
// data.
306+
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
305307
return LimitType::min();
306308
}
307309
return LimitType::cast(v);
@@ -321,7 +323,9 @@ struct Converter<
321323
if (v > LimitType::maxLimit()) {
322324
return LimitType::max();
323325
}
324-
if (v < LimitType::minLimit()) {
326+
// bool type's min is 0, but spark expects true for casting negative float
327+
// data.
328+
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
325329
return LimitType::min();
326330
}
327331
return LimitType::cast(v);
@@ -596,15 +600,30 @@ struct Converter<TypeKind::DATE, void, TRUNCATE, ALLOW_DECIMAL> {
596600
}
597601

598602
static T cast(folly::StringPiece v, bool& nullOutput) {
599-
return fromDateString(v.data(), v.size());
603+
try {
604+
return fromDateString(v.data(), v.size());
605+
} catch (const VeloxUserError& ve) {
606+
nullOutput = true;
607+
return (T)0;
608+
}
600609
}
601610

602611
static T cast(const StringView& v, bool& nullOutput) {
603-
return fromDateString(v.data(), v.size());
612+
try {
613+
return fromDateString(v.data(), v.size());
614+
} catch (const VeloxUserError& ve) {
615+
nullOutput = true;
616+
return (T)0;
617+
}
604618
}
605619

606620
static T cast(const std::string& v, bool& nullOutput) {
607-
return fromDateString(v.data(), v.size());
621+
try {
622+
return fromDateString(v.data(), v.size());
623+
} catch (const VeloxUserError& ve) {
624+
nullOutput = true;
625+
return (T)0;
626+
}
608627
}
609628

610629
static T cast(const Timestamp& t, bool& nullOutput) {

velox/type/TimestampConversion.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,17 @@ bool tryParseDateString(
177177
if (!characterIsDigit(buf[pos])) {
178178
return false;
179179
}
180+
int yearSegStart = pos;
180181
// First parse the year.
181182
for (; pos < len && characterIsDigit(buf[pos]); pos++) {
182183
year = checkedPlus((buf[pos] - '0'), checkedMultiply(year, 10));
183184
if (year > kMaxYear) {
184185
break;
185186
}
187+
// Align with Spark: the year must not have more than 7 digits.
188+
if (pos - yearSegStart + 1 > 7) {
189+
return false;
190+
}
186191
}
187192
if (yearneg) {
188193
year = checkedNegate(year);
@@ -203,7 +208,8 @@ bool tryParseDateString(
203208

204209
// Fetch the separator.
205210
sep = buf[pos++];
206-
if (sep != ' ' && sep != '-' && sep != '/' && sep != '\\') {
211+
// For Spark, the "/" separator is not supported.
212+
if (sep != ' ' && sep != '-' && sep != '\\') {
207213
// Invalid separator.
208214
return false;
209215
}

velox/type/tests/TimestampConversionTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ TEST(DateTimeUtilTest, fromDateString) {
8383
EXPECT_EQ(-719162, fromDateString(" \t \n 00001-1-1 \n"));
8484

8585
// Different separators.
86-
EXPECT_EQ(-719162, fromDateString("1/1/1"));
86+
// Illegal date format for spark.
87+
// EXPECT_EQ(-719162, fromDateString("1/1/1"));
8788
EXPECT_EQ(-719162, fromDateString("1 1 1"));
8889
EXPECT_EQ(-719162, fromDateString("1\\1\\1"));
8990

0 commit comments

Comments
 (0)