From 8a6ab15696bb1ca68a4d69678e855e00f5bc8742 Mon Sep 17 00:00:00 2001 From: Wei He Date: Wed, 18 Sep 2024 10:15:36 -0700 Subject: [PATCH] Support UNSUPPORTED_INPUT_UNCATCHABLE VeloxRuntimeError in expression fuzzer (#10993) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10993 We recently made cast-varchar-to-integral throw VeloxRuntimeError on unicode inputs and made date-time functions throw VeloxRuntimeError when timestamps are out of supported range. The expression fuzzer tests have been failing due to these changes because VeloxRuntimeError is not allowed in fuzzer. This diff introduces a new ErrorCode `UNSUPPORTED_INPUT_UNCATCHABLE` to VeloxRuntimeError that is ignored in fuzzers, and makes the cast and date-time functions throw errors with this error code. Reviewed By: xiaoxmeng, kgpai Differential Revision: D62612232 fbshipit-source-id: 334d615555a12c47d9b2219dca1eb382edcfabf3 --- velox/common/base/Exceptions.h | 25 +++++++++++++ velox/common/base/VeloxException.h | 6 ++++ velox/expression/CastExpr-inl.h | 4 ++- .../fuzzer/ExpressionFuzzerVerifier.cpp | 33 +++++++++-------- velox/expression/fuzzer/FuzzerToolkit.h | 3 ++ velox/expression/tests/ExpressionVerifier.cpp | 35 ++++++++++++++----- velox/type/Timestamp.cpp | 6 ++-- velox/type/tz/TimeZoneMap.cpp | 4 ++- 8 files changed, 88 insertions(+), 28 deletions(-) diff --git a/velox/common/base/Exceptions.h b/velox/common/base/Exceptions.h index f552f0709c0b..093641cd2b35 100644 --- a/velox/common/base/Exceptions.h +++ b/velox/common/base/Exceptions.h @@ -196,6 +196,20 @@ DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxRuntimeError); /* isRetriable */ false, \ ##__VA_ARGS__) +/// Throws VeloxRuntimeError when functions receive input values out of the +/// supported range. This should only be used when we want to force TRY() to not +/// suppress the error. +#define VELOX_CHECK_UNSUPPORTED_INPUT_UNCATCHABLE(expr, ...) \ + if (UNLIKELY(!(expr))) { \ + _VELOX_THROW_IMPL( \ + ::facebook::velox::VeloxRuntimeError, \ + #expr, \ + ::facebook::velox::error_source::kErrorSourceRuntime.c_str(), \ + ::facebook::velox::error_code::kUnsupportedInputUncatchable.c_str(), \ + /* isRetriable */ false, \ + __VA_ARGS__); \ + } + // If the caller passes a custom message (4 *or more* arguments), we // have to construct a format string from ours ("({} vs. {})") plus // theirs by adding a space and shuffling arguments. If they don't (exactly 3 @@ -330,6 +344,17 @@ DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxRuntimeError); /* isRetriable */ false, \ ##__VA_ARGS__) +/// Throws VeloxRuntimeError when functions receive input values out of the +/// supported range. This should only be used when we want to force TRY() to not +/// suppress the error. +#define VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE(...) \ + _VELOX_THROW( \ + ::facebook::velox::VeloxRuntimeError, \ + ::facebook::velox::error_source::kErrorSourceRuntime.c_str(), \ + ::facebook::velox::error_code::kUnsupportedInputUncatchable.c_str(), \ + /* isRetriable */ false, \ + ##__VA_ARGS__) + DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxUserError); // For all below macros, an additional message can be passed using a diff --git a/velox/common/base/VeloxException.h b/velox/common/base/VeloxException.h index c14db1c3da62..bb22e9a490c2 100644 --- a/velox/common/base/VeloxException.h +++ b/velox/common/base/VeloxException.h @@ -110,6 +110,12 @@ inline constexpr auto kFileNotFound = "FILE_NOT_FOUND"_fs; // We do not know how to classify it yet. inline constexpr auto kUnknown = "UNKNOWN"_fs; + +// VeloxRuntimeErrors due to unsupported input values such as unicode input to +// cast-varchar-to-integer and timestamps beyond the year 2037 to datetime +// functions. This kind of errors is allowed in expression fuzzer. +inline constexpr auto kUnsupportedInputUncatchable = + "UNSUPPORTED_INPUT_UNCATCHABLE"_fs; } // namespace error_code class VeloxException : public std::exception { diff --git a/velox/expression/CastExpr-inl.h b/velox/expression/CastExpr-inl.h index d22b4adac870..e767c5277cff 100644 --- a/velox/expression/CastExpr-inl.h +++ b/velox/expression/CastExpr-inl.h @@ -309,7 +309,9 @@ void CastExpr::applyCastKernel( ToKind == TypeKind::INTEGER || ToKind == TypeKind::BIGINT || ToKind == TypeKind::HUGEINT) { if constexpr (TPolicy::throwOnUnicode) { - VELOX_CHECK( + // This is a special case where we intentionally throw + // VeloxRuntimeError to avoid it being suppressed by TRY(). + VELOX_CHECK_UNSUPPORTED_INPUT_UNCATCHABLE( functions::stringCore::isAscii( inputRowValue.data(), inputRowValue.size()), "Unicode characters are not supported for conversion to integer types"); diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index cbd7979bbf51..0d325ade5fba 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -240,19 +240,17 @@ void ExpressionFuzzerVerifier::retryWithTry( plan->type(), std::vector{plan}, "try")); } - RowVectorPtr tryResult; + ResultOrError tryResult; - // The function throws if anything goes wrong. + // The function throws if anything goes wrong except + // UNSUPPORTED_INPUT_UNCATCHABLE errors. try { - tryResult = - verifier_ - .verify( - tryPlans, - rowVector, - resultVector ? BaseVector::copy(*resultVector) : nullptr, - false, // canThrow - columnsToWrapInLazy) - .result; + tryResult = verifier_.verify( + tryPlans, + rowVector, + resultVector ? BaseVector::copy(*resultVector) : nullptr, + false, // canThrow + columnsToWrapInLazy); } catch (const std::exception&) { if (options_.findMinimalSubexpression) { test::computeMinimumSubExpression( @@ -264,10 +262,15 @@ void ExpressionFuzzerVerifier::retryWithTry( } throw; } + if (tryResult.unsupportedInputUncatchableError) { + LOG(INFO) + << "Retry with try fails to find minimal subexpression due to UNSUPPORTED_INPUT_UNCATCHABLE error."; + return; + } // Re-evaluate the original expression on rows that didn't produce an // error (i.e. returned non-NULL results when evaluated with TRY). - BufferPtr noErrorIndices = extractNonNullIndices(tryResult); + BufferPtr noErrorIndices = extractNonNullIndices(tryResult.result); if (noErrorIndices != nullptr) { auto noErrorRowVector = wrapChildren(noErrorIndices, rowVector); @@ -360,8 +363,10 @@ void ExpressionFuzzerVerifier::go() { // If both paths threw compatible exceptions, we add a try() function to // the expression's root and execute it again. This time the expression - // cannot throw. - if (result.exceptionPtr && options_.retryWithTry) { + // cannot throw. Expressions that throw UNSUPPORTED_INPUT_UNCATCHABLE errors + // are not supported. + if (result.exceptionPtr && options_.retryWithTry && + !result.unsupportedInputUncatchableError) { LOG(INFO) << "Both paths failed with compatible exceptions. Retrying expression using try()."; retryWithTry(plans, rowVector, resultVectors, columnsToWrapInLazy); diff --git a/velox/expression/fuzzer/FuzzerToolkit.h b/velox/expression/fuzzer/FuzzerToolkit.h index 9d78d0899c82..6eb0a17f9909 100644 --- a/velox/expression/fuzzer/FuzzerToolkit.h +++ b/velox/expression/fuzzer/FuzzerToolkit.h @@ -46,6 +46,9 @@ struct SignatureTemplate { struct ResultOrError { RowVectorPtr result; std::exception_ptr exceptionPtr; + /// Whether the exception is UNSUPPORTED_INPUT_UNCATCHABLE error. This flag + /// should only be set to true when exceptionPtr is not a nullptr. + bool unsupportedInputUncatchableError{false}; }; /// Sort callable function signatures. diff --git a/velox/expression/tests/ExpressionVerifier.cpp b/velox/expression/tests/ExpressionVerifier.cpp index 1d3b74cf82fb..d22ec7cbde9b 100644 --- a/velox/expression/tests/ExpressionVerifier.cpp +++ b/velox/expression/tests/ExpressionVerifier.cpp @@ -107,6 +107,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify( // Execute with common expression eval path. Some columns of the input row // vector will be wrapped in lazy as specified in 'columnsToWrapInLazy'. + bool unsupportedInputUncatchableError = false; try { exec::ExprSet exprSetCommon( plans, execCtx_, !options_.disableConstantFolding); @@ -133,10 +134,17 @@ fuzzer::ResultOrError ExpressionVerifier::verify( "Copy of original input", "Input after common"); } - } catch (const VeloxUserError&) { - if (!canThrow) { - LOG(ERROR) - << "Common eval wasn't supposed to throw, but it did. Aborting."; + } catch (const VeloxException& e) { + if (e.errorCode() == error_code::kUnsupportedInputUncatchable) { + unsupportedInputUncatchableError = true; + } else if (!(canThrow && e.isUserError())) { + if (!canThrow) { + LOG(ERROR) + << "Common eval wasn't supposed to throw, but it did. Aborting."; + } else if (!e.isUserError()) { + LOG(ERROR) + << "Common eval: VeloxRuntimeErrors other than UNSUPPORTED_INPUT_UNCATCHABLE error are not allowed."; + } persistReproInfoIfNeeded( rowVector, columnsToWrapInLazy, copiedResult, sql, complexConstants); throw; @@ -144,7 +152,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify( exceptionCommonPtr = std::current_exception(); } catch (...) { LOG(ERROR) - << "Common eval: Exceptions other than VeloxUserError are not allowed."; + << "Common eval: Exceptions other than VeloxUserError or VeloxRuntimeError of UNSUPPORTED_INPUT_UNCATCHABLE are not allowed."; persistReproInfoIfNeeded( rowVector, columnsToWrapInLazy, copiedResult, sql, complexConstants); throw; @@ -168,11 +176,19 @@ fuzzer::ResultOrError ExpressionVerifier::verify( "Copy of original input", "Input after simplified"); - } catch (const VeloxUserError&) { + } catch (const VeloxException& e) { + if (!e.isUserError() && + e.errorCode() != error_code::kUnsupportedInputUncatchable) { + LOG(ERROR) + << "Simplified eval: VeloxRuntimeErrors other than UNSUPPORTED_INPUT_UNCATCHABLE error are not allowed."; + persistReproInfoIfNeeded( + rowVector, columnsToWrapInLazy, copiedResult, sql, complexConstants); + throw; + } exceptionSimplifiedPtr = std::current_exception(); } catch (...) { LOG(ERROR) - << "Simplified eval: Exceptions other than VeloxUserError are not allowed."; + << "Simplified eval: Exceptions other than VeloxUserError or VeloxRuntimeError with UNSUPPORTED_INPUT are not allowed."; persistReproInfoIfNeeded( rowVector, columnsToWrapInLazy, copiedResult, sql, complexConstants); throw; @@ -184,7 +200,7 @@ fuzzer::ResultOrError ExpressionVerifier::verify( // Throws in case exceptions are not compatible. If they are compatible, // return false to signal that the expression failed. fuzzer::compareExceptions(exceptionCommonPtr, exceptionSimplifiedPtr); - return {nullptr, exceptionCommonPtr}; + return {nullptr, exceptionCommonPtr, unsupportedInputUncatchableError}; } else { // Throws in case output is different. VELOX_CHECK_EQ(commonEvalResult.size(), plans.size()); @@ -217,7 +233,8 @@ fuzzer::ResultOrError ExpressionVerifier::verify( return { VectorMaker(commonEvalResult[0]->pool()).rowVector(commonEvalResult), - nullptr}; + nullptr, + unsupportedInputUncatchableError}; } void ExpressionVerifier::persistReproInfoIfNeeded( diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index 324a41f8f658..4d84b8b7071e 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -79,9 +79,9 @@ void Timestamp::toTimezone(const tz::TimeZone& zone) { seconds_ = zone.to_local(std::chrono::seconds(seconds_)).count(); } catch (const std::invalid_argument& e) { // Invalid argument means we hit a conversion not supported by - // external/date. Need to throw a RuntimeError so that try() statements do - // not suppress it. - VELOX_FAIL(e.what()); + // external/date. This is a special case where we intentionally throw + // VeloxRuntimeError to avoid it being suppressed by TRY(). + VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE(e.what()); } } diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 4fc8762d1bfa..93dacadba677 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -232,7 +232,9 @@ void validateRangeImpl(time_point timePoint) { auto year = year_month_day(floor(timePoint)).year(); if (year < kMinYear || year > kMaxYear) { - VELOX_FAIL( + // This is a special case where we intentionally throw + // VeloxRuntimeError to avoid it being suppressed by TRY(). + VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE( "Timepoint is outside of supported year range: [{}, {}], got {}", (int)kMinYear, (int)kMaxYear,