diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 02b92d217cf4..ba5fbbda69f1 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -43,6 +43,12 @@ class ReverseSignatureBinder : private SignatureBinderBase { return typeVariablesBindings_; } + /// Return the integer bindings produced by 'tryBind'. This function should be + /// called after 'tryBind' and only if 'tryBind' returns true. + const std::unordered_map& integerBindings() const { + return integerVariablesBindings_; + } + private: /// Return whether there is a constraint on an integer variable in type /// signature. diff --git a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp index e139ae0c4f7a..7122acc9b570 100644 --- a/velox/expression/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/tests/ArgumentTypeFuzzerTest.cpp @@ -403,4 +403,205 @@ TEST_F(ArgumentTypeFuzzerTest, orderableConstraint) { } } +TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalArgumentTypes) { + auto fuzzArgumentTypes = [](const exec::FunctionSignature& signature, + const TypePtr& returnType) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{signature, returnType, seed}; + bool ok = fuzzer.fuzzArgumentTypes(kMaxVariadicArgs); + VELOX_CHECK( + ok, + "Signature: {}, Return type: {}", + signature.toString(), + returnType->toString()); + return fuzzer.argumentTypes(); + }; + + // Argument type must match return type. + auto signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + auto argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(1, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + + // Argument type can be any decimal. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + + // Argument type can be any decimal with scale 30. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .returnType("boolean") + .argumentType("decimal(p,30)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(30, getDecimalPrecisionScale(*argTypes[0]).second); + + // Another way to specify fixed scale. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s", "3") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*argTypes[0]).second); + + // Argument type can be any decimal with precision 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(3,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*argTypes[0]).first); + + // Another way to specify fixed precision. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p", "30") + .integerVariable("s") + .returnType("boolean") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, BOOLEAN()); + ASSERT_EQ(1, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isDecimal()); + EXPECT_EQ(30, getDecimalPrecisionScale(*argTypes[0]).first); + + // Multiple arguments. All must be the same as return type. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(3, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[1]->toString()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[2]->toString()); + + // Multiple arguments. Some have fixed precision, scale or both. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .argumentType("decimal(p,10)") + .argumentType("decimal(12,s)") + .argumentType("decimal(2,1)") + .build(); + + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(4, argTypes.size()); + EXPECT_EQ(DECIMAL(10, 7)->toString(), argTypes[0]->toString()); + EXPECT_EQ(DECIMAL(10, 10)->toString(), argTypes[1]->toString()); + EXPECT_EQ(DECIMAL(12, 7)->toString(), argTypes[2]->toString()); + EXPECT_EQ(DECIMAL(2, 1)->toString(), argTypes[3]->toString()); +} + +TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { + auto fuzzReturnType = [](const exec::FunctionSignature& signature) { + std::mt19937 seed{0}; + ArgumentTypeFuzzer fuzzer{signature, seed}; + return fuzzer.fuzzReturnType(); + }; + + // Return type can be any decimal. + auto signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + auto returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + + // Return type can be any decimal with scale 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(p,3)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*returnType).second); + + // Another way to specify that scale must be 3. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s", "3") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(3, getDecimalPrecisionScale(*returnType).second); + + // Return type can be any decimal with precision 22. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(22,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(22, getDecimalPrecisionScale(*returnType).first); + + // Another way to specify that precision must be 22. + signature = exec::FunctionSignatureBuilder() + .integerVariable("p", "22") + .integerVariable("s") + .returnType("decimal(p,s)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isDecimal()); + EXPECT_EQ(22, getDecimalPrecisionScale(*returnType).first); + + // Return type can only be DECIMAL(10, 7). + signature = exec::FunctionSignatureBuilder() + .integerVariable("p") + .integerVariable("s") + .returnType("decimal(10,7)") + .argumentType("decimal(p,s)") + .build(); + + returnType = fuzzReturnType(*signature); + EXPECT_EQ(DECIMAL(10, 7)->toString(), returnType->toString()); +} + } // namespace facebook::velox::test diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp index 44c8a0a69bf3..79ab736ef089 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.cpp @@ -27,6 +27,9 @@ namespace facebook::velox::test { std::string typeToBaseName(const TypePtr& type) { + if (type->isDecimal()) { + return "decimal"; + } return boost::algorithm::to_lower_copy(std::string{type->kindName()}); } @@ -35,6 +38,91 @@ std::optional baseNameToTypeKind(const std::string& typeName) { return tryMapNameToTypeKind(kindName); } +namespace { + +bool isDecimalBaseName(const std::string& typeName) { + auto normalized = boost::algorithm::to_lower_copy(typeName); + + return normalized == "decimal"; +} + +/// Returns true only if 'str' contains digits. +bool isPositiveInteger(const std::string& str) { + return !str.empty() && + std::find_if(str.begin(), str.end(), [](unsigned char c) { + return !std::isdigit(c); + }) == str.end(); +} + +int32_t rand(FuzzerGenerator& rng) { + return boost::random::uniform_int_distribution()(rng); +} +} // namespace + +void ArgumentTypeFuzzer::determineUnboundedIntegerVariables( + const exec::TypeSignature& type) { + if (!isDecimalBaseName(type.baseName())) { + return; + } + + VELOX_CHECK_EQ(2, type.parameters().size()) + + const auto& precision = type.parameters()[0].baseName(); + const auto& scale = type.parameters()[1].baseName(); + + // Bind 'name' variable, if not already bound, using 'constant' constraint + // ('name'='123'). Return bound value if 'name' is already bound or was + // successfully bound to a constant value. Return std::nullopt otherwise. + auto tryFixedBinding = [&](const auto& name) -> std::optional { + auto it = variables().find(name); + if (it == variables().end()) { + return std::stoi(name); + } + + if (integerBindings_.count(name) > 0) { + return integerBindings_[name]; + } + + if (isPositiveInteger(it->second.constraint())) { + const auto value = std::stoi(it->second.constraint()); + integerBindings_[name] = value; + return value; + } + + return std::nullopt; + }; + + std::optional p = tryFixedBinding(precision); + std::optional s = tryFixedBinding(scale); + + if (p.has_value() && s.has_value()) { + return; + } + + if (s.has_value()) { + p = std::max(1, s.value()); + if (p < LongDecimalType::kMaxPrecision) { + p = p.value() + + rand(rng_) % (LongDecimalType::kMaxPrecision - p.value() + 1); + } + + integerBindings_[precision] = p.value(); + return; + } + + if (p.has_value()) { + s = rand(rng_) % (p.value() + 1); + integerBindings_[scale] = s.value(); + return; + } + + p = 1 + rand(rng_) % (LongDecimalType::kMaxPrecision); + s = rand(rng_) % (p.value() + 1); + + integerBindings_[precision] = p.value(); + integerBindings_[scale] = s.value(); +} + void ArgumentTypeFuzzer::determineUnboundedTypeVariables() { for (auto& [variableName, variableInfo] : variables()) { if (!variableInfo.isTypeParameter()) { @@ -68,12 +156,15 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { const auto& formalArgs = signature_.argumentTypes(); auto formalArgsCnt = formalArgs.size(); + std::unordered_map integerBindings; + if (returnType_) { exec::ReverseSignatureBinder binder{signature_, returnType_}; if (!binder.tryBind()) { return false; } bindings_ = binder.bindings(); + integerBindings_ = binder.integerBindings(); } else { for (const auto& [name, _] : signature_.variables()) { bindings_.insert({name, nullptr}); @@ -81,13 +172,16 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { } determineUnboundedTypeVariables(); + for (const auto& argType : signature_.argumentTypes()) { + determineUnboundedIntegerVariables(argType); + } for (auto i = 0; i < formalArgsCnt; i++) { TypePtr actualArg; if (formalArgs[i].baseName() == "any") { actualArg = randType(); } else { actualArg = exec::SignatureBinder::tryResolveType( - formalArgs[i], variables(), bindings_); + formalArgs[i], variables(), bindings_, integerBindings_); VELOX_CHECK(actualArg != nullptr); } argumentTypes_.push_back(actualArg); @@ -114,15 +208,19 @@ TypePtr ArgumentTypeFuzzer::fuzzReturnType() { "Only fuzzing uninitialized return type is allowed."); determineUnboundedTypeVariables(); - if (signature_.returnType().baseName() == "any") { + determineUnboundedIntegerVariables(signature_.returnType()); + + const auto& returnType = signature_.returnType(); + + if (returnType.baseName() == "any") { returnType_ = randType(); - return returnType_; } else { returnType_ = exec::SignatureBinder::tryResolveType( - signature_.returnType(), variables(), bindings_); - VELOX_CHECK_NE(returnType_, nullptr); - return returnType_; + returnType, variables(), bindings_, integerBindings_); } + + VELOX_CHECK_NOT_NULL(returnType_); + return returnType_; } } // namespace facebook::velox::test diff --git a/velox/expression/tests/utils/ArgumentTypeFuzzer.h b/velox/expression/tests/utils/ArgumentTypeFuzzer.h index 540a353221bf..d7fd528913b5 100644 --- a/velox/expression/tests/utils/ArgumentTypeFuzzer.h +++ b/velox/expression/tests/utils/ArgumentTypeFuzzer.h @@ -69,6 +69,8 @@ class ArgumentTypeFuzzer { /// randomly generated type. void determineUnboundedTypeVariables(); + void determineUnboundedIntegerVariables(const exec::TypeSignature& type); + TypePtr randType(); /// Generates an orderable random type, including structs, and arrays. @@ -83,6 +85,8 @@ class ArgumentTypeFuzzer { /// Bindings between type variables and their actual types. std::unordered_map bindings_; + std::unordered_map integerBindings_; + /// RNG to generate random types for unbounded type variables when necessary. std::mt19937& rng_; };