Skip to content

Commit 5014a1c

Browse files
natashasehgalmeta-codesync[bot]
authored and committed
feat: Add SetDigest result verifier (#15619)
Summary: Pull Request resolved: #15619. feat: Add SetDigest result verifier to be used in functions with SetDigest output. Differential Revision: D87835419
1 parent 5e0b1c8 commit 5014a1c

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed

velox/functions/prestosql/fuzzer/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ target_link_libraries(
1919
velox_core
2020
velox_aggregation_fuzzer_base
2121
velox_exec_test_lib
22+
velox_functions_lib
2223
velox_vector
2324
velox_vector_test_lib
2425
)
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include "velox/common/memory/HashStringAllocator.h"
19+
#include "velox/core/PlanNode.h"
20+
#include "velox/exec/fuzzer/ResultVerifier.h"
21+
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
22+
#include "velox/exec/tests/utils/PlanBuilder.h"
23+
#include "velox/functions/lib/SetDigest.h"
24+
#include "velox/vector/ComplexVector.h"
25+
26+
namespace facebook::velox::exec::test {
27+
28+
class SetDigestResultVerifier : public ResultVerifier {
29+
public:
30+
bool supportsCompare() override {
31+
return true;
32+
}
33+
34+
bool supportsVerify() override {
35+
return false;
36+
}
37+
38+
void initialize(
39+
const std::vector<RowVectorPtr>& /*input*/,
40+
const std::vector<core::ExprPtr>& /*projections*/,
41+
const std::vector<std::string>& groupingKeys,
42+
const core::AggregationNode::Aggregate& /*aggregate*/,
43+
const std::string& aggregateName) override {
44+
keys_ = groupingKeys;
45+
resultName_ = aggregateName;
46+
}
47+
48+
void initializeWindow(
49+
const std::vector<RowVectorPtr>& /*input*/,
50+
const std::vector<core::ExprPtr>& /*projections*/,
51+
const std::vector<std::string>& /*partitionByKeys*/,
52+
const std::vector<SortingKeyAndOrder>& /*sortingKeysAndOrders*/,
53+
const core::WindowNode::Function& /*function*/,
54+
const std::string& /*frame*/,
55+
const std::string& windowName) override {
56+
keys_ = {"row_number"};
57+
resultName_ = windowName;
58+
}
59+
60+
bool compare(const RowVectorPtr& result, const RowVectorPtr& altResult)
61+
override {
62+
VELOX_CHECK_EQ(result->size(), altResult->size());
63+
64+
auto projection = keys_;
65+
projection.push_back(resultName_);
66+
67+
auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
68+
auto builder = PlanBuilder(planNodeIdGenerator).values({result});
69+
if (!keys_.empty()) {
70+
builder = builder.orderBy(keys_, false);
71+
}
72+
auto sortByKeys = builder.project(projection).planNode();
73+
auto sortedResult =
74+
AssertQueryBuilder(sortByKeys).copyResults(result->pool());
75+
76+
builder = PlanBuilder(planNodeIdGenerator).values({altResult});
77+
if (!keys_.empty()) {
78+
builder = builder.orderBy(keys_, false);
79+
}
80+
sortByKeys = builder.project(projection).planNode();
81+
auto sortedAltResult =
82+
AssertQueryBuilder(sortByKeys).copyResults(altResult->pool());
83+
84+
VELOX_CHECK_EQ(sortedResult->size(), sortedAltResult->size());
85+
auto size = sortedResult->size();
86+
for (auto i = 0; i < size; i++) {
87+
auto resultIsNull = sortedResult->childAt(resultName_)->isNullAt(i);
88+
auto altResultIsNull = sortedAltResult->childAt(resultName_)->isNullAt(i);
89+
if (resultIsNull || altResultIsNull) {
90+
VELOX_CHECK(
91+
resultIsNull && altResultIsNull,
92+
"Null mismatch at row {}: result={}, altResult={}",
93+
i,
94+
resultIsNull,
95+
altResultIsNull);
96+
continue;
97+
}
98+
99+
auto resultValue = sortedResult->childAt(resultName_)
100+
->as<SimpleVector<StringView>>()
101+
->valueAt(i);
102+
auto altResultValue = sortedAltResult->childAt(resultName_)
103+
->as<SimpleVector<StringView>>()
104+
->valueAt(i);
105+
106+
if (resultValue == altResultValue) {
107+
continue;
108+
}
109+
110+
checkEquivalentSetDigest(resultValue, altResultValue);
111+
}
112+
return true;
113+
}
114+
115+
bool verify(const RowVectorPtr& /*result*/) override {
116+
VELOX_UNSUPPORTED();
117+
}
118+
119+
void reset() override {
120+
keys_.clear();
121+
resultName_.clear();
122+
}
123+
124+
private:
125+
void checkEquivalentSetDigest(
126+
const StringView& result,
127+
const StringView& altResult) {
128+
auto pool = memory::memoryManager()->addLeafPool();
129+
HashStringAllocator allocator(pool.get());
130+
131+
facebook::velox::functions::SetDigest resultDigest(&allocator);
132+
facebook::velox::functions::SetDigest altResultDigest(&allocator);
133+
134+
// Deserialize SetDigests
135+
try {
136+
resultDigest.deserialize(result.data(), result.size());
137+
altResultDigest.deserialize(altResult.data(), altResult.size());
138+
} catch (const std::exception& e) {
139+
VELOX_FAIL("Failed to deserialize SetDigest: {}", e.what());
140+
}
141+
142+
// Extract cardinality and exactness mode
143+
auto resultCardinality = resultDigest.cardinality();
144+
auto altResultCardinality = altResultDigest.cardinality();
145+
bool resultIsExact = resultDigest.isExact();
146+
bool altResultIsExact = altResultDigest.isExact();
147+
148+
// For exact mode, cardinalities must match exactly
149+
if (resultIsExact && altResultIsExact) {
150+
VELOX_CHECK_EQ(
151+
resultCardinality,
152+
altResultCardinality,
153+
"SetDigest exact cardinality mismatch: {} vs {}",
154+
resultCardinality,
155+
altResultCardinality);
156+
return;
157+
}
158+
159+
// For approximate mode or mixed modes, compare with error tolerance.
160+
if (resultCardinality == 0 && altResultCardinality == 0) {
161+
return;
162+
}
163+
164+
// Calculate relative error
165+
double maxCardinality = std::max(
166+
static_cast<double>(resultCardinality),
167+
static_cast<double>(altResultCardinality));
168+
double errorRate = std::abs(
169+
static_cast<double>(resultCardinality) -
170+
static_cast<double>(altResultCardinality)) /
171+
maxCardinality;
172+
173+
VELOX_CHECK_LT(
174+
errorRate,
175+
kApproximateErrorTolerance,
176+
"SetDigest cardinality mismatch: {} vs {} (error: {:.2%})",
177+
resultCardinality,
178+
altResultCardinality,
179+
errorRate);
180+
}
181+
182+
static constexpr double kApproximateErrorTolerance = 0.05;
183+
184+
std::vector<std::string> keys_;
185+
std::string resultName_;
186+
};
187+
188+
} // namespace facebook::velox::exec::test

0 commit comments

Comments
 (0)