1+ /*
2+ * Copyright (c) Facebook, Inc. and its affiliates.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+ #include " velox/functions/sparksql/MightContain.h"
17+
18+ #include " velox/common/base/BloomFilter.h"
19+ #include " velox/expression/DecodedArgs.h"
20+ #include " velox/vector/FlatVector.h"
21+
22+ #include < glog/logging.h>
23+
24+ namespace facebook ::velox::functions::sparksql {
25+ namespace {
26+ class BloomFilterMightContainFunction final : public exec::VectorFunction {
27+ bool isDefaultNullBehavior () const final {
28+ return false ;
29+ }
30+
31+ void apply (
32+ const SelectivityVector& rows,
33+ std::vector<VectorPtr>& args, // Not using const ref so we can reuse args
34+ const TypePtr& outputType,
35+ exec::EvalCtx& context,
36+ VectorPtr& resultRef) const final {
37+ VELOX_CHECK_EQ (args.size (), 2 );
38+ context.ensureWritable (rows, BOOLEAN (), resultRef);
39+ auto & result = *resultRef->as <FlatVector<bool >>();
40+ exec::DecodedArgs decodedArgs (rows, args, context);
41+ auto serialized = decodedArgs.at (0 );
42+ auto value = decodedArgs.at (1 );
43+ if (serialized->isConstantMapping () && serialized->isNullAt (0 )) {
44+ rows.applyToSelected ([&](int row) { result.setNull (row, true ); });
45+ return ;
46+ }
47+
48+ if (serialized->isConstantMapping ()) {
49+ BloomFilter<int64_t , false > output;
50+ auto serializedBloom = serialized->valueAt <StringView>(0 );
51+ BloomFilter<int64_t , false >::deserialize (serializedBloom.data (), output);
52+ rows.applyToSelected ([&](int row) {
53+ result.set (row, output.mayContain (value->valueAt <int64_t >(row)));
54+ });
55+ return ;
56+ }
57+
58+ rows.applyToSelected ([&](int row) {
59+ BloomFilter<int64_t , false > output;
60+ auto serializedBloom = serialized->valueAt <StringView>(row);
61+ BloomFilter<int64_t , false >::deserialize (serializedBloom.data (), output);
62+ result.set (row, output.mayContain (value->valueAt <int64_t >(row)));
63+ });
64+ }
65+ };
66+ } // namespace
67+
68+ std::vector<std::shared_ptr<exec::FunctionSignature>> mightContainSignatures () {
69+ return {exec::FunctionSignatureBuilder ()
70+ .returnType (" boolean" )
71+ .argumentType (" varbinary" )
72+ .argumentType (" bigint" )
73+ .build ()};
74+ }
75+
76+ std::shared_ptr<exec::VectorFunction> makeMightContain (
77+ const std::string& name,
78+ const std::vector<exec::VectorFunctionArg>& inputArgs) {
79+ static const auto kHashFunction =
80+ std::make_shared<BloomFilterMightContainFunction>();
81+ return kHashFunction ;
82+ }
83+
84+ } // namespace facebook::velox::functions::sparksql
0 commit comments