forked from StarRocks/starrocks
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] add hash function for xx_hash3_64 (StarRocks#28910)
xx_hash3_64 performs much better than murmur_hash3_32 by using AVX2 instructions, according to the xxHash benchmarks, and offers state-of-the-art hash quality; it is widely integrated into many software projects. Reference: https://github.com/Cyan4973/xxHash --------- Signed-off-by: beans <jing.gao@outlook.com>
- Loading branch information
1 parent
82f73cf
commit 058e08b
Showing
10 changed files
with
268 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// Copyright 2021-present StarRocks, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include <benchmark/benchmark.h> | ||
#include <glog/logging.h> | ||
#include <gtest/gtest.h> | ||
#include <testutil/assert.h> | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include "bench.h" | ||
#include "exprs/hash_functions.h" | ||
|
||
namespace starrocks { | ||
|
||
class HashFunctionsBench { | ||
public: | ||
void SetUp(); | ||
void TearDown() {} | ||
|
||
HashFunctionsBench(size_t num_column, size_t num_rows) : _num_column(num_column), _num_rows(num_rows) {} | ||
|
||
void do_bench(benchmark::State& state, size_t num_column, bool test_default_hash); | ||
|
||
private: | ||
const TypeDescriptor type_desc = TypeDescriptor(TYPE_VARCHAR); | ||
size_t _num_column = 0; | ||
size_t _num_rows = 0; | ||
std::vector<ColumnPtr> _columns{}; | ||
}; | ||
|
||
void HashFunctionsBench::SetUp() { | ||
for (int i = 0; i < _num_column; i++) { | ||
auto columnPtr = Bench::create_random_column(type_desc, _num_rows, false, false, 32); | ||
_columns.push_back(std::move(columnPtr)); | ||
} | ||
} | ||
|
||
// Evaluates one hash function over _columns and casts the result column to its
// concrete type.
//
// state             - unused here; kept so the signature stays stable for callers.
// num_rows          - unused here; the row count is already fixed by the columns built in SetUp().
// test_default_hash - true selects murmur_hash3_32 (TYPE_INT result),
//                     false selects xx_hash3_64 (TYPE_BIGINT result).
//
// A new FunctionContext is created on every call, so its construction is part
// of whatever the caller times around this function.
void HashFunctionsBench::do_bench(benchmark::State& state, size_t num_rows, bool test_default_hash) {
    std::unique_ptr<FunctionContext> ctx(FunctionContext::create_test_context());
    if (test_default_hash) {
        ColumnPtr result = HashFunctions::murmur_hash3_32(ctx.get(), _columns).value();
        auto column = ColumnHelper::cast_to<TYPE_INT>(result);
        // Make the result observable so the compiler cannot discard the hashing work.
        benchmark::DoNotOptimize(column);
    } else {
        ColumnPtr result = HashFunctions::xx_hash3_64(ctx.get(), _columns).value();
        auto column = ColumnHelper::cast_to<TYPE_BIGINT>(result);
        // Make the result observable so the compiler cannot discard the hashing work.
        benchmark::DoNotOptimize(column);
    }
}
|
||
// Registers the benchmark's argument matrix: for each row count, one run with
// the second argument true followed by one with it false, then fixes the
// iteration count for every run.
static void BM_HashFunctions_Eval_Arg(benchmark::internal::Benchmark* b) {
    for (const int row_count : {10, 100, 10000, 1000000}) {
        b->Args({row_count, true});
        b->Args({row_count, false});
    }
    b->Iterations(10000);
}
|
||
// Benchmark body: reads the row count (range 0) and the hash selector (range 1),
// builds a single-column fixture outside the timed loop, and runs one hash
// evaluation per benchmark iteration.
static void BM_HashFunctions_Eval(benchmark::State& state) {
    const auto row_count = static_cast<size_t>(state.range(0));
    const bool use_default_hash = state.range(1) != 0;

    HashFunctionsBench fixture(1, row_count);
    fixture.SetUp();

    for (auto _ : state) {
        fixture.do_bench(state, row_count, use_default_hash);
    }
}
|
||
// Registers BM_HashFunctions_Eval and lets BM_HashFunctions_Eval_Arg supply its arguments and iteration count.
BENCHMARK(BM_HashFunctions_Eval)->Apply(BM_HashFunctions_Eval_Arg); | ||
|
||
} // namespace starrocks | ||
|
||
// Google Benchmark macro that provides the program's main() and runs the registered benchmarks.
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Copyright 2021-present StarRocks, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "util/hash_util.hpp" | ||
|
||
#include "util/xxh3.h" | ||
|
||
namespace starrocks { | ||
|
||
uint64_t HashUtil::xx_hash3_64(const void* key, int32_t len, uint64_t seed) { | ||
return XXH3_64bits_withSeed(key, len, seed); | ||
} | ||
|
||
} // namespace starrocks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
docs/sql-reference/sql-functions/hash-functions/xx_hash3_64.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# xx_hash3_64 | ||
|
||
## Description | ||
|
||
Returns the 64-bit xxHash3 hash value of the input string(s). | ||
|
||
## Syntax | ||
|
||
```Haskell | ||
BIGINT XX_HASH3_64(VARCHAR input, ...) | ||
``` | ||
|
||
## Examples | ||
|
||
```Plain Text | ||
MySQL > select xx_hash3_64(null); | ||
+-------------------+ | ||
| xx_hash3_64(NULL) | | ||
+-------------------+ | ||
| NULL | | ||
+-------------------+ | ||
MySQL > select xx_hash3_64("hello"); | ||
+----------------------+ | ||
| xx_hash3_64('hello') | | ||
+----------------------+ | ||
| -7685981735718036227 | | ||
+----------------------+ | ||
MySQL > select xx_hash3_64("hello", "world"); | ||
+-------------------------------+ | ||
| xx_hash3_64('hello', 'world') | | ||
+-------------------------------+ | ||
| 7001965798170371843 | | ||
+-------------------------------+ | ||
``` | ||
|
||
## keyword | ||
|
||
XX_HASH3_64,HASH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters