Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support a faster unique id implement #4701

Merged
merged 4 commits into from
Apr 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/exprs/vectorized/function_call_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ Status VectorizedFunctionCallExpr::prepare(starrocks::RuntimeState* state, starr

_is_returning_random_value = _fn.fid == 10300 /* rand */ || _fn.fid == 10301 /* random */ ||
_fn.fid == 10302 /* rand */ || _fn.fid == 10303 /* random */ ||
_fn.fid == 100015 /* uuid */;
_fn.fid == 100015 /* uuid */ || _fn.fid == 100016 /* uniq_id */;

return Status::OK();
}
Expand Down
74 changes: 74 additions & 0 deletions be/src/exprs/vectorized/utility_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,25 @@

#include "exprs/vectorized/utility_functions.h"

#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <mutex>
#include <random>
#include <thread>

#include "column/column_builder.h"
#include "column/column_viewer.h"
#include "column/vectorized_fwd.h"
#include "common/config.h"
#include "gen_cpp/version.h"
#include "runtime/primitive_type.h"
#include "runtime/runtime_state.h"
#include "service/backend_options.h"
#include "udf/udf_internal.h"
#include "util/monotime.h"
#include "util/thread.h"
#include "util/time.h"
#include "util/uid_util.h"

namespace starrocks::vectorized {
Expand Down Expand Up @@ -61,4 +74,65 @@ ColumnPtr UtilityFunctions::uuid(FunctionContext*, const Columns& columns) {
return result.build(false);
}

// Packs the five components into a single 128-bit value.
//
// The components are laid out in memory in declaration order (timestamp first, inc last)
// and the resulting 16 bytes are reinterpreted as an int128_t, so the numeric value depends
// on the host byte order exactly as the in-memory layout does.
//
// @param timestamp  64-bit time component
// @param backendId  16-bit backend component
// @param rand       16-bit random component
// @param tid        16-bit thread component
// @param inc        16-bit per-row increment
// @return the 128-bit value whose object representation is the packed fields
inline int128_t next_uuid(int64_t timestamp, int16_t backendId, int16_t rand, int16_t tid, int16_t inc) {
    struct PackedFields {
        int64_t timestamp;
        int16_t backendId;
        int16_t rand;
        int16_t tid;
        int16_t inc;
    };
    // 8 + 4 * 2 bytes with no padding; the copy below relies on an exact size match.
    static_assert(sizeof(PackedFields) == sizeof(int128_t), "packed uuid fields must fill exactly 128 bits");

    const PackedFields fields{timestamp, backendId, rand, tid, inc};

    // memcpy instead of reading the inactive member of a union: the latter is undefined
    // behaviour in standard C++, while copying the object representation is well defined.
    int128_t res;
    std::memcpy(&res, &fields, sizeof(res));
    return res;
}

// Source of per-thread ids: each thread that asks for an id takes the next counter value.
// The counter is 16 bits wide and wraps around; the original note here says the number of
// executor threads is fixed, so the range is presumably expected to suffice — TODO confirm.
static std::atomic<int16_t> inc{};

// Returns an id that stays the same for the calling thread across calls.
//
// The id is drawn from the shared counter the first time a given thread calls this function
// and is cached in a thread-local afterwards. Using the thread-local's own one-time
// initialisation (rather than a -1 "unassigned" sentinel) means every 16-bit counter value,
// including -1 after a wrap-around, is a valid cached id.
int16_t get_uniq_tid() {
    static thread_local const int16_t uniq_tid = inc.fetch_add(1);
    return uniq_tid;
}

// Produces one approximately-unique 128-bit id per row.
//
// The last entry of `columns` is read as a constant INT giving the number of rows to
// generate. Every row produced by one call shares the same timestamp (MonotonicNanos()),
// backend component (hash of BackendOptions::get_localhost() xor config::brpc_port,
// truncated to 16 bits), random salt and per-thread id; only the 16-bit row index differs.
//
// @return an Int128Column holding `num_rows` ids
ColumnPtr UtilityFunctions::uuid_numeric(FunctionContext*, const Columns& columns) {
    const int32_t num_rows = ColumnHelper::get_const_value<TYPE_INT>(columns.back());
    // The row index is stored in a 16-bit field, so it must fit in int16_t.
    DCHECK_LE(num_rows, std::numeric_limits<int16_t>::max());

    auto result = Int128Column::create(num_rows);
    auto& data = result->get_data();

    // One generator per thread. std::mt19937 mutates its state on every draw and is not safe
    // for unsynchronised concurrent use; the per-thread id below suggests this function is
    // called from several threads, so a single shared static generator would be a data race.
    static thread_local std::mt19937 mt{std::random_device{}()};
    std::uniform_int_distribution<int16_t> dist(std::numeric_limits<int16_t>::min(),
                                                std::numeric_limits<int16_t>::max());

    // Backend component: hash(host) ^ brpc port, of which only the low 16 bits are kept.
    int backend_id = static_cast<int>(std::hash<std::string>()(BackendOptions::get_localhost()));
    backend_id ^= config::brpc_port;
    const auto backend_bits = static_cast<int16_t>(backend_id);

    const int16_t tid = get_uniq_tid();
    const int64_t timestamp = MonotonicNanos();
    const int16_t salt = dist(mt);

    for (int i = 0; i < num_rows; ++i) {
        data[i] = next_uuid(timestamp, backend_bits, salt, tid, static_cast<int16_t>(i));
    }

    return result;
}

} // namespace starrocks::vectorized
6 changes: 6 additions & 0 deletions be/src/exprs/vectorized/utility_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class UtilityFunctions {
* returns uuid.
*/
DEFINE_VECTORIZED_FN(uuid);
/**
* Returns an approximately unique 128-bit id (LARGEINT), packed as:
* timestamp (64 bit) + backend_id (16 bit: low bits of hash(ip) ^ port) + rand (16 bit) +
* tid (per-thread id, 16 bit) + i (row increment, 16 bit)
*/
DEFINE_VECTORIZED_FN(uuid_numeric);
};

} // namespace vectorized
Expand Down
13 changes: 13 additions & 0 deletions be/test/exprs/vectorized/utility_functions_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "runtime/primitive_type.h"
#include "util/random.h"
#include "util/time.h"

Expand Down Expand Up @@ -99,6 +100,18 @@ TEST_F(UtilityFunctionsTest, uuidTest) {

ASSERT_EQ(deduplication.size(), column_size);
}

{
int32_t chunk_size = 4096;
auto var1_col = ColumnHelper::create_const_column<TYPE_INT>(chunk_size, 1);
Columns columns;
columns.emplace_back(var1_col);
ColumnPtr result = UtilityFunctions::uuid_numeric(ctx, columns);
Int128Column* col = ColumnHelper::cast_to_raw<TYPE_LARGEINT>(result);
std::set<int128_t> vals;
vals.insert(col->get_data().begin(), col->get_data().end());
ASSERT_EQ(vals.size(), chunk_size);
}
}

} // namespace vectorized
Expand Down
1 change: 1 addition & 0 deletions gensrc/script/vectorized/vectorized_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@
[100013, 'current_version', 'VARCHAR', [], "UtilityFunctions::current_version"],
[100014, 'last_query_id', 'VARCHAR', [], "UtilityFunctions::last_query_id"],
[100015, 'uuid', 'VARCHAR', [], "UtilityFunctions::uuid"],
[100016, 'uuid_numeric', 'LARGEINT', [], "UtilityFunctions::uuid_numeric"],

# json string function
[110000, "get_json_int", "INT", ["VARCHAR", "VARCHAR"], "JsonFunctions::get_json_int",
Expand Down