Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Avoid calling the QueryCtx destructor while holding a mutex #16602

Merged
merged 3 commits into from
Jan 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 52 additions & 13 deletions be/src/exec/pipeline/fragment_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "runtime/stream_load/transaction_mgr.h"
#include "util/debug/query_trace.h"
#include "util/pretty_printer.h"
#include "util/runtime_profile.h"
#include "util/time.h"
#include "util/uid_util.h"

Expand Down Expand Up @@ -553,28 +554,66 @@ Status FragmentExecutor::prepare(ExecEnv* exec_env, const TExecPlanFragmentParam
UnifiedExecPlanFragmentParams request(common_request, unique_request);

bool prepare_success = false;
int64_t prepare_time = 0;
DeferOp defer([this, &request, &prepare_success, &prepare_time]() {
struct {
int64_t prepare_time = 0;
int64_t prepare_query_ctx_time = 0;
int64_t prepare_fragment_ctx_time = 0;
int64_t prepare_runtime_state_time = 0;
int64_t prepare_pipeline_driver_time = 0;
} profiler;

DeferOp defer([this, &request, &prepare_success, &profiler]() {
if (prepare_success) {
auto fragment_ctx = _query_ctx->fragment_mgr()->get(request.fragment_instance_id());
auto* prepare_timer =
ADD_TIMER(fragment_ctx->runtime_state()->runtime_profile(), "FragmentInstancePrepareTime");
COUNTER_SET(prepare_timer, prepare_time);
COUNTER_SET(prepare_timer, profiler.prepare_time);
auto* prepare_query_ctx_timer =
ADD_CHILD_TIMER_THESHOLD(fragment_ctx->runtime_state()->runtime_profile(), "prepare-query-ctx",
"FragmentInstancePrepareTime", 10_ms);
COUNTER_SET(prepare_query_ctx_timer, profiler.prepare_query_ctx_time);

auto* prepare_fragment_ctx_timer =
ADD_CHILD_TIMER_THESHOLD(fragment_ctx->runtime_state()->runtime_profile(), "prepare-fragment-ctx",
"FragmentInstancePrepareTime", 10_ms);
COUNTER_SET(prepare_fragment_ctx_timer, profiler.prepare_fragment_ctx_time);

auto* prepare_runtime_state_timer =
ADD_CHILD_TIMER_THESHOLD(fragment_ctx->runtime_state()->runtime_profile(), "prepare-runtime-state",
"FragmentInstancePrepareTime", 10_ms);
COUNTER_SET(prepare_runtime_state_timer, profiler.prepare_runtime_state_time);

auto* prepare_pipeline_driver_timer =
ADD_CHILD_TIMER_THESHOLD(fragment_ctx->runtime_state()->runtime_profile(),
"prepare-pipeline-driver", "FragmentInstancePrepareTime", 10_ms);
COUNTER_SET(prepare_pipeline_driver_timer, profiler.prepare_runtime_state_time);
} else {
_fail_cleanup();
}
});
SCOPED_RAW_TIMER(&prepare_time);
RETURN_IF_ERROR(exec_env->query_pool_mem_tracker()->check_mem_limit("Start execute plan fragment."));

RETURN_IF_ERROR(_prepare_query_ctx(exec_env, request));
RETURN_IF_ERROR(_prepare_fragment_ctx(request));
RETURN_IF_ERROR(_prepare_workgroup(request));
RETURN_IF_ERROR(_prepare_runtime_state(exec_env, request));
RETURN_IF_ERROR(_prepare_exec_plan(exec_env, request));
RETURN_IF_ERROR(_prepare_global_dict(request));
RETURN_IF_ERROR(_prepare_pipeline_driver(exec_env, request));
RETURN_IF_ERROR(_prepare_stream_load_pipe(exec_env, request));
SCOPED_RAW_TIMER(&profiler.prepare_time);
RETURN_IF_ERROR(exec_env->query_pool_mem_tracker()->check_mem_limit("Start execute plan fragment."));
{
SCOPED_RAW_TIMER(&profiler.prepare_query_ctx_time);
RETURN_IF_ERROR(_prepare_query_ctx(exec_env, request));
}
{
SCOPED_RAW_TIMER(&profiler.prepare_fragment_ctx_time);
RETURN_IF_ERROR(_prepare_fragment_ctx(request));
}
{
SCOPED_RAW_TIMER(&profiler.prepare_runtime_state_time);
RETURN_IF_ERROR(_prepare_workgroup(request));
RETURN_IF_ERROR(_prepare_runtime_state(exec_env, request));
RETURN_IF_ERROR(_prepare_exec_plan(exec_env, request));
RETURN_IF_ERROR(_prepare_global_dict(request));
}
{
SCOPED_RAW_TIMER(&profiler.prepare_pipeline_driver_time);
RETURN_IF_ERROR(_prepare_pipeline_driver(exec_env, request));
RETURN_IF_ERROR(_prepare_stream_load_pipe(exec_env, request));
}

RETURN_IF_ERROR(_query_ctx->fragment_mgr()->register_ctx(request.fragment_instance_id(), _fragment_ctx));
prepare_success = true;
Expand Down
18 changes: 14 additions & 4 deletions be/src/exec/pipeline/query_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#include "exec/pipeline/query_context.h"

#include <memory>
#include <vector>

#include "agent/master_info.h"
#include "exec/pipeline/fragment_context.h"
#include "exec/pipeline/pipeline_fwd.h"
#include "exec/workgroup/work_group.h"
#include "runtime/client_cache.h"
#include "runtime/current_thread.h"
Expand Down Expand Up @@ -198,11 +200,12 @@ Status QueryContextManager::init() {
return Status::InternalError("Fail to create clean_thread of QueryContextManager");
}
}
void QueryContextManager::_clean_slot_unlocked(size_t i) {
void QueryContextManager::_clean_slot_unlocked(size_t i, std::vector<QueryContextPtr>& del) {
auto& sc_map = _second_chance_maps[i];
auto sc_it = sc_map.begin();
while (sc_it != sc_map.end()) {
if (sc_it->second->has_no_active_instances() && sc_it->second->is_delivery_expired()) {
del.emplace_back(std::move(sc_it->second));
sc_it = sc_map.erase(sc_it);
} else {
++sc_it;
Expand All @@ -212,8 +215,9 @@ void QueryContextManager::_clean_slot_unlocked(size_t i) {
void QueryContextManager::_clean_query_contexts() {
for (auto i = 0; i < _num_slots; ++i) {
auto& mutex = _mutexes[i];
std::vector<QueryContextPtr> del_list;
std::unique_lock write_lock(mutex);
_clean_slot_unlocked(i);
_clean_slot_unlocked(i, del_list);
}
}

Expand Down Expand Up @@ -323,8 +327,13 @@ bool QueryContextManager::remove(const TUniqueId& query_id) {
auto& context_map = _context_maps[i];
auto& sc_map = _second_chance_maps[i];

// Retain the query_ctx references so their destructors are not invoked while the lock is held;
// they must be declared before acquiring the write lock so they are destroyed after it is released.
QueryContextPtr query_ctx;
std::vector<QueryContextPtr> del_list;

std::unique_lock<std::shared_mutex> write_lock(mutex);
_clean_slot_unlocked(i);
_clean_slot_unlocked(i, del_list);
// return directly if query_ctx is absent
auto it = context_map.find(query_id);
if (it == context_map.end()) {
Expand All @@ -333,6 +342,7 @@ bool QueryContextManager::remove(const TUniqueId& query_id) {

// the query context is really dead, so just cleanup
if (it->second->is_dead()) {
query_ctx = std::move(it->second);
context_map.erase(it);
return true;
} else if (it->second->has_no_active_instances()) {
Expand Down Expand Up @@ -405,7 +415,7 @@ void QueryContextManager::report_fragments_with_same_host(
params.__set_backend_id(backend_id.value());
}

report_exec_status_params_vector.push_back(params);
report_exec_status_params_vector.emplace_back(std::move(params));
cur_batch_report_indexes.push_back(i);
reported[i] = true;
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/exec/pipeline/query_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class QueryContextManager {
void _stop_clean_func() { _stop.store(true); }
bool _is_stopped() { return _stop; }
size_t _slot_idx(const TUniqueId& query_id);
void _clean_slot_unlocked(size_t i);
void _clean_slot_unlocked(size_t i, std::vector<QueryContextPtr>& del);

private:
const size_t _num_slots;
Expand Down
20 changes: 13 additions & 7 deletions be/src/util/runtime_profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,13 +318,14 @@ void RuntimeProfile::add_child_unlock(RuntimeProfile* child, bool indent, ChildV
}

RuntimeProfile::Counter* RuntimeProfile::add_counter_unlock(const std::string& name, TUnit::type type,
const std::string& parent_name, bool skip_merge) {
const std::string& parent_name, bool skip_merge,
int64_t threshold) {
if (auto iter = _counter_map.find(name); iter != _counter_map.end()) {
return iter->second.first;
}

DCHECK(parent_name == ROOT_COUNTER || _counter_map.find(parent_name) != _counter_map.end());
Counter* counter = _pool->add(new Counter(type, 0, skip_merge));
Counter* counter = _pool->add(new Counter(type, 0, skip_merge, threshold));
_counter_map[name] = std::make_pair(counter, parent_name);
_child_counter_map[parent_name].insert(name);
return counter;
Expand Down Expand Up @@ -435,9 +436,10 @@ ADD_COUNTER_IMPL(AddHighWaterMarkCounter, HighWaterMarkCounter)
//ADD_COUNTER_IMPL(AddConcurrentTimerCounter, ConcurrentTimerCounter);

RuntimeProfile::Counter* RuntimeProfile::add_child_counter(const std::string& name, TUnit::type type,
const std::string& parent_name, bool skip_merge) {
const std::string& parent_name, bool skip_merge,
int64_t threshold) {
std::lock_guard<std::mutex> l(_counter_lock);
return add_counter_unlock(name, type, parent_name, skip_merge);
return add_counter_unlock(name, type, parent_name, skip_merge, threshold);
}

RuntimeProfile::DerivedCounter* RuntimeProfile::add_derived_counter(const std::string& name, TUnit::type type,
Expand Down Expand Up @@ -993,9 +995,13 @@ void RuntimeProfile::print_child_counters(const std::string& prefix, const std::
for (const std::string& child_counter : child_counters) {
auto iter = counter_map.find(child_counter);
DCHECK(iter != counter_map.end());
stream << prefix << " - " << iter->first << ": "
<< PrettyPrinter::print(iter->second.first->value(), iter->second.first->type()) << std::endl;
RuntimeProfile::print_child_counters(prefix + " ", child_counter, counter_map, child_counter_map, s);
auto value = iter->second.first->value();
auto display_threshold = iter->second.first->display_threshold();
if (display_threshold > 0 && value > display_threshold) {
stream << prefix << " - " << iter->first << ": "
<< PrettyPrinter::print(iter->second.first->value(), iter->second.first->type()) << std::endl;
RuntimeProfile::print_child_counters(prefix + " ", child_counter, counter_map, child_counter_map, s);
}
}
}
}
Expand Down
22 changes: 16 additions & 6 deletions be/src/util/runtime_profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@

namespace starrocks {

// User-defined literal converting a millisecond count into nanoseconds,
// the unit used by TUnit::TIME_NS counters (e.g. 10_ms -> 10'000'000 ns).
inline unsigned long long operator"" _ms(unsigned long long x) {
    constexpr unsigned long long kNanosPerMilli = 1000ULL * 1000ULL;
    return x * kNanosPerMilli;
}

// Define macros for updating counters. The macros make it very easy to disable
// all counters at compile time. Set this to 0 to remove counters. This is useful
// to do to make sure the counters aren't affecting the system.
Expand All @@ -68,6 +72,8 @@ namespace starrocks {
#define ADD_TIMER(profile, name) (profile)->add_counter(name, TUnit::TIME_NS)
#define ADD_CHILD_COUNTER(profile, name, type, parent) (profile)->add_child_counter(name, type, parent)
#define ADD_CHILD_COUNTER_SKIP_MERGE(profile, name, type, parent) (profile)->add_child_counter(name, type, parent, true)
// Adds a TIME_NS child counter under `parent` that is only printed when its
// value exceeds `threshold` (see Counter::display_threshold).
// BUG FIX: add_child_counter's 4th parameter is `bool skip_merge`, the 5th is
// `int64_t threshold`; the previous expansion passed `threshold` into the
// `skip_merge` slot, so the display threshold was silently dropped (stayed 0)
// and skip_merge became true for any non-zero threshold. Pass `false`
// explicitly for skip_merge so `threshold` lands in the right argument.
// (Macro name keeps the existing "THESHOLD" spelling for source compatibility.)
#define ADD_CHILD_TIMER_THESHOLD(profile, name, parent, threshold) \
    (profile)->add_child_counter(name, TUnit::TIME_NS, parent, false, threshold)
#define ADD_CHILD_TIMER(profile, name, parent) (profile)->add_child_counter(name, TUnit::TIME_NS, parent)
#define SCOPED_TIMER(c) ScopedTimer<MonotonicStopWatch> MACRO_CONCAT(SCOPED_TIMER, __COUNTER__)(c)
#define CANCEL_SAFE_SCOPED_TIMER(c, is_cancelled) \
Expand Down Expand Up @@ -104,8 +110,8 @@ class RuntimeProfile {
public:
class Counter {
public:
explicit Counter(TUnit::type type, int64_t value = 0, bool skip_merge = false)
: _value(value), _type(type), _skip_merge(skip_merge){};
explicit Counter(TUnit::type type, int64_t value = 0, bool skip_merge = false, int64_t display_threshold = 0)
: _value(value), _type(type), _skip_merge(skip_merge), _display_threshold(display_threshold){};

virtual ~Counter() = default;

Expand All @@ -132,6 +138,8 @@ class RuntimeProfile {

bool skip_merge() const { return _skip_merge; }

int64_t display_threshold() const { return _display_threshold; }

private:
friend class RuntimeProfile;

Expand All @@ -143,6 +151,8 @@ class RuntimeProfile {
// its original value after merge. We can set the flag _skip_merge to true to skip the merge process
// of the counter.
const bool _skip_merge;

const int64_t _display_threshold;
};

class ConcurrentTimerCounter;
Expand Down Expand Up @@ -334,9 +344,9 @@ class RuntimeProfile {
// parent_name.
// If the counter already exists, the existing counter object is returned.
Counter* add_child_counter(const std::string& name, TUnit::type type, const std::string& parent_name,
bool skip_merge = false);
Counter* add_counter(const std::string& name, TUnit::type type, bool skip_merge = false) {
return add_child_counter(name, type, ROOT_COUNTER, skip_merge);
bool skip_merge = false, int64_t threshold = 0);
Counter* add_counter(const std::string& name, TUnit::type type, bool skip_merge = false, int64_t threshold = 0) {
return add_child_counter(name, type, ROOT_COUNTER, skip_merge, threshold);
}

// Add a derived counter with 'name'/'type'. The counter is owned by the
Expand Down Expand Up @@ -460,7 +470,7 @@ class RuntimeProfile {

void add_child_unlock(RuntimeProfile* child, bool indent, ChildVector::iterator pos);
Counter* add_counter_unlock(const std::string& name, TUnit::type type, const std::string& parent_name,
bool skip_merge);
bool skip_merge, int64_t theshold = 0);

RuntimeProfile* _parent;

Expand Down