Skip to content

Commit

Permalink
[Metrics] Add metrics to monitor BE's agent task queue size (apache#5648
Browse files Browse the repository at this point in the history
)

* [Metrics] Add metrics to monitor BE's agent task queue size

Sometimes a user's DDL or background task may last a long time,
and it is not easy to find out which procedure has a problem.
This patch adds a metric to monitor the BE's agent task queue size,
which is helpful for troubleshooting.

The raw metrics on the BE look like:
doris_be_agent_task_queue_size{type="REPORT_OLAP_TABLE"} 0
doris_be_agent_task_queue_size{type="REPORT_DISK_STATE"} 0
doris_be_agent_task_queue_size{type="REPORT_TASK"} 0
doris_be_agent_task_queue_size{type="CHECK_CONSISTENCY"} 0
doris_be_agent_task_queue_size{type="DELETE"} 0
doris_be_agent_task_queue_size{type="CLEAR_TRANSACTION_TASK"} 0
doris_be_agent_task_queue_size{type="PUBLISH_VERSION"} 0
doris_be_agent_task_queue_size{type="UPLOAD"} 0
doris_be_agent_task_queue_size{type="DROP_TABLE"} 0
doris_be_agent_task_queue_size{type="CREATE_TABLE"} 39
doris_be_agent_task_queue_size{type="RELEASE_SNAPSHOT"} 0
doris_be_agent_task_queue_size{type="STORAGE_MEDIUM_MIGRATE"} 245
doris_be_agent_task_queue_size{type="CLONE"} 0
doris_be_agent_task_queue_size{type="MOVE"} 0
doris_be_agent_task_queue_size{type="ALTER_TABLE"} 0
doris_be_agent_task_queue_size{type="DOWNLOAD"} 0
doris_be_agent_task_queue_size{type="PUSH"} 0
doris_be_agent_task_queue_size{type="UPDATE_TABLET_META_INFO"} 0
doris_be_agent_task_queue_size{type="MAKE_SNAPSHOT"} 0

* fix typo
  • Loading branch information
acelyc111 authored Apr 21, 2021
1 parent ab64dbe commit d15fe05
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 11 deletions.
18 changes: 16 additions & 2 deletions be/src/agent/task_worker_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ using std::vector;

namespace doris {

// Gauge prototype for the agent task queue size, declared with MetricUnit::NOUNIT.
// NOTE(review): presumably this defines METRIC_agent_task_queue_size, which the
// REGISTER_/DEREGISTER_ENTITY_HOOK_METRIC macros reference via METRIC_##name - confirm.
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(agent_task_queue_size, MetricUnit::NOUNIT);

const uint32_t TASK_FINISH_MAX_RETRY = 3;
const uint32_t PUBLISH_VERSION_MAX_RETRY = 3;
const uint32_t REPORT_TASK_WORKER_COUNT = 1;
Expand All @@ -81,8 +83,7 @@ FrontendServiceClientCache TaskWorkerPool::_master_service_client_cache;

TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* env,
const TMasterInfo& master_info)
: _name(strings::Substitute("TaskWorkerPool.$0", TYPE_STRING(task_worker_type))),
_master_info(master_info),
: _master_info(master_info),
_agent_utils(new AgentUtils()),
_master_client(new MasterServerClient(_master_info, &_master_service_client_cache)),
_env(env),
Expand All @@ -93,11 +94,24 @@ TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* e
_backend.__set_host(BackendOptions::get_localhost());
_backend.__set_be_port(config::be_port);
_backend.__set_http_port(config::webserver_port);

string task_worker_type_name = TYPE_STRING(task_worker_type);
_name = strings::Substitute("TaskWorkerPool.$0", task_worker_type_name);

_metric_entity = DorisMetrics::instance()->metric_registry()->register_entity(
task_worker_type_name, {{"type", task_worker_type_name}});
REGISTER_ENTITY_HOOK_METRIC(_metric_entity, this, agent_task_queue_size, [this]() {
lock_guard<Mutex> lock(_worker_thread_lock);
return _tasks.size();
});
}

// Tears the pool down: counts down the stop latch, calls stop(), and then removes
// the queue-size metric, its hook, and the metric entity from the metric registry.
TaskWorkerPool::~TaskWorkerPool() {
_stop_background_threads_latch.count_down();
stop();

// The hook registered in the constructor captures `this` and reads _tasks under
// _worker_thread_lock, so it is deregistered here, before the members are destroyed.
DEREGISTER_ENTITY_HOOK_METRIC(_metric_entity, agent_task_queue_size);
DorisMetrics::instance()->metric_registry()->deregister_entity(_metric_entity);
}

void TaskWorkerPool::start() {
Expand Down
3 changes: 3 additions & 0 deletions be/src/agent/task_worker_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ class TaskWorkerPool {
std::unique_ptr<ThreadPool> _thread_pool;
std::deque<TAgentTaskRequest> _tasks;

// Metric entity obtained from the metric registry's register_entity() in the
// constructor; it carries a "type" label set to the task worker type name.
std::shared_ptr<MetricEntity> _metric_entity;
// Gauge assigned by REGISTER_ENTITY_HOOK_METRIC in the constructor; its hook sets
// the value to _tasks.size(), read while holding _worker_thread_lock.
UIntGauge* agent_task_queue_size;

uint32_t _worker_count;
TaskWorkerType _task_worker_type;

Expand Down
21 changes: 12 additions & 9 deletions be/src/util/doris_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@

namespace doris {

// Registers a UIntGauge described by METRIC_<metric> on `entity`, stores the
// resulting pointer in `owner->metric`, and installs a hook named "<metric>" on
// `entity` that sets the gauge to the value returned by `func()`.
//
//   entity - pointer-like expression with register_metric<T>() and register_hook();
//            it is evaluated twice.
//   owner  - pointer-like expression to the object holding the `metric` member.
//   metric - bare identifier: both the member name and the METRIC_ suffix.
//   func   - callable expression invoked with no arguments inside the hook.
//
// `entity` and `owner` are parenthesized so that expression arguments (a ternary,
// a cast, ...) bind correctly before `->`.
// NOTE: the hook lambda captures by reference ([&]); whatever `owner` and `func`
// name must remain valid until the hook is deregistered.
// NOTE: this expands to two statements; do not use it as the unbraced body of an
// if/else/for.
#define REGISTER_ENTITY_HOOK_METRIC(entity, owner, metric, func)                            \
    (owner)->metric = (UIntGauge*)((entity)->register_metric<UIntGauge>(&METRIC_##metric)); \
    (entity)->register_hook(#metric, [&]() { (owner)->metric->set_value(func()); });

#define REGISTER_HOOK_METRIC(metric, func) \
DorisMetrics::instance()->metric = \
(UIntGauge*)(DorisMetrics::instance()->server_entity()->register_metric<UIntGauge>( \
&METRIC_##metric)); \
DorisMetrics::instance()->server_entity()->register_hook( \
#metric, [&]() { DorisMetrics::instance()->metric->set_value(func()); });

#define DEREGISTER_HOOK_METRIC(name) \
DorisMetrics::instance()->server_entity()->deregister_metric(&METRIC_##name); \
DorisMetrics::instance()->server_entity()->deregister_hook(#name);
REGISTER_ENTITY_HOOK_METRIC(DorisMetrics::instance()->server_entity(), DorisMetrics::instance(), metric, func)

// Removes the metric described by METRIC_<name> and the hook named "<name>" from
// `entity`.
//
//   entity - pointer-like expression with deregister_metric() and
//            deregister_hook(); it is evaluated twice.
//   name   - bare identifier used for both the METRIC_ suffix and the hook name.
//
// `entity` is parenthesized so that expression arguments bind correctly before `->`.
// NOTE: this expands to two statements; do not use it as the unbraced body of an
// if/else/for.
#define DEREGISTER_ENTITY_HOOK_METRIC(entity, name) \
    (entity)->deregister_metric(&METRIC_##name);    \
    (entity)->deregister_hook(#name);

// Convenience form of DEREGISTER_ENTITY_HOOK_METRIC that targets the entity
// returned by DorisMetrics::instance()->server_entity().
#define DEREGISTER_HOOK_METRIC(name) \
DEREGISTER_ENTITY_HOOK_METRIC(DorisMetrics::instance()->server_entity(), name)

class DorisMetrics {
public:
Expand Down

0 comments on commit d15fe05

Please sign in to comment.