Skip to content

Commit

Permalink
[bugfix](scannerscheduler) should minus num_of_scanners before check …
Browse files Browse the repository at this point in the history
…should schedule apache#28926 (apache#29331)



---------

Co-authored-by: yiguolei <yiguolei@gmail.com>
  • Loading branch information
yiguolei and Doris-Extras authored Jan 3, 2024
1 parent c84cd30 commit bd8113f
Show file tree
Hide file tree
Showing 19 changed files with 272 additions and 353 deletions.
3 changes: 3 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,9 @@ DEFINE_mBool(enable_column_type_check, "true");
// 128 MB
DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728");

// Default 300s, if its value <= 0, then log is disabled
DEFINE_mInt64(enable_debug_log_timeout_secs, "0");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,8 @@ DECLARE_mInt32(variant_max_merged_tablet_schema_size);

DECLARE_mInt64(local_exchange_buffer_mem_limit);

DECLARE_mInt64(enable_debug_log_timeout_secs);

DECLARE_mBool(enable_column_type_check);

#ifdef BE_TEST
Expand Down
8 changes: 0 additions & 8 deletions be/src/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,14 +434,6 @@ class [[nodiscard]] Status {

bool ok() const { return _code == ErrorCode::OK; }

// Convert into TStatus. Call this if 'status_container' contains an optional
// TStatus field named 'status'. This also sets __isset.status.
template <typename T>
void set_t_status(T* status_container) const {
to_thrift(&status_container->status);
status_container->__isset.status = true;
}

// Convert into TStatus.
void to_thrift(TStatus* status) const;
TStatus to_thrift() const;
Expand Down
4 changes: 4 additions & 0 deletions be/src/exec/exec_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,10 @@ class ExecNode {

size_t children_count() const { return _children.size(); }

// when the fragment is normal finished, call this method to do some finish work
// such as send the last buffer to remote.
virtual Status try_close(RuntimeState* state) { return Status::OK(); }

protected:
friend class DataSink;

Expand Down
64 changes: 20 additions & 44 deletions be/src/pipeline/exec/scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ bool ScanOperator::can_read() {
return false;
}
} else {
// If scanner meet any error, done == true
if (_node->_eos || _node->_scanner_ctx->done()) {
// _eos: need eos
// _scanner_ctx->done(): need finish
Expand All @@ -64,14 +65,6 @@ bool ScanOperator::can_read() {
}
}

bool ScanOperator::is_pending_finish() const {
return _node->_scanner_ctx && !_node->_scanner_ctx->no_schedule();
}

Status ScanOperator::try_close(RuntimeState* state) {
return _node->try_close(state);
}

bool ScanOperator::runtime_filters_are_ready_or_timeout() {
return _node->runtime_filters_are_ready_or_timeout();
}
Expand All @@ -81,9 +74,8 @@ std::string ScanOperator::debug_string() const {
fmt::format_to(debug_string_buffer, "{}, scanner_ctx is null: {} ",
SourceOperator::debug_string(), _node->_scanner_ctx == nullptr);
if (_node->_scanner_ctx) {
fmt::format_to(debug_string_buffer, ", num_running_scanners = {}, num_scheduling_ctx = {} ",
_node->_scanner_ctx->get_num_running_scanners(),
_node->_scanner_ctx->get_num_scheduling_ctx());
fmt::format_to(debug_string_buffer, ", num_running_scanners = {}",
_node->_scanner_ctx->debug_string());
}
return fmt::to_string(debug_string_buffer);
}
Expand All @@ -101,9 +93,6 @@ std::string ScanOperator::debug_string() const {
template <typename Derived>
ScanLocalState<Derived>::ScanLocalState(RuntimeState* state, OperatorXBase* parent)
: ScanLocalStateBase(state, parent) {
_finish_dependency = std::make_shared<FinishDependency>(
parent->operator_id(), parent->node_id(), parent->get_name() + "_FINISH_DEPENDENCY",
state->get_query_ctx());
_filter_dependency = std::make_shared<RuntimeFilterDependency>(
parent->operator_id(), parent->node_id(), parent->get_name() + "_FILTER_DEPENDENCY",
state->get_query_ctx());
Expand Down Expand Up @@ -175,7 +164,6 @@ Status ScanLocalState<Derived>::open(RuntimeState* state) {

auto status = _eos ? Status::OK() : _prepare_scanners();
if (_scanner_ctx) {
_finish_dependency->block();
DCHECK(!_eos && _num_scanners->value() > 0);
RETURN_IF_ERROR(_scanner_ctx->init());
RETURN_IF_ERROR(state->exec_env()->scanner_scheduler()->submit(_scanner_ctx));
Expand Down Expand Up @@ -569,15 +557,14 @@ std::string ScanLocalState<Derived>::debug_string(int indentation_level) const {
_eos.load());
if (_scanner_ctx) {
fmt::format_to(debug_string_buffer, "");
fmt::format_to(
debug_string_buffer,
", Scanner Context: (_is_finished = {}, _should_stop = {}, "
"_num_running_scanners={}, "
"_num_scheduling_ctx = {}, _num_unfinished_scanners = {}, status = {}, error = {})",
_scanner_ctx->is_finished(), _scanner_ctx->should_stop(),
_scanner_ctx->get_num_running_scanners(), _scanner_ctx->get_num_scheduling_ctx(),
_scanner_ctx->get_num_unfinished_scanners(), _scanner_ctx->status().to_string(),
_scanner_ctx->status_error());
fmt::format_to(debug_string_buffer,
", Scanner Context: (_is_finished = {}, _should_stop = {}, "
"_num_running_scanners={}, "
" _num_unfinished_scanners = {}, status = {})",
_scanner_ctx->is_finished(), _scanner_ctx->should_stop(),
_scanner_ctx->get_num_running_scanners(),
_scanner_ctx->get_num_unfinished_scanners(),
_scanner_ctx->status().to_string());
}

return fmt::to_string(debug_string_buffer);
Expand Down Expand Up @@ -1225,23 +1212,27 @@ template <typename Derived>
Status ScanLocalState<Derived>::_prepare_scanners() {
std::list<vectorized::VScannerSPtr> scanners;
RETURN_IF_ERROR(_init_scanners(&scanners));
// Init scanner wrapper
for (auto it = scanners.begin(); it != scanners.end(); ++it) {
_scanners.emplace_back(std::make_shared<vectorized::ScannerDelegate>(*it));
}
if (scanners.empty()) {
_eos = true;
_scan_dependency->set_ready();
} else {
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
RETURN_IF_ERROR(_start_scanners(scanners));
RETURN_IF_ERROR(_start_scanners(_scanners));
}
return Status::OK();
}

template <typename Derived>
Status ScanLocalState<Derived>::_start_scanners(
const std::list<vectorized::VScannerSPtr>& scanners) {
const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners) {
auto& p = _parent->cast<typename Derived::Parent>();
_scanner_ctx = PipXScannerContext::create_shared(
state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(),
state()->scan_queue_mem_limit(), _scan_dependency, _finish_dependency);
state()->scan_queue_mem_limit(), _scan_dependency);
return Status::OK();
}

Expand Down Expand Up @@ -1317,9 +1308,6 @@ Status ScanLocalState<Derived>::_init_profile() {

_max_scanner_thread_num = ADD_COUNTER(_runtime_profile, "MaxScannerThreadNum", TUnit::UNIT);

_wait_for_finish_dependency_timer =
ADD_TIMER(_runtime_profile, "WaitForPendingFinishDependency");

return Status::OK();
}

Expand Down Expand Up @@ -1427,17 +1415,6 @@ Status ScanOperatorX<LocalStateType>::open(RuntimeState* state) {
return Status::OK();
}

template <typename LocalStateType>
Status ScanOperatorX<LocalStateType>::try_close(RuntimeState* state) {
auto& local_state = get_local_state(state);
if (local_state._scanner_ctx) {
// mark this scanner ctx as should_stop to make sure scanners will not be scheduled anymore
// TODO: there is a lock in `set_should_stop` may cause some slight impact
local_state._scanner_ctx->set_should_stop();
}
return Status::OK();
}

template <typename Derived>
Status ScanLocalState<Derived>::close(RuntimeState* state) {
if (_closed) {
Expand All @@ -1449,10 +1426,9 @@ Status ScanLocalState<Derived>::close(RuntimeState* state) {

SCOPED_TIMER(exec_time_counter());
if (_scanner_ctx) {
_scanner_ctx->clear_and_join(reinterpret_cast<ScanLocalStateBase*>(this), state);
_scanner_ctx->stop_scanners(state);
}
COUNTER_SET(_wait_for_dependency_timer, _scan_dependency->watcher_elapse_time());
COUNTER_SET(_wait_for_finish_dependency_timer, _finish_dependency->watcher_elapse_time());
COUNTER_SET(_wait_for_rf_timer, _filter_dependency->watcher_elapse_time());

return PipelineXLocalState<ScanDependency>::close(state);
Expand Down Expand Up @@ -1509,7 +1485,7 @@ Status ScanOperatorX<LocalStateType>::get_block(RuntimeState* state, vectorized:
if (eos) {
source_state = SourceState::FINISHED;
// reach limit, stop the scanners.
local_state._scanner_ctx->set_should_stop();
local_state._scanner_ctx->stop_scanners(state);
}

return Status::OK();
Expand Down
16 changes: 6 additions & 10 deletions be/src/pipeline/exec/scan_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
namespace doris {
class ExecNode;
} // namespace doris
namespace doris::vectorized {
class ScannerDelegate;
}

namespace doris::pipeline {
class PipScannerContext;
Expand All @@ -49,13 +52,9 @@ class ScanOperator : public SourceOperator<vectorized::VScanNode> {

bool can_read() override; // for source

bool is_pending_finish() const override;

bool runtime_filters_are_ready_or_timeout() override;

std::string debug_string() const override;

Status try_close(RuntimeState* state) override;
};

class ScanDependency final : public Dependency {
Expand Down Expand Up @@ -174,7 +173,6 @@ class ScanLocalStateBase : public PipelineXLocalState<ScanDependency>,
RuntimeProfile::Counter* _wait_for_scanner_done_timer = nullptr;
// time of prefilter input block from scanner
RuntimeProfile::Counter* _wait_for_eos_timer = nullptr;
RuntimeProfile::Counter* _wait_for_finish_dependency_timer = nullptr;
RuntimeProfile::Counter* _wait_for_rf_timer = nullptr;
};

Expand Down Expand Up @@ -215,7 +213,6 @@ class ScanLocalState : public ScanLocalStateBase {
int64_t get_push_down_count() override;

RuntimeFilterDependency* filterdependency() override { return _filter_dependency.get(); };
Dependency* finishdependency() override { return _finish_dependency.get(); }

protected:
template <typename LocalStateType>
Expand Down Expand Up @@ -351,7 +348,7 @@ class ScanLocalState : public ScanLocalStateBase {
Status _prepare_scanners();

// Submit the scanner to the thread pool and start execution
Status _start_scanners(const std::list<vectorized::VScannerSPtr>& scanners);
Status _start_scanners(const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners);

// For some conjunct there is chance to elimate cast operator
// Eg. Variant's sub column could eliminate cast in storage layer if
Expand Down Expand Up @@ -414,14 +411,13 @@ class ScanLocalState : public ScanLocalStateBase {

std::shared_ptr<RuntimeFilterDependency> _filter_dependency;

std::shared_ptr<Dependency> _finish_dependency;
// ScanLocalState owns the ownership of scanner, scanner context only has its weakptr
std::list<std::shared_ptr<vectorized::ScannerDelegate>> _scanners;
};

template <typename LocalStateType>
class ScanOperatorX : public OperatorX<LocalStateType> {
public:
Status try_close(RuntimeState* state) override;

Status init(const TPlanNode& tnode, RuntimeState* state) override;
Status prepare(RuntimeState* state) override { return OperatorXBase::prepare(state); }
Status open(RuntimeState* state) override;
Expand Down
29 changes: 28 additions & 1 deletion be/src/pipeline/pipeline_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <string>
#include <vector>

#include "common/config.h"
#include "common/status.h"
#include "exec/operator.h"
#include "pipeline.h"
Expand Down Expand Up @@ -264,6 +265,32 @@ class PipelineTask {
bool is_running() { return _running.load(); }
void set_running(bool running) { _running = running; }

bool is_exceed_debug_timeout() {
if (_has_exceed_timeout) {
return true;
}
// If enable_debug_log_timeout_secs <= 0, then disable the log
if (_pipeline_task_watcher.elapsed_time() >
config::enable_debug_log_timeout_secs * 1000l * 1000l * 1000l) {
_has_exceed_timeout = true;
return true;
}
return false;
}

void log_detail_if_need() {
if (config::enable_debug_log_timeout_secs < 1) {
return;
}
if (is_exceed_debug_timeout()) {
LOG(INFO) << "query id|instanceid " << print_id(_state->query_id()) << "|"
<< print_id(_state->fragment_instance_id())
<< " current pipeline exceed run time "
<< config::enable_debug_log_timeout_secs << " seconds. Task state "
<< get_state_name(get_state()) << debug_string();
}
}

protected:
void _finish_p_dependency() {
for (const auto& p : _pipeline->_parents) {
Expand All @@ -278,7 +305,7 @@ class PipelineTask {
uint32_t _index;
PipelinePtr _pipeline;
bool _dependency_finish = false;

bool _has_exceed_timeout = false;
bool _prepared;
bool _opened;
RuntimeState* _state = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions be/src/pipeline/task_scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ void BlockedTaskScheduler::_schedule() {
while (iter != local_blocked_tasks.end()) {
auto* task = *iter;
auto state = task->get_state();
task->log_detail_if_need();
if (state == PipelineTaskState::PENDING_FINISH) {
// should cancel or should finish
if (task->is_pending_finish()) {
Expand Down Expand Up @@ -236,6 +237,7 @@ void TaskScheduler::_do_work(size_t index) {
static_cast<void>(_task_queue->push_back(task, index));
continue;
}
task->log_detail_if_need();
task->set_running(true);
task->set_task_queue(_task_queue.get());
auto* fragment_ctx = task->fragment_context();
Expand Down
2 changes: 1 addition & 1 deletion be/src/runtime/fragment_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ void FragmentMgr::coordinator_callback(const ReportStatusRequest& req) {
params.__set_backend_num(req.backend_num);
params.__set_fragment_instance_id(req.fragment_instance_id);
params.__set_fragment_id(req.fragment_id);
exec_status.set_t_status(&params);
params.__set_status(exec_status.to_thrift());
params.__set_done(req.done);
params.__set_query_type(req.runtime_state->query_type());
params.__set_finished_scan_ranges(req.runtime_state->num_finished_range());
Expand Down
2 changes: 1 addition & 1 deletion be/src/service/backend_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ void BackendService::exec_plan_fragment(TExecPlanFragmentResult& return_val,
const TExecPlanFragmentParams& params) {
LOG(INFO) << "exec_plan_fragment() instance_id=" << print_id(params.params.fragment_instance_id)
<< " coord=" << params.coord << " backend#=" << params.backend_num;
start_plan_fragment_execution(params).set_t_status(&return_val);
return_val.__set_status(start_plan_fragment_execution(params).to_thrift());
}

Status BackendService::start_plan_fragment_execution(const TExecPlanFragmentParams& exec_params) {
Expand Down
Loading

0 comments on commit bd8113f

Please sign in to comment.