
Pipeline: support join without spill #7333

Merged: 20 commits, Apr 23, 2023
21 changes: 14 additions & 7 deletions dbms/src/Flash/Pipeline/Pipeline.cpp
@@ -23,6 +23,7 @@
#include <Flash/Planner/PhysicalPlanNode.h>
#include <Flash/Planner/Plans/PhysicalGetResultSink.h>
#include <Flash/Statistics/traverseExecutors.h>
#include <Interpreters/Settings.h>
#include <tipb/select.pb.h>

namespace DB
@@ -104,7 +105,7 @@ void PipelineEvents::mapInputs(const PipelineEvents & inputs)
* ```
*/

// If the outputs is fine grained model, the intputs must also be.
// If the outputs is fine grained mode, the inputs must also be.
RUNTIME_CHECK(inputs.is_fine_grained || !is_fine_grained);
for (const auto & output : events)
{
@@ -116,6 +117,10 @@ void PipelineEvents::mapInputs(const PipelineEvents & inputs)

void Pipeline::addPlanNode(const PhysicalPlanNodePtr & plan_node)
{
assert(plan_node);
/// For fine grained mode, all plan nodes should enable fine grained shuffle.
if (!plan_node->getFineGrainedShuffle().enable())
is_fine_grained_mode = false;
plan_nodes.push_back(plan_node);
}

@@ -188,9 +193,7 @@ PipelineExecGroup Pipeline::buildExecGroup(PipelineExecutorStatus & exec_status,
*/
bool Pipeline::isFineGrainedMode() const
{
assert(!plan_nodes.empty());
// The source plan node determines whether the execution mode is fine grained or non-fine grained.
return plan_nodes.front()->getFineGrainedShuffle().enable();
return is_fine_grained_mode;
}

Events Pipeline::toEvents(PipelineExecutorStatus & status, Context & context, size_t concurrency)
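
Note: the source plan node no longer decides the execution mode on its own; the hunks above maintain the `is_fine_grained_mode` flag incrementally as nodes are appended in `addPlanNode`, and `isFineGrainedMode()` simply returns it. A minimal standalone sketch of that bookkeeping (`PlanNodeStub` and `PipelineModeSketch` are illustrative stand-ins, not the TiFlash classes): the pipeline starts out fine grained and is permanently demoted once any added plan node does not enable fine grained shuffle.

```cpp
#include <cassert>
#include <memory>
#include <vector>

// Hypothetical stand-ins for PhysicalPlanNode / FineGrainedShuffle, for illustration only.
struct PlanNodeStub
{
    bool fine_grained_shuffle_enabled = false;
};
using PlanNodeStubPtr = std::shared_ptr<PlanNodeStub>;

struct PipelineModeSketch
{
    // Starts true; a single non-fine-grained node demotes the whole pipeline.
    bool is_fine_grained_mode = true;
    std::vector<PlanNodeStubPtr> plan_nodes;

    void addPlanNode(const PlanNodeStubPtr & node)
    {
        assert(node);
        if (!node->fine_grained_shuffle_enabled)
            is_fine_grained_mode = false;
        plan_nodes.push_back(node);
    }

    bool isFineGrainedMode() const { return is_fine_grained_mode; }
};

int main()
{
    PipelineModeSketch pipeline;
    pipeline.addPlanNode(std::make_shared<PlanNodeStub>(PlanNodeStub{true}));
    assert(pipeline.isFineGrainedMode());

    // One non-fine-grained node (e.g. a join probe) turns the mode off for good.
    pipeline.addPlanNode(std::make_shared<PlanNodeStub>(PlanNodeStub{false}));
    assert(!pipeline.isFineGrainedMode());
    return 0;
}
```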
@@ -211,12 +214,12 @@ PipelineEvents Pipeline::toSelfEvents(PipelineExecutorStatus & status, Context &
auto fine_grained_exec_group = buildExecGroup(status, context, concurrency);
for (auto & pipeline_exec : fine_grained_exec_group)
self_events.push_back(std::make_shared<FineGrainedPipelineEvent>(status, memory_tracker, log->identifier(), std::move(pipeline_exec)));
LOG_DEBUG(log, "Execute in fine grained model and generate {} fine grained pipeline event", self_events.size());
LOG_DEBUG(log, "Execute in fine grained mode and generate {} fine grained pipeline event", self_events.size());
}
else
{
self_events.push_back(std::make_shared<PlainPipelineEvent>(status, memory_tracker, log->identifier(), context, shared_from_this(), concurrency));
LOG_DEBUG(log, "Execute in non fine grained model and generate one plain pipeline event");
LOG_DEBUG(log, "Execute in non fine grained mode and generate one plain pipeline event");
}
return {std::move(self_events), isFineGrainedMode()};
}
@@ -230,7 +233,7 @@ PipelineEvents Pipeline::doToEvents(PipelineExecutorStatus & status, Context & c
return self_events;
}

bool Pipeline::isSupported(const tipb::DAGRequest & dag_request)
bool Pipeline::isSupported(const tipb::DAGRequest & dag_request, const Settings & settings)
{
bool is_supported = true;
traverseExecutors(
@@ -253,6 +256,10 @@ bool Pipeline::isSupported(const tipb::DAGRequest & dag_request)
// TODO support non fine grained shuffle.
is_supported = FineGrainedShuffle(&executor).enable();
return is_supported;
case tipb::ExecType::TypeJoin:
// TODO support spill.
is_supported = (settings.max_bytes_before_external_join == 0);
return is_supported;
default:
is_supported = false;
return false;
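
Note: the new `TypeJoin` branch in `Pipeline::isSupported` only accepts a join while spill to disk is disabled. A hedged, standalone illustration of that gate (`SettingsStub` stands in for the real `Settings` struct; only the `max_bytes_before_external_join` field matters here): when the setting is non-zero, the executor is rejected and the query falls back to the block inputstream model, so unsupported queries still run on the older executor instead of failing.

```cpp
#include <cstdint>
#include <iostream>

// Stub for the relevant setting; the real Settings struct lives in Interpreters/Settings.h.
struct SettingsStub
{
    uint64_t max_bytes_before_external_join = 0; // 0 means join spill is disabled
};

// Mirrors the TypeJoin branch added above: joins are only supported by the
// pipeline model while spill-to-disk is turned off.
bool joinSupportedByPipeline(const SettingsStub & settings)
{
    return settings.max_bytes_before_external_join == 0;
}

int main()
{
    SettingsStub no_spill; // default: spill disabled
    SettingsStub with_spill;
    with_spill.max_bytes_before_external_join = 10ULL * 1024 * 1024 * 1024;

    std::cout << joinSupportedByPipeline(no_spill) << '\n';   // 1: run with the pipeline model
    std::cout << joinSupportedByPipeline(with_spill) << '\n'; // 0: fall back to block inputstream model
    return 0;
}
```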
6 changes: 5 additions & 1 deletion dbms/src/Flash/Pipeline/Pipeline.h
@@ -28,6 +28,8 @@ class DAGRequest;

namespace DB
{
struct Settings;

class Pipeline;
using PipelinePtr = std::shared_ptr<Pipeline>;
using Pipelines = std::vector<PipelinePtr>;
@@ -72,7 +74,7 @@ class Pipeline : public std::enable_shared_from_this<Pipeline>

Events toEvents(PipelineExecutorStatus & status, Context & context, size_t concurrency);

static bool isSupported(const tipb::DAGRequest & dag_request);
static bool isSupported(const tipb::DAGRequest & dag_request, const Settings & settings);

Block getSampleBlock() const;

@@ -88,6 +90,8 @@ class Pipeline : public std::enable_shared_from_this<Pipeline>
const UInt32 id;
LoggerPtr log;

bool is_fine_grained_mode = true;

// data flow: plan_nodes.begin() --> plan_nodes.end()
std::deque<PhysicalPlanNodePtr> plan_nodes;

4 changes: 3 additions & 1 deletion dbms/src/Flash/Planner/PlanType.h
@@ -39,7 +39,9 @@ struct PlanType
AggregationBuild = 14,
AggregationConvergent = 15,
Expand = 16,
GetResult = 17
JoinBuild = 17,
JoinProbe = 18,
GetResult = 19,
};
PlanTypeEnum enum_value;

25 changes: 20 additions & 5 deletions dbms/src/Flash/Planner/Plans/PhysicalJoin.cpp
@@ -27,6 +27,8 @@
#include <Flash/Planner/FinalizeHelper.h>
#include <Flash/Planner/PhysicalPlanHelper.h>
#include <Flash/Planner/Plans/PhysicalJoin.h>
#include <Flash/Planner/Plans/PhysicalJoinBuild.h>
#include <Flash/Planner/Plans/PhysicalJoinProbe.h>
#include <Interpreters/Context.h>
#include <common/logger_useful.h>
#include <fmt/format.h>
@@ -251,16 +253,29 @@ void PhysicalJoin::buildPipeline(
PipelineExecutorStatus & exec_status)
{
// Break the pipeline for join build.
// FIXME: Should be newly created PhysicalJoinBuild.
auto join_build_builder = builder.breakPipeline(shared_from_this());
auto join_build = std::make_shared<PhysicalJoinBuild>(
executor_id,
build()->getSchema(),
fine_grained_shuffle,
log->identifier(),
build(),
join_ptr,
build_side_prepare_actions);
auto join_build_builder = builder.breakPipeline(join_build);
// Join build pipeline.
build()->buildPipeline(join_build_builder, context, exec_status);
join_build_builder.build();

// Join probe pipeline.
probe()->buildPipeline(builder, context, exec_status);
// FIXME: Should be newly created PhysicalJoinProbe.
builder.addPlanNode(shared_from_this());
throw Exception("Unsupport");
auto join_probe = std::make_shared<PhysicalJoinProbe>(
executor_id,
schema,
log->identifier(),
probe(),
join_ptr,
probe_side_prepare_actions);
builder.addPlanNode(join_probe);
}

void PhysicalJoin::finalize(const Names & parent_require)
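
Note: `PhysicalJoin::buildPipeline` now splits the join into two plan nodes. The build side ends its pipeline at a `PhysicalJoinBuild` (a pipeline breaker feeding the hash join build, which must finish before probing can start), and the probe side continues on the current pipeline with a `PhysicalJoinProbe`. The toy sketch below models only the resulting two-pipeline shape; `NodeSketch` and `PipelineBuilderSketch` are hypothetical types, and the method order is simplified compared to the real builder API.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Illustrative stand-ins only; the real PhysicalJoinBuild/PhysicalJoinProbe are TiFlash plan nodes.
struct NodeSketch
{
    std::string name;
};
using NodeSketchPtr = std::shared_ptr<NodeSketch>;

struct PipelineSketch
{
    std::vector<NodeSketchPtr> nodes;
};

// A toy builder modelling the effect of breakPipeline(): the breaker node closes
// the current pipeline (the join build side) and a fresh pipeline carries the probe side.
struct PipelineBuilderSketch
{
    PipelineSketch current;
    std::vector<PipelineSketch> finished;

    void add(const NodeSketchPtr & node) { current.nodes.push_back(node); }

    // Ends the current pipeline at `breaker` and starts a new, empty one.
    void breakPipeline(const NodeSketchPtr & breaker)
    {
        current.nodes.push_back(breaker);
        finished.push_back(std::move(current));
        current = PipelineSketch{};
    }
};

int main()
{
    PipelineBuilderSketch builder;
    builder.add(std::make_shared<NodeSketch>(NodeSketch{"table_scan_build_side"}));
    builder.breakPipeline(std::make_shared<NodeSketch>(NodeSketch{"PhysicalJoinBuild"}));

    builder.add(std::make_shared<NodeSketch>(NodeSketch{"table_scan_probe_side"}));
    builder.add(std::make_shared<NodeSketch>(NodeSketch{"PhysicalJoinProbe"}));

    // Expected shape: pipeline #0 ends with the join build sink,
    // pipeline #1 carries the probe and the rest of the plan.
    for (size_t i = 0; i < builder.finished.size(); ++i)
        for (const auto & n : builder.finished[i].nodes)
            std::cout << "pipeline " << i << ": " << n->name << '\n';
    for (const auto & n : builder.current.nodes)
        std::cout << "pipeline " << builder.finished.size() << ": " << n->name << '\n';
    return 0;
}
```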
37 changes: 37 additions & 0 deletions dbms/src/Flash/Planner/Plans/PhysicalJoinBuild.cpp
@@ -0,0 +1,37 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Flash/Coprocessor/InterpreterUtils.h>
#include <Flash/Pipeline/Exec/PipelineExecBuilder.h>
#include <Flash/Planner/Plans/PhysicalJoinBuild.h>
#include <Operators/HashJoinBuildSink.h>

namespace DB
{
void PhysicalJoinBuild::buildPipelineExecGroup(
PipelineExecutorStatus & exec_status,
PipelineExecGroupBuilder & group_builder,
Context & /*context*/,
size_t /*concurrency*/)
{
executeExpression(exec_status, group_builder, prepare_actions, log);

size_t build_index = 0;
group_builder.transform([&](auto & builder) {
builder.setSinkOp(std::make_unique<HashJoinBuildSink>(exec_status, log->identifier(), join_ptr, build_index++));
});
join_ptr->initBuild(group_builder.getCurrentHeader(), group_builder.concurrency);
join_ptr->setInitActiveBuildThreads();
}
} // namespace DB
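
Note: in `buildPipelineExecGroup` above, every per-stream builder receives its own `HashJoinBuildSink` with a distinct, monotonically increasing `build_index`, so the join can tell the build streams apart when it is initialized with `group_builder.concurrency`. A small sketch of that indexing pattern with a stub `GroupBuilderSketch` (hypothetical, not the real `PipelineExecGroupBuilder`):

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Toy stand-in for the group builder's transform(): applies a callback to every
// per-stream builder so each one can be given its own sink.
struct GroupBuilderSketch
{
    size_t concurrency = 4;

    void transform(const std::function<void(size_t stream)> & f)
    {
        for (size_t stream = 0; stream < concurrency; ++stream)
            f(stream);
    }
};

int main()
{
    GroupBuilderSketch group_builder;

    // Mirrors the pattern in PhysicalJoinBuild::buildPipelineExecGroup:
    // a monotonically increasing build_index gives every stream its own
    // hash-join build slot.
    size_t build_index = 0;
    std::vector<size_t> assigned;
    group_builder.transform([&](size_t /*stream*/) {
        assigned.push_back(build_index++);
    });

    for (size_t idx : assigned)
        std::cout << "HashJoinBuildSink with build_index " << idx << '\n';
    return 0;
}
```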
53 changes: 53 additions & 0 deletions dbms/src/Flash/Planner/Plans/PhysicalJoinBuild.h
@@ -0,0 +1,53 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Flash/Planner/Plans/PhysicalUnary.h>
#include <Flash/Planner/Plans/PipelineBreakerHelper.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Join.h>

namespace DB
{
class PhysicalJoinBuild : public PhysicalUnary
{
public:
PhysicalJoinBuild(
const String & executor_id_,
const NamesAndTypes & schema_,
const FineGrainedShuffle & fine_grained_shuffle_,
const String & req_id,
const PhysicalPlanNodePtr & child_,
const JoinPtr & join_ptr_,
const ExpressionActionsPtr & prepare_actions_)
: PhysicalUnary(executor_id_, PlanType::JoinBuild, schema_, fine_grained_shuffle_, req_id, child_)
, join_ptr(join_ptr_)
, prepare_actions(prepare_actions_)
{}

void buildPipelineExecGroup(
PipelineExecutorStatus & exec_status,
PipelineExecGroupBuilder & group_builder,
Context & /*context*/,
size_t /*concurrency*/) override;

private:
DISABLE_USELESS_FUNCTION_FOR_BREAKER

private:
JoinPtr join_ptr;
ExpressionActionsPtr prepare_actions;
};
} // namespace DB
58 changes: 58 additions & 0 deletions dbms/src/Flash/Planner/Plans/PhysicalJoinProbe.cpp
@@ -0,0 +1,58 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Flash/Coprocessor/InterpreterUtils.h>
#include <Flash/Pipeline/Exec/PipelineExecBuilder.h>
#include <Flash/Planner/Plans/PhysicalJoinProbe.h>
#include <Interpreters/Context.h>
#include <Operators/HashJoinProbeTransformOp.h>

namespace DB
{
void PhysicalJoinProbe::buildPipelineExecGroup(
PipelineExecutorStatus & exec_status,
PipelineExecGroupBuilder & group_builder,
Context & context,
size_t /*concurrency*/)
{
executeExpression(exec_status, group_builder, prepare_actions, log);

auto input_header = group_builder.getCurrentHeader();
join_ptr->initProbe(input_header, group_builder.concurrency);
size_t probe_index = 0;
const auto & max_block_size = context.getSettingsRef().max_block_size;
group_builder.transform([&](auto & builder) {
builder.appendTransformOp(std::make_unique<HashJoinProbeTransformOp>(
exec_status,
log->identifier(),
join_ptr,
probe_index++,
max_block_size,
input_header));
});

/// add a project to remove all the useless columns
ExpressionActionsPtr schema_project = std::make_shared<ExpressionActions>(group_builder.getCurrentHeader().getColumnsWithTypeAndName());
NamesWithAliases schema_project_cols;
for (auto & c : schema)
{
/// do not need to care about duplicated column names because
/// it is guaranteed by its children physical plan nodes
schema_project_cols.emplace_back(c.name, c.name);
}
assert(!schema_project_cols.empty());
schema_project->add(ExpressionAction::project(schema_project_cols));
executeExpression(exec_status, group_builder, schema_project, log);
}
} // namespace DB
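
Note: after the per-stream `HashJoinProbeTransformOp` is appended, the probe node adds a final projection that keeps exactly the columns in its schema and drops whatever extra columns the probe produced. A standalone sketch of that schema-project step using plain column-name lists (illustrative only; the real code builds an `ExpressionActions` projection over the current header):

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Models the "schema project" at the end of PhysicalJoinProbe::buildPipelineExecGroup:
// keep exactly the columns listed in the node's schema, drop everything else.
std::vector<std::string> projectToSchema(
    const std::vector<std::string> & probe_output_columns,
    const std::vector<std::string> & schema_columns)
{
    std::vector<std::string> projected;
    for (const auto & name : schema_columns)
    {
        // Child plan nodes guarantee unique column names, so a simple lookup is enough.
        if (std::find(probe_output_columns.begin(), probe_output_columns.end(), name)
            != probe_output_columns.end())
            projected.push_back(name);
    }
    return projected;
}

int main()
{
    const std::vector<std::string> probe_output = {"l.id", "l.value", "r.id", "r.tmp_col"};
    const std::vector<std::string> schema = {"l.id", "l.value", "r.id"};

    for (const auto & name : projectToSchema(probe_output, schema))
        std::cout << name << '\n'; // only the schema columns survive
    return 0;
}
```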
52 changes: 52 additions & 0 deletions dbms/src/Flash/Planner/Plans/PhysicalJoinProbe.h
@@ -0,0 +1,52 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Flash/Planner/Plans/PhysicalUnary.h>
#include <Flash/Planner/Plans/PipelineBreakerHelper.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Join.h>

namespace DB
{
class PhysicalJoinProbe : public PhysicalUnary
{
public:
PhysicalJoinProbe(
const String & executor_id_,
const NamesAndTypes & schema_,
const String & req_id,
const PhysicalPlanNodePtr & child_,
const JoinPtr & join_ptr_,
const ExpressionActionsPtr & prepare_actions_)
: PhysicalUnary(executor_id_, PlanType::JoinProbe, schema_, FineGrainedShuffle{}, req_id, child_)
, join_ptr(join_ptr_)
, prepare_actions(prepare_actions_)
{}

void buildPipelineExecGroup(
PipelineExecutorStatus & exec_status,
PipelineExecGroupBuilder & group_builder,
Context & context,
size_t /*concurrency*/) override;

private:
DISABLE_USELESS_FUNCTION_FOR_BREAKER

private:
JoinPtr join_ptr;
ExpressionActionsPtr prepare_actions;
};
} // namespace DB
2 changes: 1 addition & 1 deletion dbms/src/Flash/executeQuery.cpp
@@ -119,7 +119,7 @@ std::optional<QueryExecutorPtr> executeAsPipeline(Context & context, bool intern
const auto & logger = dag_context.log;
RUNTIME_ASSERT(logger);

if (!TaskScheduler::instance || !Pipeline::isSupported(*dag_context.dag_request))
if (!TaskScheduler::instance || !Pipeline::isSupported(*dag_context.dag_request, context.getSettingsRef()))
{
LOG_DEBUG(logger, "Can't run by pipeline model, fallback to block inputstream model");
return {};
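
Note: `executeAsPipeline` returns an empty optional when the pipeline model cannot be used (no task scheduler, or `Pipeline::isSupported` rejects the request, e.g. a join with spill enabled), and the caller then takes the block inputstream path. A hedged sketch of that fallback pattern with stand-in types (`ExecutorSketch` and the two helper functions are hypothetical):

```cpp
#include <iostream>
#include <optional>
#include <string>

// Stand-ins for the two execution paths; the real functions return QueryExecutorPtr.
using ExecutorSketch = std::string;

std::optional<ExecutorSketch> executeAsPipelineSketch(bool pipeline_supported)
{
    if (!pipeline_supported)
        return std::nullopt; // mirrors "return {}" above: caller falls back
    return ExecutorSketch{"pipeline executor"};
}

ExecutorSketch executeAsBlockIO()
{
    return ExecutorSketch{"block inputstream executor"};
}

int main()
{
    // With join spill enabled (max_bytes_before_external_join != 0) the support
    // check fails and the query transparently takes the block inputstream path.
    for (bool supported : {true, false})
    {
        auto executor = executeAsPipelineSketch(supported);
        std::cout << (executor ? *executor : executeAsBlockIO()) << '\n';
    }
    return 0;
}
```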