From b91f11d81975cce5ec3f083ad74ff1a8bc38bbdd Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Thu, 30 May 2024 16:02:43 +0200 Subject: [PATCH] polishing up the PR --- .../scan/physical_column_data_scan.cpp | 21 +++++++++++-------- .../duckdb/common/optionally_owned_ptr.hpp | 1 + src/optimizer/join_order/relation_manager.cpp | 2 ++ .../joins/delim_join_dont_explode.test_slow | 5 ++++- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/execution/operator/scan/physical_column_data_scan.cpp b/src/execution/operator/scan/physical_column_data_scan.cpp index 82abc8d4b9a0..8c4b3e0e5085 100644 --- a/src/execution/operator/scan/physical_column_data_scan.cpp +++ b/src/execution/operator/scan/physical_column_data_scan.cpp @@ -19,11 +19,12 @@ PhysicalColumnDataScan::PhysicalColumnDataScan(vector types, Physic : PhysicalOperator(op_type, std::move(types), estimated_cardinality), collection(nullptr), cte_index(cte_index) { } -class PhysicalColumnGlobalDataScanState : public GlobalSourceState { +class PhysicalColumnDataGlobalScanState : public GlobalSourceState { public: - PhysicalColumnGlobalDataScanState(const ClientContext &context, const ColumnDataCollection &collection) - : max_threads(MaxValue( - context.config.verify_parallelism ? collection.ChunkCount() : collection.ChunkCount() / 60, 1)) { + PhysicalColumnDataGlobalScanState(const ClientContext &context, const ColumnDataCollection &collection) + : max_threads(MaxValue(context.config.verify_parallelism ? collection.ChunkCount() + : collection.ChunkCount() / CHUNKS_PER_THREAD, + 1)) { collection.InitializeScan(global_scan_state); } @@ -33,27 +34,29 @@ class PhysicalColumnGlobalDataScanState : public GlobalSourceState { public: ColumnDataParallelScanState global_scan_state; + + static constexpr idx_t CHUNKS_PER_THREAD = 32; const idx_t max_threads; }; -class PhysicalColumnLocalDataScanState : public LocalSourceState { +class PhysicalColumnDataLocalScanState : public LocalSourceState { public: ColumnDataLocalScanState local_scan_state; }; unique_ptr PhysicalColumnDataScan::GetGlobalSourceState(ClientContext &context) const { - return make_uniq(context, *collection); + return make_uniq(context, *collection); } unique_ptr PhysicalColumnDataScan::GetLocalSourceState(ExecutionContext &, GlobalSourceState &) const { - return make_uniq(); + return make_uniq(); } SourceResultType PhysicalColumnDataScan::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { - auto &gstate = input.global_state.Cast(); - auto &lstate = input.local_state.Cast(); + auto &gstate = input.global_state.Cast(); + auto &lstate = input.local_state.Cast(); collection->Scan(gstate.global_scan_state, lstate.local_scan_state, chunk); return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT; } diff --git a/src/include/duckdb/common/optionally_owned_ptr.hpp b/src/include/duckdb/common/optionally_owned_ptr.hpp index b7d9e7270f34..1f904abd96a4 100644 --- a/src/include/duckdb/common/optionally_owned_ptr.hpp +++ b/src/include/duckdb/common/optionally_owned_ptr.hpp @@ -9,6 +9,7 @@ #pragma once #include "duckdb/common/exception.hpp" +#include "duckdb/common/optional_ptr.hpp" #include "duckdb/common/unique_ptr.hpp" namespace duckdb { diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index a290e5f584be..daa6cc6168c7 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -42,6 +42,8 @@ void RelationManager::AddAggregateOrWindowRelation(LogicalOperator &op, optional } } relations.push_back(std::move(relation)); + op.estimated_cardinality = stats.cardinality; + op.has_estimated_cardinality = true; } void RelationManager::AddRelation(LogicalOperator &op, optional_ptr parent, diff --git a/test/optimizer/joins/delim_join_dont_explode.test_slow b/test/optimizer/joins/delim_join_dont_explode.test_slow index 238529ac8bd7..b9dcc2ff7708 100644 --- a/test/optimizer/joins/delim_join_dont_explode.test_slow +++ b/test/optimizer/joins/delim_join_dont_explode.test_slow @@ -44,6 +44,9 @@ exists( ---- 143 values hashing to dc5d1675d206057ccfe13739a38ee082 +# The query plan here used to join the two SEQ_SCANs first, and then join the DELIM_SCAN, +# Since PR #12290, we can reorder DELIM_SCANS +# Now the DELIM_SCAN is joined with a SEQ_SCAN first, and then with the SEQ_SCAN query II EXPLAIN SELECT * @@ -59,7 +62,7 @@ exists( ) order by bt.id ---- -physical_plan :.*HASH_JOIN.*DELIM_SCAN.*SEQ_SCAN.*SEQ_SCAN.* +physical_plan :.*HASH_JOIN.*SEQ_SCAN.*DELIM_SCAN.*SEQ_SCAN.*