Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiway fusion #1289

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions python/benchmark/preprocess_tantivy_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3

# This script extracts the queries tagged "union" from the tantivy queries file
# (one JSON object per line) and writes them, sorted, one query per line:
# https://github.com/quickwit-oss/search-benchmark-game/blob/master/queries.txt

import sys
import json


def process_queries(tantivy_queries_file, output_queries_file):
    """Extract the "union"-tagged queries and write them out sorted.

    Args:
        tantivy_queries_file: Path to a file holding one JSON object per line;
            each object is read for its "tags" and "query" entries.
        output_queries_file: Path of the file to (over)write with the selected
            query strings, sorted, one per line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an object lacks "tags", or a selected object lacks "query".
    """
    queries = []
    # Use a context manager so the input handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(tantivy_queries_file, "r") as source:
        for line in source:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # query and would otherwise make json.loads raise.
            if not line.strip():
                continue
            obj = json.loads(line)
            if "union" in obj["tags"]:
                queries.append(obj["query"])
    with open(output_queries_file, "w") as file:
        for query in sorted(queries):
            file.write(query + "\n")


if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name:
    # the tantivy queries file to read and the output file to write.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print(
            "Usage: python preprocess_tantivy_queries <tantivy_queries_file> <output_queries_file>"
        )
        sys.exit(1)

    tantivy_queries_file, output_queries_file = cli_args
    process_queries(tantivy_queries_file, output_queries_file)
13 changes: 13 additions & 0 deletions src/executor/fragment_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import physical_sink;
import physical_source;
import physical_explain;
import physical_knn_scan;
import physical_fusion;
import status;
import infinity_exception;

Expand Down Expand Up @@ -244,6 +245,18 @@ void FragmentBuilder::BuildFragments(PhysicalOperator *phys_op, PlanFragment *cu
BuildFragments(phys_op->right(), next_plan_fragment.get());
current_fragment_ptr->AddChild(std::move(next_plan_fragment));
}
if (phys_op->operator_type() == PhysicalOperatorType::kFusion) {
PhysicalFusion *phys_fusion = static_cast<PhysicalFusion *>(phys_op);
for (auto &child_op : phys_fusion->other_children_) {
auto next_plan_fragment = MakeUnique<PlanFragment>(GetFragmentId());
next_plan_fragment->SetSinkNode(query_context_ptr_,
SinkType::kLocalQueue,
child_op->GetOutputNames(),
child_op->GetOutputTypes());
BuildFragments(child_op.get(), next_plan_fragment.get());
current_fragment_ptr->AddChild(std::move(next_plan_fragment));
}
}
return;
}
case PhysicalOperatorType::kUnionAll:
Expand Down
7 changes: 4 additions & 3 deletions src/executor/operator/physical_fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ PhysicalFusion::PhysicalFusion(const u64 id,
SharedPtr<BaseTableRef> base_table_ref,
UniquePtr<PhysicalOperator> left,
UniquePtr<PhysicalOperator> right,
Vector<UniquePtr<PhysicalOperator>> other_children,
SharedPtr<FusionExpression> fusion_expr,
SharedPtr<Vector<LoadMeta>> load_metas)
: PhysicalOperator(PhysicalOperatorType::kFusion, std::move(left), std::move(right), id, load_metas), base_table_ref_(std::move(base_table_ref)),
fusion_expr_(std::move(fusion_expr)) {}
: PhysicalOperator(PhysicalOperatorType::kFusion, std::move(left), std::move(right), id, load_metas), other_children_(std::move(other_children)),
base_table_ref_(std::move(base_table_ref)), fusion_expr_(std::move(fusion_expr)) {}

// No hand-written teardown is needed: the destructor body is empty.
PhysicalFusion::~PhysicalFusion() = default;

Expand Down Expand Up @@ -117,7 +118,7 @@ void PhysicalFusion::ExecuteRRF(const Map<u64, Vector<UniquePtr<DataBlock>>> &in

Vector<RRFRankDoc> rrf_vec;
Map<RowID, SizeT> rrf_map; // row_id to index of rrf_vec_
Vector<u64> fragment_ids;
Vector<u64> fragment_ids; // index of children, 0 - left, 1 - right, 2.. - other_children
SizeT fragment_idx = 0;
// 1 calculate every doc's ranks
for (const auto &[fragment_id, input_blocks] : input_data_blocks) {
Expand Down
2 changes: 2 additions & 0 deletions src/executor/operator/physical_fusion.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public:
SharedPtr<BaseTableRef> base_table_ref,
UniquePtr<PhysicalOperator> left,
UniquePtr<PhysicalOperator> right,
Vector<UniquePtr<PhysicalOperator>> other_children,
SharedPtr<FusionExpression> fusion_expr,
SharedPtr<Vector<LoadMeta>> load_metas);
~PhysicalFusion() override;
Expand All @@ -64,6 +65,7 @@ public:

String ToString(i64 &space) const;

Vector<UniquePtr<PhysicalOperator>> other_children_{};
SharedPtr<BaseTableRef> base_table_ref_{};
SharedPtr<FusionExpression> fusion_expr_;

Expand Down
7 changes: 7 additions & 0 deletions src/executor/physical_planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -978,16 +978,23 @@ UniquePtr<PhysicalOperator> PhysicalPlanner::BuildMatchSparseScan(const SharedPt
// Builds the PhysicalFusion operator for a LogicalFusion node.
//
// The left and right logical children (when present) and every entry of
// LogicalFusion::other_children_ are converted with BuildPhysicalOperator and
// handed over to the new PhysicalFusion, together with the node id, base table
// reference, fusion expression and load metas of the logical node.
UniquePtr<PhysicalOperator> PhysicalPlanner::BuildFusion(const SharedPtr<LogicalNode> &logical_operator) const {
    const auto logical_fusion = static_pointer_cast<LogicalFusion>(logical_operator);

    UniquePtr<PhysicalOperator> left_phy = nullptr;
    UniquePtr<PhysicalOperator> right_phy = nullptr;
    if (const auto &left_logical_node = logical_operator->left_node(); left_logical_node.get() != nullptr) {
        left_phy = BuildPhysicalOperator(left_logical_node);
    }
    // Bind by reference, like the left child, to avoid copying the shared pointer.
    if (const auto &right_logical_node = logical_operator->right_node(); right_logical_node.get() != nullptr) {
        right_phy = BuildPhysicalOperator(right_logical_node);
    }

    // Children beyond the first two; the final count is known up front, so
    // allocate once instead of growing while appending.
    Vector<UniquePtr<PhysicalOperator>> other_children;
    other_children.reserve(logical_fusion->other_children_.size());
    for (const auto &child_logical_node : logical_fusion->other_children_) {
        other_children.push_back(BuildPhysicalOperator(child_logical_node));
    }

    return MakeUnique<PhysicalFusion>(logical_fusion->node_id(),
                                      logical_fusion->base_table_ref_,
                                      std::move(left_phy),
                                      std::move(right_phy),
                                      std::move(other_children),
                                      logical_fusion->fusion_expr_,
                                      logical_operator->load_metas());
}
Expand Down
12 changes: 7 additions & 5 deletions src/planner/bound_select_statement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,6 @@ SharedPtr<LogicalNode> BoundSelectStatement::BuildPlan(QueryContext *query_conte
String error_message = "SEARCH shall have at least one MATCH TEXT or MATCH VECTOR or MATCH TENSOR expression or MATCH SPARSE expression";
LOG_CRITICAL(error_message);
UnrecoverableError(error_message);
} else if (num_children >= 3) {
String error_message = "SEARCH shall have at max two MATCH TEXT or MATCH VECTOR expression";
LOG_CRITICAL(error_message);
UnrecoverableError(error_message);
}
if (table_ref_ptr_->type() != TableRefType::kTable) {
String error_message = "Not base table reference";
Expand Down Expand Up @@ -198,8 +194,14 @@ SharedPtr<LogicalNode> BoundSelectStatement::BuildPlan(QueryContext *query_conte
if (!(search_expr_->fusion_exprs_.empty())) {
auto firstfusionNode = MakeShared<LogicalFusion>(bind_context->GetNewLogicalNodeId(), base_table_ref, search_expr_->fusion_exprs_[0]);
firstfusionNode->set_left_node(match_knn_nodes[0]);
if (match_knn_nodes.size() > 1)
if (match_knn_nodes.size() > 1) {
firstfusionNode->set_right_node(match_knn_nodes[1]);
if (match_knn_nodes.size() > 2) {
for (SizeT i = 2; i < match_knn_nodes.size(); i++) {
firstfusionNode->other_children_.push_back(std::move(match_knn_nodes[i]));
}
}
}
root = std::move(firstfusionNode);
// extra fusion nodes
for (u32 i = 1; i < search_expr_->fusion_exprs_.size(); ++i) {
Expand Down
1 change: 1 addition & 0 deletions src/planner/node/logical_fusion.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public:

// Returns the fixed name string for this node: "LogicalFusion".
inline String name() final {
    return "LogicalFusion";
}

Vector<SharedPtr<LogicalNode>> other_children_{};
SharedPtr<BaseTableRef> base_table_ref_{};
SharedPtr<FusionExpression> fusion_expr_{};
};
Expand Down
10 changes: 10 additions & 0 deletions test/sql/dql/fusion.slt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ SELECT num FROM enwiki_embedding SEARCH MATCH TEXT ('body^5', 'harmful chemical'
2123
2

query I
SELECT num FROM enwiki_embedding SEARCH MATCH TEXT ('body^5', 'harmful chemical', 'topn=3'), MATCH VECTOR (vec, [0.0, 0.0, 0.0, 0.0], 'float', 'l2', 3), MATCH VECTOR (vec, [0.0, 0.0, 0.0, 0.0], 'float', 'l2', 3), FUSION('rrf');
----
0
1
2
6989
9893
2123

# Clean up
statement ok
DROP TABLE enwiki_embedding;
Expand Down
Loading