From 996315fc8b72168273bb63a58cdff09b4f9ab49f Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Tue, 1 Oct 2024 14:01:14 +0200 Subject: [PATCH] Implement `FROM NAMED` (#1520) This continues work from #1445. QLever now supports SPARQL queries with `FROM NAMED` and `GRAPH` with a variable. --- src/engine/IndexScan.h | 4 + src/engine/QueryExecutionTree.h | 2 +- src/engine/QueryPlanner.cpp | 81 +++++++++++++------ src/engine/QueryPlanner.h | 1 + src/parser/GraphPattern.h | 1 - src/parser/GraphPatternOperation.h | 6 +- .../sparqlParser/SparqlQleverVisitor.cpp | 6 +- test/QueryPlannerTest.cpp | 29 ++++++- test/QueryPlannerTestHelpers.h | 22 +++-- test/SparqlAntlrParserTest.cpp | 7 +- test/SparqlAntlrParserTestHelpers.h | 12 +-- 11 files changed, 125 insertions(+), 46 deletions(-) diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 029bdac0c1..d63f633f19 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -37,10 +37,14 @@ class IndexScan final : public Operation { ~IndexScan() override = default; + // Const getters for testing. const TripleComponent& predicate() const { return predicate_; } const TripleComponent& subject() const { return subject_; } const TripleComponent& object() const { return object_; } const auto& graphsToFilter() const { return graphsToFilter_; } + const std::vector& additionalVariables() const { + return additionalVariables_; + } const std::vector& additionalColumns() const { return additionalColumns_; diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 7f7871f494..0519082b78 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -183,7 +183,7 @@ class QueryExecutionTree { // _____________________________________________________________ friend void PrintTo(const QueryExecutionTree& tree, std::ostream* os) { auto& s = *os; - s << nlohmann::ordered_json{tree.getRootOperation()->runtimeInfo()}.dump(2); + s << tree.getRootOperation()->getDescriptor(); } private: diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 74af655866..685ddb8a77 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -185,13 +185,6 @@ std::vector QueryPlanner::createExecutionTrees( checkCancellation(); - const auto& fromNamed = pq.datasetClauses_.namedGraphs_; - if (fromNamed.has_value()) { - AD_CORRECTNESS_CHECK(!fromNamed.value().empty()); - throw std::runtime_error( - "FROM NAMED clauses are not yet supported by QLever"); - } - return lastRow; } @@ -732,18 +725,32 @@ auto QueryPlanner::seedWithScansAndText( continue; } - auto addIndexScan = - [this, pushPlan, node, - &relevantGraphs = activeDatasetClauses_.defaultGraphs_]( - Permutation::Enum permutation, - std::optional triple = std::nullopt) { - if (!triple.has_value()) { - triple = node.triple_.getSimple(); - } + auto addIndexScan = [this, pushPlan, node, + &relevantGraphs = + activeDatasetClauses_.defaultGraphs_]( + Permutation::Enum permutation, + std::optional triple = + std::nullopt) { + if (!triple.has_value()) { + triple = node.triple_.getSimple(); + } + + // We are inside a `GRAPH ?var {...}` clause, so all index scans have + // to add the graph variable as an additional column. + auto& additionalColumns = triple.value().additionalScanColumns_; + AD_CORRECTNESS_CHECK(!ad_utility::contains( + additionalColumns | std::views::keys, ADDITIONAL_COLUMN_GRAPH_ID)); + if (activeGraphVariable_.has_value()) { + additionalColumns.emplace_back(ADDITIONAL_COLUMN_GRAPH_ID, + activeGraphVariable_.value()); + } - pushPlan(makeSubtreePlan( - _qec, permutation, std::move(triple.value()), relevantGraphs)); - }; + // TODO Handle the case, that the Graph variable is also used + // inside the `GRAPH` clause, e.g. by being used inside a triple. + + pushPlan(makeSubtreePlan( + _qec, permutation, std::move(triple.value()), relevantGraphs)); + }; auto addFilter = [&filters = result.filters_](SparqlFilter filter) { filters.push_back(std::move(filter)); @@ -2109,11 +2116,28 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { // default graphs while planning this clause, and reset them when leaving // the clause. std::optional datasetBackup; + std::optional graphVariableBackup = planner_.activeGraphVariable_; if constexpr (std::is_same_v) { - if (arg._graphIri.has_value()) { + if (std::holds_alternative(arg.graphSpec_)) { datasetBackup = planner_.activeDatasetClauses_; planner_.activeDatasetClauses_.defaultGraphs_.emplace( - {arg._graphIri.value()}); + {std::get(arg.graphSpec_)}); + } else if (std::holds_alternative(arg.graphSpec_)) { + const auto& graphVar = std::get(arg.graphSpec_); + if (checkUsePatternTrick::isVariableContainedInGraphPattern( + graphVar, arg._child, nullptr)) { + throw std::runtime_error( + "A variable that is used as the graph specifier of a `GRAPH ?var " + "{...}` clause may not appear in the body of that clause"); + } + datasetBackup = planner_.activeDatasetClauses_; + planner_.activeDatasetClauses_.defaultGraphs_ = + planner_.activeDatasetClauses_.namedGraphs_; + // We already have backed up the `activeGraphVariable_`. + planner_.activeGraphVariable_ = std::get(arg.graphSpec_); + } else { + AD_CORRECTNESS_CHECK( + std::holds_alternative(arg.graphSpec_)); } } @@ -2127,6 +2151,7 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { if (datasetBackup.has_value()) { planner_.activeDatasetClauses_ = std::move(datasetBackup.value()); } + planner_.activeGraphVariable_ = std::move(graphVariableBackup); } else if constexpr (std::is_same_v) { visitUnion(arg); } else if constexpr (std::is_same_v) { @@ -2170,8 +2195,8 @@ void QueryPlanner::GraphPatternPlanner::visitBasicGraphPattern( } } - // Then collect the triples. Transform each triple with a property path to an - // equivalent form without property path (using `seedFromPropertyPath`). + // Then collect the triples. Transform each triple with a property path to + // an equivalent form without property path (using `seedFromPropertyPath`). for (const auto& triple : v._triples) { if (triple.p_._operation == PropertyPath::Operation::IRI) { candidateTriples_._triples.push_back(triple); @@ -2283,8 +2308,18 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // Make sure that variables that are not selected by the subquery are // not visible. auto setSelectedVariables = [&](SubtreePlan& plan) { + auto selectedVariables = arg.get().selectClause().getSelectedVariables(); + // TODO Use `optional::transform` + if (planner_.activeGraphVariable_.has_value()) { + const auto& graphVar = planner_.activeGraphVariable_.value(); + AD_CORRECTNESS_CHECK( + !ad_utility::contains(selectedVariables, graphVar), + "This case (variable of GRAPH ?var {...} appears also in the body) " + "should have thrown further up in the call stack"); + selectedVariables.push_back(graphVar); + } plan._qet->getRootOperation()->setSelectedVariablesForSubquery( - arg.get().selectClause().getSelectedVariables()); + selectedVariables); }; std::ranges::for_each(candidatesForSubquery, setSelectedVariables); // A subquery must also respect LIMIT and OFFSET clauses diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 6e89a8eabc..c7c4428928 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -21,6 +21,7 @@ class QueryPlanner { using vector = std::vector; ParsedQuery::DatasetClauses activeDatasetClauses_; + std::optional activeGraphVariable_; public: explicit QueryPlanner(QueryExecutionContext* qec, diff --git a/src/parser/GraphPattern.h b/src/parser/GraphPattern.h index 6c16b3453c..48bd0c73c9 100644 --- a/src/parser/GraphPattern.h +++ b/src/parser/GraphPattern.h @@ -47,7 +47,6 @@ class GraphPattern { const std::string& languageInQuotes); bool _optional; - std::optional graphIri_; // Filters always apply to the complete GraphPattern, no matter where // they appear. For VALUES and Triples, the order matters, so they diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index c650069d70..df95f80e89 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -76,7 +76,11 @@ struct Values { /// `GraphPattern`. struct GroupGraphPattern { GraphPattern _child; - std::optional _graphIri = std::nullopt; + // If not `monostate`, then this group is a `GRAPH` clause, either with a + // fixed graph IRI, or with a variable. + using GraphSpec = + std::variant; + GraphSpec graphSpec_ = std::monostate{}; }; /// An `OPTIONAL` clause. diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 3f6258b0dd..4e4222a151 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -762,12 +762,14 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { parsedQuery::GraphPatternOperation Visitor::visit( Parser::GraphGraphPatternContext* ctx) { auto varOrIri = visit(ctx->varOrIri()); + auto group = visit(ctx->groupGraphPattern()); if (std::holds_alternative(varOrIri)) { - reportNotSupported(ctx, "GRAPH clauses with a variable are"); + const auto& graphVar = std::get(varOrIri); + addVisibleVariable(graphVar); + return parsedQuery::GroupGraphPattern{std::move(group), graphVar}; } AD_CORRECTNESS_CHECK(std::holds_alternative(varOrIri)); auto& iri = std::get(varOrIri); - auto group = visit(ctx->groupGraphPattern()); return parsedQuery::GroupGraphPattern{ std::move(group), TripleComponent::Iri::fromIriref(iri.toSparql())}; } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 0607515718..76ce7a9ed7 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1191,8 +1191,31 @@ TEST(QueryPlanner, DatasetClause) { scan("", "?p", "", {}, g2), scan("", "?p", "", {}, g2), scan("", "?p", "", {}, g1))); + auto g12 = Graphs{"", ""}; + auto varG = std::vector{Variable{"?g"}}; + std::vector graphCol{ADDITIONAL_COLUMN_GRAPH_ID}; + h::expect( + "SELECT * FROM FROM NAMED FROM NAMED WHERE { GRAPH ?g { " + " }}", + scan("", "", "", {}, g12, varG, graphCol)); + + h::expect("SELECT * FROM WHERE { GRAPH ?g { }}", + scan("", "", "", {}, std::nullopt, varG, graphCol)); + + // A complex example with graph variables. + h::expect( + "SELECT * FROM FROM NAMED { ?p . { ?p } GRAPH ?g " + "{ ?p " + "{SELECT * { ?p }}} ?p }", + h::UnorderedJoins(scan("", "?p", "", {}, g1), + scan("", "?p", "", {}, g1), + scan("", "?p", "", {}, g2, varG, graphCol), + scan("", "?p", "", {}, g2, varG, graphCol), + scan("", "?p", "", {}, g1))); + // We currently don't support repeating the graph variable inside the + // graph clause AD_EXPECT_THROW_WITH_MESSAGE( - h::expect("SELECT * FROM FROM NAMED WHERE { ?x ?y ?z}", - ::testing::_), - ::testing::HasSubstr("FROM NAMED clauses are not yet supported")); + h::expect("SELECT * { GRAPH ?x {?x }}", ::testing::_), + AllOf(HasSubstr("used as the graph specifier"), + HasSubstr("may not appear in the body"))); } diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 4884f30dae..e8c770063a 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -83,11 +83,13 @@ constexpr auto IndexScan = [](TripleComponent subject, TripleComponent predicate, TripleComponent object, const std::vector& allowedPermutations = {}, - const ScanSpecificationAsTripleComponent::Graphs& graphs = - std::nullopt) -> QetMatcher { + const ScanSpecificationAsTripleComponent::Graphs& graphs = std::nullopt, + const std::vector& additionalVariables = {}, + const std::vector& additionalColumns = {}) -> QetMatcher { size_t numVariables = static_cast(subject.isVariable()) + static_cast(predicate.isVariable()) + - static_cast(object.isVariable()); + static_cast(object.isVariable()) + + additionalColumns.size(); auto permutationMatcher = allowedPermutations.empty() ? ::testing::A() : AnyOfArray(allowedPermutations); @@ -97,6 +99,10 @@ constexpr auto IndexScan = AD_PROPERTY(IndexScan, subject, Eq(subject)), AD_PROPERTY(IndexScan, predicate, Eq(predicate)), AD_PROPERTY(IndexScan, object, Eq(object)), + AD_PROPERTY(IndexScan, additionalVariables, + ElementsAreArray(additionalVariables)), + AD_PROPERTY(IndexScan, additionalColumns, + ElementsAreArray(additionalColumns)), AD_PROPERTY(IndexScan, graphsToFilter, Eq(graphs)))); }; @@ -193,7 +199,9 @@ inline auto IndexScanFromStrings = std::string_view object, const std::vector& allowedPermutations = {}, const std::optional> graphs = - std::nullopt) -> QetMatcher { + std::nullopt, + const std::vector& additionalVariables = {}, + const std::vector& additionalColumns = {}) -> QetMatcher { auto strToComp = [](std::string_view s) -> TripleComponent { if (s.starts_with("?")) { return ::Variable{std::string{s}}; @@ -211,7 +219,8 @@ inline auto IndexScanFromStrings = } } return IndexScan(strToComp(subject), strToComp(predicate), strToComp(object), - allowedPermutations, graphsOut); + allowedPermutations, graphsOut, additionalVariables, + additionalColumns); }; // For the following Join algorithms the order of the children is not important. @@ -231,11 +240,12 @@ inline auto UnorderedJoins = [](auto&&... children) -> QetMatcher { Vec& children, const auto& self) -> void { const Operation* operation = tree.getRootOperation().get(); auto join = dynamic_cast(operation); + auto multiColJoin = dynamic_cast(operation); // Also allow the INTERNAL SORT BY operations that are needed for the joins. // TODO is this the right place to also check that those have the // correct columns? auto sort = dynamic_cast(operation); - if (!join && !sort) { + if (!join && !sort && !multiColJoin) { children.push_back(tree); } else { for (const auto& child : operation->getChildren()) { diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 30738d07b5..24f52c73a8 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -1010,9 +1010,10 @@ TEST(SparqlParser, GroupGraphPattern) { // SERVICE with a variable endpoint is not yet supported. expectGroupGraphPatternFails("{ SERVICE ?endpoint { ?s ?p ?o } }"); - // graphGraphPattern is currently only supported with a fixed graph IRI, not - // with a variable. - expectGroupGraphPatternFails("{ GRAPH ?a { } }"); + expectGraphPattern("{ GRAPH ?g { ?x }}", + m::GraphPattern(m::GroupGraphPatternWithGraph( + Variable("?g"), + m::Triples({{Var{"?x"}, "", iri("")}})))); expectGraphPattern( "{ GRAPH { ?x }}", m::GraphPattern(m::GroupGraphPatternWithGraph( diff --git a/test/SparqlAntlrParserTestHelpers.h b/test/SparqlAntlrParserTestHelpers.h index c69af1e365..d2c8a97def 100644 --- a/test/SparqlAntlrParserTestHelpers.h +++ b/test/SparqlAntlrParserTestHelpers.h @@ -676,11 +676,11 @@ inline auto Optional = inline auto Group = [](auto&& subMatcher, - std::optional graphIri = - std::nullopt) -> Matcher { + p::GroupGraphPattern::GraphSpec graphSpec = + std::monostate{}) -> Matcher { return detail::GraphPatternOperation(::testing::AllOf( AD_FIELD(p::GroupGraphPattern, _child, subMatcher), - AD_FIELD(p::GroupGraphPattern, _graphIri, ::testing::Eq(graphIri)))); + AD_FIELD(p::GroupGraphPattern, graphSpec_, ::testing::Eq(graphSpec)))); }; inline auto Union = @@ -764,9 +764,9 @@ inline auto GroupGraphPattern = [](vector&& filters, return Group(detail::GraphPattern(false, filters, childMatchers...)); }; -inline auto GroupGraphPatternWithGraph = [](vector&& filters, - const TripleComponent::Iri& graph, - const auto&... childMatchers) +inline auto GroupGraphPatternWithGraph = + [](vector&& filters, p::GroupGraphPattern::GraphSpec graph, + const auto&... childMatchers) -> Matcher { return Group(detail::GraphPattern(false, filters, childMatchers...), graph); };