Skip to content

Commit

Permalink
Implement FROM NAMED (#1520)
Browse files Browse the repository at this point in the history
This continues work from #1445. QLever now supports SPARQL queries with `FROM NAMED` and `GRAPH` with a variable.
  • Loading branch information
joka921 authored Oct 1, 2024
1 parent 18d102f commit 996315f
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 46 deletions.
4 changes: 4 additions & 0 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,14 @@ class IndexScan final : public Operation {

~IndexScan() override = default;

// Const getters for testing. Each getter returns a const reference to the
// corresponding member, so the result must not be used after this `IndexScan`
// has been destroyed.
// The predicate, subject, and object of the scanned triple.
const TripleComponent& predicate() const { return predicate_; }
const TripleComponent& subject() const { return subject_; }
const TripleComponent& object() const { return object_; }
// The graph specification stored in `graphsToFilter_`.
// NOTE(review): presumably an empty optional means "no restriction to
// particular graphs" -- confirm against the member's declaration.
const auto& graphsToFilter() const { return graphsToFilter_; }
// The variables stored in `additionalVariables_`.
// NOTE(review): presumably these are the variables bound to the additional
// scan columns (e.g. the variable of a `GRAPH ?var {...}` clause) -- confirm.
const std::vector<Variable>& additionalVariables() const {
return additionalVariables_;
}

const std::vector<ColumnIndex>& additionalColumns() const {
return additionalColumns_;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class QueryExecutionTree {
// _____________________________________________________________
// Write a human-readable representation of `tree` to the stream `*os`.
// NOTE(review): name and signature match GoogleTest's `PrintTo` customization
// point, so this is presumably what is printed when a matcher on a
// `QueryExecutionTree` fails -- confirm.
friend void PrintTo(const QueryExecutionTree& tree, std::ostream* os) {
auto& s = *os;
// The runtime information of the root operation, serialized as JSON with an
// indentation of 2.
s << nlohmann::ordered_json{tree.getRootOperation()->runtimeInfo()}.dump(2);
// The descriptor of the root operation.
s << tree.getRootOperation()->getDescriptor();
}

private:
Expand Down
81 changes: 58 additions & 23 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,6 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createExecutionTrees(

checkCancellation();

const auto& fromNamed = pq.datasetClauses_.namedGraphs_;
if (fromNamed.has_value()) {
AD_CORRECTNESS_CHECK(!fromNamed.value().empty());
throw std::runtime_error(
"FROM NAMED clauses are not yet supported by QLever");
}

return lastRow;
}

Expand Down Expand Up @@ -732,18 +725,32 @@ auto QueryPlanner::seedWithScansAndText(
continue;
}

auto addIndexScan =
[this, pushPlan, node,
&relevantGraphs = activeDatasetClauses_.defaultGraphs_](
Permutation::Enum permutation,
std::optional<SparqlTripleSimple> triple = std::nullopt) {
if (!triple.has_value()) {
triple = node.triple_.getSimple();
}
auto addIndexScan = [this, pushPlan, node,
&relevantGraphs =
activeDatasetClauses_.defaultGraphs_](
Permutation::Enum permutation,
std::optional<SparqlTripleSimple> triple =
std::nullopt) {
if (!triple.has_value()) {
triple = node.triple_.getSimple();
}

// If we are inside a `GRAPH ?var {...}` clause (i.e. `activeGraphVariable_`
// has a value), all index scans have to add the graph variable as an
// additional column. The graph column must not have been added before.
auto& additionalColumns = triple.value().additionalScanColumns_;
AD_CORRECTNESS_CHECK(!ad_utility::contains(
additionalColumns | std::views::keys, ADDITIONAL_COLUMN_GRAPH_ID));
if (activeGraphVariable_.has_value()) {
additionalColumns.emplace_back(ADDITIONAL_COLUMN_GRAPH_ID,
activeGraphVariable_.value());
}

pushPlan(makeSubtreePlan<IndexScan>(
_qec, permutation, std::move(triple.value()), relevantGraphs));
};
// TODO<joka921> Handle the case that the graph variable is also used
// inside the `GRAPH` clause, e.g. by being used inside a triple.

pushPlan(makeSubtreePlan<IndexScan>(
_qec, permutation, std::move(triple.value()), relevantGraphs));
};

auto addFilter = [&filters = result.filters_](SparqlFilter filter) {
filters.push_back(std::move(filter));
Expand Down Expand Up @@ -2109,11 +2116,28 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) {
// default graphs while planning this clause, and reset them when leaving
// the clause.
std::optional<ParsedQuery::DatasetClauses> datasetBackup;
std::optional<Variable> graphVariableBackup = planner_.activeGraphVariable_;
if constexpr (std::is_same_v<T, p::GroupGraphPattern>) {
if (arg._graphIri.has_value()) {
if (std::holds_alternative<TripleComponent::Iri>(arg.graphSpec_)) {
datasetBackup = planner_.activeDatasetClauses_;
planner_.activeDatasetClauses_.defaultGraphs_.emplace(
{arg._graphIri.value()});
{std::get<TripleComponent::Iri>(arg.graphSpec_)});
} else if (std::holds_alternative<Variable>(arg.graphSpec_)) {
const auto& graphVar = std::get<Variable>(arg.graphSpec_);
if (checkUsePatternTrick::isVariableContainedInGraphPattern(
graphVar, arg._child, nullptr)) {
throw std::runtime_error(
"A variable that is used as the graph specifier of a `GRAPH ?var "
"{...}` clause may not appear in the body of that clause");
}
datasetBackup = planner_.activeDatasetClauses_;
planner_.activeDatasetClauses_.defaultGraphs_ =
planner_.activeDatasetClauses_.namedGraphs_;
// The `activeGraphVariable_` has already been backed up above.
planner_.activeGraphVariable_ = std::get<Variable>(arg.graphSpec_);
} else {
AD_CORRECTNESS_CHECK(
std::holds_alternative<std::monostate>(arg.graphSpec_));
}
}

Expand All @@ -2127,6 +2151,7 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) {
if (datasetBackup.has_value()) {
planner_.activeDatasetClauses_ = std::move(datasetBackup.value());
}
planner_.activeGraphVariable_ = std::move(graphVariableBackup);
} else if constexpr (std::is_same_v<T, p::Union>) {
visitUnion(arg);
} else if constexpr (std::is_same_v<T, p::Subquery>) {
Expand Down Expand Up @@ -2170,8 +2195,8 @@ void QueryPlanner::GraphPatternPlanner::visitBasicGraphPattern(
}
}

// Then collect the triples. Transform each triple with a property path to an
// equivalent form without property path (using `seedFromPropertyPath`).
// Then collect the triples. Transform each triple with a property path to
// an equivalent form without property path (using `seedFromPropertyPath`).
for (const auto& triple : v._triples) {
if (triple.p_._operation == PropertyPath::Operation::IRI) {
candidateTriples_._triples.push_back(triple);
Expand Down Expand Up @@ -2283,8 +2308,18 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery(
// Make sure that variables that are not selected by the subquery are
// not visible.
auto setSelectedVariables = [&](SubtreePlan& plan) {
auto selectedVariables = arg.get().selectClause().getSelectedVariables();
// TODO<C++23> Use `optional::transform`
if (planner_.activeGraphVariable_.has_value()) {
const auto& graphVar = planner_.activeGraphVariable_.value();
AD_CORRECTNESS_CHECK(
!ad_utility::contains(selectedVariables, graphVar),
"This case (variable of GRAPH ?var {...} appears also in the body) "
"should have thrown further up in the call stack");
selectedVariables.push_back(graphVar);
}
plan._qet->getRootOperation()->setSelectedVariablesForSubquery(
arg.get().selectClause().getSelectedVariables());
selectedVariables);
};
std::ranges::for_each(candidatesForSubquery, setSelectedVariables);
// A subquery must also respect LIMIT and OFFSET clauses
Expand Down
1 change: 1 addition & 0 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class QueryPlanner {
using vector = std::vector<T>;

ParsedQuery::DatasetClauses activeDatasetClauses_;
std::optional<Variable> activeGraphVariable_;

public:
explicit QueryPlanner(QueryExecutionContext* qec,
Expand Down
1 change: 0 additions & 1 deletion src/parser/GraphPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class GraphPattern {
const std::string& languageInQuotes);

bool _optional;
std::optional<TripleComponent::Iri> graphIri_;

// Filters always apply to the complete GraphPattern, no matter where
// they appear. For VALUES and Triples, the order matters, so they
Expand Down
6 changes: 5 additions & 1 deletion src/parser/GraphPatternOperation.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ struct Values {
/// `GraphPattern`.
struct GroupGraphPattern {
// The graph pattern that is enclosed by this group.
GraphPattern _child;
// If set, the fixed IRI of the graph this group is restricted to.
std::optional<TripleComponent::Iri> _graphIri = std::nullopt;
// If not `monostate`, then this group is a `GRAPH` clause, either with a
// fixed graph IRI, or with a variable. The default (`monostate`) denotes a
// plain group that is not a `GRAPH` clause.
using GraphSpec =
std::variant<std::monostate, TripleComponent::Iri, Variable>;
GraphSpec graphSpec_ = std::monostate{};
};

/// An `OPTIONAL` clause.
Expand Down
6 changes: 4 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -762,12 +762,14 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) {
// Visit a `GRAPH` clause (`GRAPH <iri> {...}` or `GRAPH ?var {...}`) and
// return a `GroupGraphPattern` that consists of the parsed body of the clause
// together with the graph specifier (a fixed IRI or a variable).
parsedQuery::GraphPatternOperation Visitor::visit(
Parser::GraphGraphPatternContext* ctx) {
// The graph specifier is either a `Variable` or an `Iri`.
auto varOrIri = visit(ctx->varOrIri());
// The body of the `GRAPH` clause.
auto group = visit(ctx->groupGraphPattern());
if (std::holds_alternative<Variable>(varOrIri)) {
reportNotSupported(ctx, "GRAPH clauses with a variable are");
// The graph variable is registered as a visible variable via
// `addVisibleVariable` and stored in the resulting group.
const auto& graphVar = std::get<Variable>(varOrIri);
addVisibleVariable(graphVar);
return parsedQuery::GroupGraphPattern{std::move(group), graphVar};
}
// The only remaining alternative is a fixed graph IRI.
AD_CORRECTNESS_CHECK(std::holds_alternative<Iri>(varOrIri));
auto& iri = std::get<Iri>(varOrIri);
auto group = visit(ctx->groupGraphPattern());
// Convert the parsed `Iri` to a `TripleComponent::Iri` via its SPARQL string
// representation.
return parsedQuery::GroupGraphPattern{
std::move(group), TripleComponent::Iri::fromIriref(iri.toSparql())};
}
Expand Down
29 changes: 26 additions & 3 deletions test/QueryPlannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1191,8 +1191,31 @@ TEST(QueryPlanner, DatasetClause) {
scan("<c>", "?p", "<z>", {}, g2), scan("<d>", "?p", "<z2>", {}, g2),
scan("<e>", "?p", "<z3>", {}, g1)));

auto g12 = Graphs{"<g1>", "<g2>"};
auto varG = std::vector{Variable{"?g"}};
std::vector<ColumnIndex> graphCol{ADDITIONAL_COLUMN_GRAPH_ID};
h::expect(
"SELECT * FROM <x> FROM NAMED <g1> FROM NAMED <g2> WHERE { GRAPH ?g {<a> "
"<b> <c>}}",
scan("<a>", "<b>", "<c>", {}, g12, varG, graphCol));

h::expect("SELECT * FROM <x> WHERE { GRAPH ?g {<a> <b> <c>}}",
scan("<a>", "<b>", "<c>", {}, std::nullopt, varG, graphCol));

// A complex example with graph variables.
h::expect(
"SELECT * FROM <g1> FROM NAMED <g2> { <a> ?p <x>. {<b> ?p <y>} GRAPH ?g "
"{ <c> ?p <z> "
"{SELECT * {<d> ?p <z2>}}} <e> ?p <z3> }",
h::UnorderedJoins(scan("<a>", "?p", "<x>", {}, g1),
scan("<b>", "?p", "<y>", {}, g1),
scan("<c>", "?p", "<z>", {}, g2, varG, graphCol),
scan("<d>", "?p", "<z2>", {}, g2, varG, graphCol),
scan("<e>", "?p", "<z3>", {}, g1)));
// We currently don't support repeating the graph variable inside the
// graph clause
AD_EXPECT_THROW_WITH_MESSAGE(
h::expect("SELECT * FROM <x> FROM NAMED <y> WHERE { ?x ?y ?z}",
::testing::_),
::testing::HasSubstr("FROM NAMED clauses are not yet supported"));
h::expect("SELECT * { GRAPH ?x {?x <b> <c>}}", ::testing::_),
AllOf(HasSubstr("used as the graph specifier"),
HasSubstr("may not appear in the body")));
}
22 changes: 16 additions & 6 deletions test/QueryPlannerTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,13 @@ constexpr auto IndexScan =
[](TripleComponent subject, TripleComponent predicate,
TripleComponent object,
const std::vector<Permutation::Enum>& allowedPermutations = {},
const ScanSpecificationAsTripleComponent::Graphs& graphs =
std::nullopt) -> QetMatcher {
const ScanSpecificationAsTripleComponent::Graphs& graphs = std::nullopt,
const std::vector<Variable>& additionalVariables = {},
const std::vector<ColumnIndex>& additionalColumns = {}) -> QetMatcher {
size_t numVariables = static_cast<size_t>(subject.isVariable()) +
static_cast<size_t>(predicate.isVariable()) +
static_cast<size_t>(object.isVariable());
static_cast<size_t>(object.isVariable()) +
additionalColumns.size();
auto permutationMatcher = allowedPermutations.empty()
? ::testing::A<Permutation::Enum>()
: AnyOfArray(allowedPermutations);
Expand All @@ -97,6 +99,10 @@ constexpr auto IndexScan =
AD_PROPERTY(IndexScan, subject, Eq(subject)),
AD_PROPERTY(IndexScan, predicate, Eq(predicate)),
AD_PROPERTY(IndexScan, object, Eq(object)),
AD_PROPERTY(IndexScan, additionalVariables,
ElementsAreArray(additionalVariables)),
AD_PROPERTY(IndexScan, additionalColumns,
ElementsAreArray(additionalColumns)),
AD_PROPERTY(IndexScan, graphsToFilter, Eq(graphs))));
};

Expand Down Expand Up @@ -193,7 +199,9 @@ inline auto IndexScanFromStrings =
std::string_view object,
const std::vector<Permutation::Enum>& allowedPermutations = {},
const std::optional<ad_utility::HashSet<std::string>> graphs =
std::nullopt) -> QetMatcher {
std::nullopt,
const std::vector<Variable>& additionalVariables = {},
const std::vector<ColumnIndex>& additionalColumns = {}) -> QetMatcher {
auto strToComp = [](std::string_view s) -> TripleComponent {
if (s.starts_with("?")) {
return ::Variable{std::string{s}};
Expand All @@ -211,7 +219,8 @@ inline auto IndexScanFromStrings =
}
}
return IndexScan(strToComp(subject), strToComp(predicate), strToComp(object),
allowedPermutations, graphsOut);
allowedPermutations, graphsOut, additionalVariables,
additionalColumns);
};

// For the following Join algorithms the order of the children is not important.
Expand All @@ -231,11 +240,12 @@ inline auto UnorderedJoins = [](auto&&... children) -> QetMatcher {
Vec& children, const auto& self) -> void {
const Operation* operation = tree.getRootOperation().get();
auto join = dynamic_cast<const ::Join*>(operation);
auto multiColJoin = dynamic_cast<const ::MultiColumnJoin*>(operation);
// Also allow the INTERNAL SORT BY operations that are needed for the joins.
// TODO<joka921> is this the right place to also check that those have the
// correct columns?
auto sort = dynamic_cast<const ::Sort*>(operation);
if (!join && !sort) {
if (!join && !sort && !multiColJoin) {
children.push_back(tree);
} else {
for (const auto& child : operation->getChildren()) {
Expand Down
7 changes: 4 additions & 3 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,9 +1010,10 @@ TEST(SparqlParser, GroupGraphPattern) {
// SERVICE with a variable endpoint is not yet supported.
expectGroupGraphPatternFails("{ SERVICE ?endpoint { ?s ?p ?o } }");

// graphGraphPattern is currently only supported with a fixed graph IRI, not
// with a variable.
expectGroupGraphPatternFails("{ GRAPH ?a { } }");
expectGraphPattern("{ GRAPH ?g { ?x <is-a> <Actor> }}",
m::GraphPattern(m::GroupGraphPatternWithGraph(
Variable("?g"),
m::Triples({{Var{"?x"}, "<is-a>", iri("<Actor>")}}))));
expectGraphPattern(
"{ GRAPH <foo> { ?x <is-a> <Actor> }}",
m::GraphPattern(m::GroupGraphPatternWithGraph(
Expand Down
12 changes: 6 additions & 6 deletions test/SparqlAntlrParserTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -676,11 +676,11 @@ inline auto Optional =

// Return a matcher for a `GraphPatternOperation` that holds a
// `p::GroupGraphPattern` whose `_child` matches `subMatcher` and whose graph
// specification is equal to the second argument. By default, the second
// argument denotes a group without a graph specification.
inline auto Group =
[](auto&& subMatcher,
std::optional<TripleComponent::Iri> graphIri =
std::nullopt) -> Matcher<const p::GraphPatternOperation&> {
p::GroupGraphPattern::GraphSpec graphSpec =
std::monostate{}) -> Matcher<const p::GraphPatternOperation&> {
return detail::GraphPatternOperation<p::GroupGraphPattern>(::testing::AllOf(
AD_FIELD(p::GroupGraphPattern, _child, subMatcher),
AD_FIELD(p::GroupGraphPattern, _graphIri, ::testing::Eq(graphIri))));
AD_FIELD(p::GroupGraphPattern, graphSpec_, ::testing::Eq(graphSpec))));
};

inline auto Union =
Expand Down Expand Up @@ -764,9 +764,9 @@ inline auto GroupGraphPattern = [](vector<std::string>&& filters,
return Group(detail::GraphPattern(false, filters, childMatchers...));
};

// Return a matcher for a group graph pattern with the given `filters` and
// children (matched by `childMatchers`) that additionally carries the graph
// specification `graph`. The result is built by passing a
// `detail::GraphPattern` matcher and `graph` to `Group`.
// NOTE(review): the literal `false` passed to `detail::GraphPattern` is
// presumably the `optional` flag of the pattern -- confirm.
inline auto GroupGraphPatternWithGraph = [](vector<std::string>&& filters,
const TripleComponent::Iri& graph,
const auto&... childMatchers)
inline auto GroupGraphPatternWithGraph =
[](vector<std::string>&& filters, p::GroupGraphPattern::GraphSpec graph,
const auto&... childMatchers)
-> Matcher<const p::GraphPatternOperation&> {
return Group(detail::GraphPattern(false, filters, childMatchers...), graph);
};
Expand Down

0 comments on commit 996315f

Please sign in to comment.