Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 103 additions & 25 deletions src/fluree/db/flake/optimize.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -199,22 +199,89 @@
[]
where-clause))

(defn- pattern-vars
  "Returns the set of variables referenced by `pattern`, obtained by passing
  the pattern's data through `where/clause-variables`."
  [pattern]
  (-> pattern
      where/pattern-data
      where/clause-variables))

(defn- pattern->selectivity-meta
  "Builds the per-pattern metadata map consumed by the optimizer and explain.

  Keys:
    :pattern - the pattern itself
    :score   - selectivity score from `calculate-selectivity-with-details`,
               or `default-selectivity` when no score was produced
    :inputs  - the inputs reported alongside the score
    :vars    - variables referenced by the pattern (used for greedy join ordering)"
  [db stats pattern]
  (let [{selectivity :score
         score-inputs :inputs} (calculate-selectivity-with-details db stats pattern)
        effective-score (or selectivity default-selectivity)]
    {:pattern pattern
     :score effective-score
     :inputs score-inputs
     :vars (pattern-vars pattern)}))

(defn- pattern-meta-compare
  "Comparator giving a total order over pattern-meta maps.
  Orders by :score first; equal scores are broken by `compare-triples` on the
  :pattern values so the ordering stays stable."
  [left right]
  (let [by-score (compare (:score left) (:score right))]
    (if-not (zero? by-score)
      by-score
      ;; Scores tie: fall back to the pattern-level comparison.
      (compare-triples (:pattern left) (:pattern right)))))

(defn- shares-var?
  "True when at least one of the :vars of `pattern-meta` is found in
  `bound-vars` (which is invoked as a predicate on each var); false otherwise."
  [bound-vars pattern-meta]
  (let [candidate-vars (:vars pattern-meta)]
    (not (not-any? bound-vars candidate-vars))))

(defn- pick-best
  "Returns the best element of `xs` by comparator `cmp` (like
  (first (sort cmp xs)), but in a single pass without sorting).

  When several elements compare as equal-best, the earliest one in `xs` wins.
  Returns nil when `xs` is empty, matching (first (sort cmp []))."
  [cmp xs]
  ;; Guard the empty case: an init-less `reduce` over an empty collection
  ;; would call the 2-arity reducing fn with zero args and throw.
  (when (seq xs)
    (reduce (fn [best x]
              ;; Strictly-less only, so ties keep the earlier element.
              (if (neg? (cmp x best))
                x
                best))
            xs)))

(defn- remove-first
  "Returns vector `v` without its first element satisfying `pred`.
  If no element matches, `v` is returned unchanged."
  [pred v]
  (let [n (count v)]
    (loop [i 0]
      (cond
        ;; Walked off the end without a match: nothing to remove.
        (>= i n) v
        ;; Match: splice together everything before and after index i.
        (pred (nth v i)) (into (subvec v 0 i) (subvec v (inc i)))
        :else (recur (inc i))))))

(defn- greedy-order
  "Orders `pattern-metas` greedily for joining.

  At each step:
    - restrict the choice to patterns sharing a variable with what is already
      bound, when any such pattern exists (avoids cartesian explosions);
      otherwise consider every remaining pattern;
    - take the best of that pool according to `cmp`.
  The chosen pattern's :vars are then added to the bound set.

  `bound-vars` is best-effort: the vars already in scope from previously
  executed patterns. nil is treated as the empty set.

  Returns a vector of the pattern-meta maps in execution order."
  [cmp bound-vars pattern-metas]
  (loop [in-scope (or bound-vars #{})
         pending pattern-metas
         result []]
    (if-not (seq pending)
      result
      (let [joinable (filterv #(shares-var? in-scope %) pending)
            ;; No pattern joins with current bindings: fall back to all pending.
            pool (if (empty? joinable) pending joinable)
            winner (pick-best cmp pool)]
        (recur (into in-scope (:vars winner))
               (remove-first (fn [pm] (= pm winner)) pending)
               (conj result winner))))))

(defn optimize-segment-with-metadata
  "Optimize a single segment and return its patterns, in greedy join order,
  together with their scores and detailed inputs.

  `bound-vars` is the best-effort set of variables already in scope before
  this segment executes (nil is treated as empty). The 3-arity form is kept
  for existing callers and assumes no variables are bound.

  Returns a vector of maps with :pattern, :score, :inputs (for explain), and
  :vars."
  ([db stats patterns]
   (optimize-segment-with-metadata db stats patterns #{}))
  ([db stats patterns bound-vars]
   (let [pattern-metas (mapv (partial pattern->selectivity-meta db stats) patterns)]
     (greedy-order pattern-meta-compare bound-vars pattern-metas))))

(defn- boundary-produced-vars
  "Best-effort: the vars that could be added to the solution *after* a boundary
  pattern has executed.

  Deliberately conservative: only :bind, :values, :optional and :union
  patterns are treated as introducing vars; every other pattern type
  (filters, for example) yields the empty set."
  [pattern]
  (if (contains? #{:bind :values :optional :union} (where/pattern-type pattern))
    (pattern-vars pattern)
    #{}))

(defn optimize-patterns-with-metadata
"Reorder patterns for optimal execution and return rich metadata for explain.
Expand All @@ -229,18 +296,29 @@
[db where-clause]
(let [stats (:stats db)
segments (split-by-optimization-boundaries where-clause)
;; Process each segment and collect metadata
processed-segments
(mapv (fn [segment]
(if (= :optimizable (:type segment))
(let [patterns (:data segment)
optimized-with-meta (optimize-segment-with-metadata db stats patterns)]
{:type :optimizable
:original patterns
:optimized optimized-with-meta})
{:type :boundary
:pattern (:data segment)}))
segments)
;; Process each segment in order, carrying forward an approximate
;; 'vars-in-scope' set so later segments prefer patterns that join with
;; existing bindings.
[processed-segments _vars-in-scope]
(reduce
(fn [[processed vars-in-scope] segment]
(if (= :optimizable (:type segment))
(let [patterns (:data segment)
optimized-with-meta (optimize-segment-with-metadata db stats patterns vars-in-scope)
vars-in-scope' (into vars-in-scope (mapcat :vars optimized-with-meta))]
[(conj processed
{:type :optimizable
:original patterns
:optimized optimized-with-meta})
vars-in-scope'])
(let [pattern (:data segment)
vars-in-scope' (into vars-in-scope (boundary-produced-vars pattern))]
[(conj processed
{:type :boundary
:pattern pattern})
vars-in-scope'])))
[[] #{}]
segments)
;; Extract just the optimized patterns for the optimized clause
optimized-clause
(into []
Expand Down
Loading