Skip to content

Commit

Permalink
No pushing filters below projections that cast to a lower logical typ…
Browse files Browse the repository at this point in the history
…e id (duckdb#13617)

Fixes duckdb#12577

It was also important to realize that if the cast is to a higher logical
type, than the filter can be pushed down, since all values of the lower
logical type can always be cast to the higher logical type (i.e all INT
values can be cast to VARCHAR values).

The other way around, however, does not work, and when such a cast
occurs (i.e VARCHAR to INT) the filter cannot be pushed down.
  • Loading branch information
Mytherin authored Oct 28, 2024
2 parents baf4304 + 4b08ad3 commit b83a0be
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/include/duckdb/planner/expression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ class Expression : public BaseExpression {
bool HasSubquery() const override;
bool IsScalar() const override;
bool HasParameter() const override;

virtual bool IsVolatile() const;
virtual bool IsConsistent() const;
virtual bool PropagatesNullValues() const;
virtual bool IsFoldable() const;
virtual bool CanThrow() const;

hash_t Hash() const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class BoundCastExpression : public Expression {

unique_ptr<Expression> Copy() const override;

bool CanThrow() const override;

void Serialize(Serializer &serializer) const override;
static unique_ptr<Expression> Deserialize(Deserializer &deserializer);

Expand Down
4 changes: 3 additions & 1 deletion src/optimizer/pushdown/pushdown_projection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "duckdb/planner/expression_iterator.hpp"
#include "duckdb/planner/operator/logical_empty_result.hpp"
#include "duckdb/planner/operator/logical_projection.hpp"
#include "duckdb/planner/expression/bound_cast_expression.hpp"
#include "duckdb/common/types.hpp"

namespace duckdb {

Expand Down Expand Up @@ -51,7 +53,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownProjection(unique_ptr<Logica
auto &f = *filter;
D_ASSERT(f.bindings.size() <= 1);
bool is_volatile = IsVolatile(proj, f.filter);
if (is_volatile) {
if (is_volatile || f.filter->CanThrow()) {
// We can't push down related expressions if the column in the
// expression is generated by the functions which have side effects
remain_expressions.push_back(std::move(f.filter));
Expand Down
6 changes: 6 additions & 0 deletions src/planner/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ bool Expression::IsConsistent() const {
return is_consistent;
}

bool Expression::CanThrow() const {
bool can_throw = false;
ExpressionIterator::EnumerateChildren(*this, [&](const Expression &child) { can_throw |= child.CanThrow(); });
return can_throw;
}

bool Expression::PropagatesNullValues() const {
if (type == ExpressionType::OPERATOR_IS_NULL || type == ExpressionType::OPERATOR_IS_NOT_NULL ||
type == ExpressionType::COMPARE_NOT_DISTINCT_FROM || type == ExpressionType::COMPARE_DISTINCT_FROM ||
Expand Down
12 changes: 12 additions & 0 deletions src/planner/expression/bound_cast_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "duckdb/planner/expression/bound_default_expression.hpp"
#include "duckdb/planner/expression/bound_parameter_expression.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression_iterator.hpp"
#include "duckdb/function/cast_rules.hpp"
#include "duckdb/function/cast/cast_function_set.hpp"
#include "duckdb/main/config.hpp"
Expand Down Expand Up @@ -217,4 +218,15 @@ unique_ptr<Expression> BoundCastExpression::Copy() const {
return std::move(copy);
}

bool BoundCastExpression::CanThrow() const {
const auto child_type = child->return_type;
if (return_type.id() != child_type.id() &&
LogicalType::ForceMaxLogicalType(return_type, child_type) == child_type.id()) {
return true;
}
bool changes_type = false;
ExpressionIterator::EnumerateChildren(*this, [&](const Expression &child) { changes_type |= child.CanThrow(); });
return changes_type;
}

} // namespace duckdb
74 changes: 74 additions & 0 deletions test/sql/optimizer/test_no_pushdown_cast_into_cte.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# name: test/sql/optimizer/test_no_pushdown_cast_into_cte.test
# description: No Pushdown cast into cte
# group: [optimizer]

statement ok
pragma explain_output='optimized_only';

query II
WITH t(a, b) AS (
SELECT a :: int, b :: int
FROM (VALUES
('1', '4'),
('5', '3'),
('2', '*'),
('3', '8'),
('7', '*')) AS _(a, b)
WHERE position('*' in b) = 0
)
SELECT a, b
FROM t
WHERE a < b;
----
1 4
3 8


# check filter is above projection that casts the varchar to int
query II
EXPLAIN WITH t(a, b) AS (
SELECT a :: int, b :: int
FROM (VALUES
('1', '4'),
('5', '3'),
('2', '*'),
('3', '8'),
('7', '*')) AS _(a, b)
WHERE position('*' in b) = 0
)
SELECT a, b
FROM t
WHERE a < b;
----
logical_opt <REGEX>:.*FILTER.*CAST\(a AS INTEGER.*<.*b AS INTEGER\).*PROJECTION.*FILTER.*position.*


# INT can always be cast to varchar, so the filter a[1] = '1'
# can be pushed down
query II
with t(a, b) as (
select a :: varchar, b :: varchar
FROM VALUES
(1, 2),
(3, 3),
(5, 6),
(7, 6) as
_(a, b) where a <= b
) select a, b from t where a[1] = '1';
----
1 2


# we should not see two filters, since the filter can be pushed to just above the column data scan
query II
explain with t(a, b) as (
select a :: varchar, b :: varchar
FROM VALUES
(1, 2),
(3, 3),
(5, 6),
(7, 6) as
_(a, b) where a <= b
) select a, b from t where a[1] = '1';
----
logical_opt <!REGEX>:.*FILTER.*PROJECTION.*FILTER.*

0 comments on commit b83a0be

Please sign in to comment.