-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix datatype of case expression #5734
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
|
||
onlyif postgres | ||
statement ok | ||
CREATE TABLE aggregate_test_100_by_sql | ||
( | ||
c1 character varying NOT NULL, | ||
c2 smallint NOT NULL, | ||
c3 smallint NOT NULL, | ||
c4 smallint, | ||
c5 integer, | ||
c6 bigint NOT NULL, | ||
c7 smallint NOT NULL, | ||
c8 integer NOT NULL, | ||
c9 bigint NOT NULL, | ||
c10 character varying NOT NULL, | ||
c11 real NOT NULL, | ||
c12 double precision NOT NULL, | ||
c13 character varying NOT NULL | ||
); | ||
|
||
|
||
# Copy the data | ||
onlyif postgres | ||
statement ok | ||
COPY aggregate_test_100_by_sql | ||
FROM '../../testing/data/csv/aggregate_test_100.csv' | ||
DELIMITER ',' | ||
CSV HEADER; | ||
|
||
|
||
### | ||
## Setup test for datafusion | ||
### | ||
onlyif DataFusion | ||
statement ok | ||
CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( | ||
c1 VARCHAR NOT NULL, | ||
c2 TINYINT NOT NULL, | ||
c3 SMALLINT NOT NULL, | ||
c4 SMALLINT, | ||
c5 INT, | ||
c6 BIGINT NOT NULL, | ||
c7 SMALLINT NOT NULL, | ||
c8 INT NOT NULL, | ||
c9 BIGINT UNSIGNED NOT NULL, | ||
c10 VARCHAR NOT NULL, | ||
c11 FLOAT NOT NULL, | ||
c12 DOUBLE NOT NULL, | ||
c13 VARCHAR NOT NULL | ||
) | ||
STORED AS CSV | ||
WITH HEADER ROW | ||
LOCATION '../../testing/data/csv/aggregate_test_100.csv' | ||
|
||
|
||
statement ok | ||
CREATE TABLE aggregate_test_100_nullable_by_sql AS | ||
SELECT | ||
*, | ||
CASE | ||
WHEN c4 % 3 = 0 THEN NULL | ||
ELSE c5 | ||
END AS n5, | ||
CASE | ||
WHEN c3 % 3 != 0 THEN c9 | ||
ELSE NULL | ||
END AS n9 | ||
FROM aggregate_test_100_by_sql | ||
|
||
|
||
query III | ||
SELECT | ||
COUNT(*), COUNT(n5), COUNT(n9) | ||
FROM aggregate_test_100_nullable_by_sql | ||
---- | ||
100 66 72 | ||
|
||
|
||
######## | ||
# Clean up after the test | ||
######## | ||
statement ok | ||
DROP TABLE aggregate_test_100_by_sql | ||
|
||
statement ok | ||
DROP TABLE aggregate_test_100_nullable_by_sql |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ use crate::expr::{ | |
}; | ||
use crate::field_util::get_indexed_field; | ||
use crate::type_coercion::binary::binary_operator_data_type; | ||
use crate::type_coercion::other::get_coerce_type_for_case_when; | ||
use crate::{aggregate_function, function, window_function}; | ||
use arrow::compute::can_cast_types; | ||
use arrow::datatypes::DataType; | ||
|
@@ -68,7 +69,26 @@ impl ExprSchemable for Expr { | |
Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), | ||
Expr::ScalarVariable(ty, _) => Ok(ty.clone()), | ||
Expr::Literal(l) => Ok(l.get_datatype()), | ||
Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), | ||
Expr::Case(case) => { | ||
// once #5681 is fixed, this code can be reverted to: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about moving TypeCoercion to […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree that it's duct-taping. I'm afraid the mentioned issue won't be resolved quickly by cut'n'paste... (see #5681 (comment) ) So the question is - should we expose our users to the bug 🧐 until #5681 is fixed? (putting DataFusion's reputation at risk?) What is more, this PR features a sqllogictest ⭐️, so we won't have a regression caused by the fix for #5681. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree it would be best to have this code in Analyzer -- but if we can't get it in there quickly we can merge this PR as is. Thank you for the test case @mslapek. Thank you for leaving the link to #5733 in the code. Do you know why the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The type coercion is implemented as an optimizer - and it changes schemas of particular […] But […] Essentially this PR makes […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
// case.when_then_expr[0].1.get_type(schema) | ||
let then_types = case | ||
.when_then_expr | ||
.iter() | ||
.map(|when_then| when_then.1.get_type(schema)) | ||
.collect::<Result<Vec<_>>>()?; | ||
let else_type = match &case.else_expr { | ||
None => Ok(None), | ||
Some(expr) => expr.get_type(schema).map(Some), | ||
}?; | ||
get_coerce_type_for_case_when(&then_types, else_type.as_ref()).ok_or_else( | ||
|| { | ||
DataFusionError::Internal(String::from( | ||
"Cannot infer type for CASE statement", | ||
)) | ||
}, | ||
) | ||
} | ||
Expr::Cast(Cast { data_type, .. }) | ||
| Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()), | ||
Expr::ScalarUDF { fun, args } => { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm slightly concerned about whether this test is actually needed; the PR is meant to fix a schema mismatch on the CASE statement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test involves a CASE statement, and it was crashing before the fix (the CREATE TABLE one).
The file itself might be useful for other null-related bugfixes.
Nevertheless, there may be better ways to test this... Suggestions welcome!