Skip to content

Support Utf8View to numeric coercion #14377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,8 +684,10 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
match (lhs_type, rhs_type) {
(Utf8, _) if rhs_type.is_numeric() => Some(Utf8),
(LargeUtf8, _) if rhs_type.is_numeric() => Some(LargeUtf8),
(Utf8View, _) if rhs_type.is_numeric() => Some(Utf8View),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only code change.

(_, Utf8) if lhs_type.is_numeric() => Some(Utf8),
(_, LargeUtf8) if lhs_type.is_numeric() => Some(LargeUtf8),
(_, Utf8View) if lhs_type.is_numeric() => Some(Utf8View),
_ => None,
}
}
Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base;

statement ok
create table test_datetime as
select
arrow_cast(column1, 'Dictionary(Int32, Utf8)') as ts,
arrow_cast(column2, 'Dictionary(Int32, Utf8)') as d,
arrow_cast(column3, 'Dictionary(Int32, Utf8)') as t
from test_datetime_base;


statement ok
drop table test_source

Expand All @@ -56,3 +65,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;
11 changes: 11 additions & 0 deletions datafusion/sqllogictest/test_files/string/init_data.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,14 @@ statement ok
create table test_substr_base (
col1 VARCHAR
) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);


# --------------------------------------
# Set up test tables with date/time values to test coercion
# --------------------------------------
statement ok
create table test_datetime_base as values
('2024-08-09T12:13:14', '2024-08-09', '12:13:14'),
('2024-08-09T12:13:15', '2024-09-09', '12:14:14'),
(NULL, NULL, NULL)
;
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/string/large_string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;

statement ok
create table test_datetime as
select
arrow_cast(column1, 'LargeUtf8') as ts,
arrow_cast(column2, 'LargeUtf8') as d,
arrow_cast(column3, 'LargeUtf8') as t
from test_datetime_base;


# select
query TTTT
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
Expand Down Expand Up @@ -64,3 +73,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;
10 changes: 10 additions & 0 deletions datafusion/sqllogictest/test_files/string/string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;

statement ok
create table test_datetime as
select
arrow_cast(column1, 'Utf8') as ts,
arrow_cast(column2, 'Utf8') as d,
arrow_cast(column3, 'Utf8') as t
from test_datetime_base;


#
Expand Down Expand Up @@ -186,3 +193,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr;

statement ok
drop table test_datetime;
47 changes: 47 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_query.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
# with standard values, but different types in string columns
# (String, StringView, etc.)

# --------------------------------------
# Show the input data
# --------------------------------------

# select
query TTTT
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
Expand All @@ -35,6 +39,49 @@ _ \_ (empty) (empty)
NULL % NULL NULL
NULL R NULL 🔥

# --------------------------------------
# Test type coercion (compare a string column to an int literal);
# these queries should not error
# --------------------------------------

query BB
select ascii_1 = 1 as col1, 1 = ascii_1 as col2 from test_basic_operator;
----
false false
false false
false false
false false
false false
false false
false false
false false
false false
NULL NULL
NULL NULL

query BB
select ascii_1 <> 1 as col1, 1 <> ascii_1 as col2 from test_basic_operator;
----
true true
true true
true true
true true
true true
true true
true true
true true
true true
NULL NULL
NULL NULL

# Test coercion of string columns when compared to timestamp, date, and time values
query BBB
select ts = '2024-08-09T12:13:14'::timestamp, d = '2024-08-08'::date, t = '12:13:14'::time from test_datetime;
----
true false true
false false false
NULL NULL NULL

# --------------------------------------
# column comparison as filters
# --------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;

statement ok
create table test_datetime as
select
arrow_cast(column1, 'Utf8View') as ts,
arrow_cast(column2, 'Utf8View') as d,
arrow_cast(column3, 'Utf8View') as t
from test_datetime_base;

statement ok
drop table test_source

Expand All @@ -51,6 +59,9 @@ drop table test_basic_operator;
statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;


# --------------------------------------
# String_view specific tests
Expand Down