Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Oct 16, 2024
1 parent 124f0c7 commit 97b686a
Showing 1 changed file with 84 additions and 45 deletions.
129 changes: 84 additions & 45 deletions datafusion/sqllogictest/test_files/parquet.slt
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,13 @@ DROP TABLE listing_table;
## Create a table with a binary column

query I
COPY (SELECT arrow_cast(string_col, 'Binary') as binary_col FROM src_table)
COPY (
SELECT
arrow_cast(string_col, 'Binary') as binary_col,
arrow_cast(string_col, 'LargeBinary') as largebinary_col,
arrow_cast(string_col, 'BinaryView') as binaryview_col
FROM src_table
)
TO 'test_files/scratch/parquet/binary_as_string.parquet'
STORED AS PARQUET;
----
Expand All @@ -371,75 +377,95 @@ CREATE EXTERNAL TABLE binary_as_string_default
STORED AS PARQUET LOCATION 'test_files/scratch/parquet/binary_as_string.parquet'

# NB the data is read and displayed as binary
query T?
select arrow_typeof(binary_col), binary_col from binary_as_string_default;
query T?T?T?
select
arrow_typeof(binary_col), binary_col,
arrow_typeof(largebinary_col), largebinary_col,
arrow_typeof(binaryview_col), binaryview_col
FROM binary_as_string_default;
----
Binary 616161
Binary 626262
Binary 636363
Binary 646464
Binary 656565
Binary 666666
Binary 676767
Binary 686868
Binary 696969

# Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicate)
Binary 616161 Binary 616161 Binary 616161
Binary 626262 Binary 626262 Binary 626262
Binary 636363 Binary 636363 Binary 636363
Binary 646464 Binary 646464 Binary 646464
Binary 656565 Binary 656565 Binary 656565
Binary 666666 Binary 666666 Binary 666666
Binary 676767 Binary 676767 Binary 676767
Binary 686868 Binary 686868 Binary 686868
Binary 696969 Binary 696969 Binary 696969

# Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicates)
query TT
EXPLAIN SELECT binary_col FROM binary_as_string_default WHERE binary_col LIKE '%a%';
EXPLAIN
SELECT binary_col, largebinary_col, binaryview_col
FROM binary_as_string_default
WHERE
binary_col LIKE '%a%' AND
largebinary_col LIKE '%a%' AND
binaryview_col LIKE '%a%';
----
logical_plan
01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%")
02)--TableScan: binary_as_string_default projection=[binary_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%")]
01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%")
02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%")]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a%
02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a%
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a%
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a%


statement ok
DROP TABLE binary_as_string_default;

## Test 2: Read table with binary_as_string option
## Test 2: Read table using the binary_as_string option

statement ok
CREATE EXTERNAL TABLE binary_as_string_option
STORED AS PARQUET LOCATION 'test_files/scratch/parquet/binary_as_string.parquet'
OPTIONS ('binary_as_string' 'true');

# NB the data is read and displayed as string
query TT
select arrow_typeof(binary_col), binary_col from binary_as_string_option;
query TTTTTT
select
arrow_typeof(binary_col), binary_col,
arrow_typeof(largebinary_col), largebinary_col,
arrow_typeof(binaryview_col), binaryview_col
FROM binary_as_string_option;
----
Utf8 aaa
Utf8 bbb
Utf8 ccc
Utf8 ddd
Utf8 eee
Utf8 fff
Utf8 ggg
Utf8 hhh
Utf8 iii
Utf8 aaa Utf8 aaa Utf8 aaa
Utf8 bbb Utf8 bbb Utf8 bbb
Utf8 ccc Utf8 ccc Utf8 ccc
Utf8 ddd Utf8 ddd Utf8 ddd
Utf8 eee Utf8 eee Utf8 eee
Utf8 fff Utf8 fff Utf8 fff
Utf8 ggg Utf8 ggg Utf8 ggg
Utf8 hhh Utf8 hhh Utf8 hhh
Utf8 iii Utf8 iii Utf8 iii

# Run an explain plan to show that no cast is needed in the plan (the columns are already read as strings)
query TT
EXPLAIN SELECT binary_col FROM binary_as_string_option WHERE binary_col LIKE '%a%';
EXPLAIN
SELECT binary_col, largebinary_col, binaryview_col
FROM binary_as_string_option
WHERE
binary_col LIKE '%a%' AND
largebinary_col LIKE '%a%' AND
binaryview_col LIKE '%a%';
----
logical_plan
01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%")
02)--TableScan: binary_as_string_option projection=[binary_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%")]
01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8("%a%")
02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%"), binary_as_string_option.largebinary_col LIKE Utf8("%a%"), binary_as_string_option.binaryview_col LIKE Utf8("%a%")]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: binary_col@0 LIKE %a%
02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a%
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=binary_col@0 LIKE %a%
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a%


statement ok
DROP TABLE binary_as_string_option;

## Test 3: Read table with binary_as_string option and schema_force_view_types
## Test 3: Read table with binary_as_string option AND schema_force_view_types

statement ok
CREATE EXTERNAL TABLE binary_as_string_both
Expand All @@ -450,9 +476,13 @@ OPTIONS (
);

# NB the data is read and displayed as a StringView
query TT
select arrow_typeof(binary_col), binary_col from binary_as_string_both;
----
query error DataFusion error: SQL error: ParserError\("Expected: an SQL statement, found: Utf8View"\)
select
arrow_typeof(binary_col), binary_col,
arrow_typeof(largebinary_col), largebinary_col,
arrow_typeof(binaryview_col), binaryview_col
FROM binary_as_string_both;
----
Utf8View aaa
Utf8View bbb
Utf8View ccc
Expand All @@ -465,17 +495,26 @@ Utf8View iii

# Run an explain plan to show that no cast is needed in the plan (the columns are already read as string views)
query TT
EXPLAIN SELECT binary_col FROM binary_as_string_both WHERE binary_col LIKE '%a%';
EXPLAIN
SELECT binary_col, largebinary_col, binaryview_col
FROM binary_as_string_both
WHERE
binary_col LIKE '%a%' AND
largebinary_col LIKE '%a%' AND
binaryview_col LIKE '%a%';
----
logical_plan
01)Filter: binary_as_string_both.binary_col LIKE Utf8View("%a%")
02)--TableScan: binary_as_string_both projection=[binary_col], partial_filters=[binary_as_string_both.binary_col LIKE Utf8View("%a%")]
01)Filter: binary_as_string_both.binary_col LIKE Utf8View("%a%") AND binary_as_string_both.largebinary_col LIKE Utf8View("%a%") AND binary_as_string_both.binaryview_col LIKE Utf8View("%a%")
02)--TableScan: binary_as_string_both projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_both.binary_col LIKE Utf8View("%a%"), binary_as_string_both.largebinary_col LIKE Utf8View("%a%"), binary_as_string_both.binaryview_col LIKE Utf8View("%a%")]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: binary_col@0 LIKE %a%
02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a%
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=binary_col@0 LIKE %a%
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a%


statement ok
drop table binary_as_string_both;

# Read a parquet file with binary data in a FixedSizeBinary column

Expand Down

0 comments on commit 97b686a

Please sign in to comment.