Skip to content

Column support for array functions #6804

Closed
@izveigor

Description

@izveigor

Is your feature request related to a problem or challenge?

#6709
#6693

After the PR (#6384) was merged there are a lot of array functions became part of arrow-datafusion. The first implementation was designed only for scalar data, but most of the intended operations will be performed using columns.

So, here is a list of all array functions 🎯:

Describe the solution you'd like

The behavior of columns with array functions should be identical with:

tables

postgres=# select * from arrays;
       column1       |     column2      |    column3     
---------------------+------------------+----------------
 {{NULL,2},{3,NULL}} | {1.1,2.2,3.3}    | {L,o,r,e,m}
 {{3,4},{5,6}}       | {NULL,5.5,6.6}   | {i,p,NULL,u,m}
 {{5,6},{7,8}}       | {7.7,8.8,9.9}    | {d,NULL,l,o,r}
 {{7,NULL},{9,10}}   | {10.1,NULL,12.2} | {s,i,t}
                     | {13.3,14.4,15.5} | {a,m,e,t}
 {{11,12},{13,14}}   |                  | {","}
 {{15,16},{NULL,18}} | {16.6,17.7,18.8} | 
(7 rows)
postgres=# select * from values;
 a | b  | c  |  d  |      e      
---+----+----+-----+-------------
 1 |  1 |  2 | 1.1 | Lorem
 2 |  3 |  4 | 2.2 | ipsum
 3 |  5 |  6 | 3.3 | dolor
 4 |  7 |  8 | 4.4 | sit
   |  9 | 10 | 5.5 | amet
 5 |    | 12 | 6.6 | ,
 6 | 11 |    | 7.7 | consectetur
 7 | 13 | 14 |     | adipiscing
 8 | 15 | 16 | 8.8 | 
(9 rows)
postgres=# select * from arrays_values;
              column1              | column2 | column3 | column4 
-----------------------------------+---------+---------+---------
 {NULL,2,3,4,5,6,7,8,9,10}         |       1 |       1 | ,
 {11,12,13,14,15,16,17,18,NULL,20} |      12 |       2 | .
 {21,22,23,NULL,25,26,27,28,29,30} |      23 |       3 | -
 {31,32,33,34,35,NULL,37,38,39,40} |      34 |       4 | ok
                                   |      44 |       5 | @
 {41,42,43,44,45,46,47,48,49,50}   |         |       6 | $
 {51,52,NULL,54,55,56,57,58,59,60} |      55 |         | ^
 {61,62,63,64,65,66,67,68,69,70}   |      66 |       7 | 
(8 rows)
postgres=# select * from arrays_values_without_nulls;
             column1             | column2 | column3 | column4 
---------------------------------+---------+---------+---------
 {NULL,2,3,4,5,6,7,8,9,10}       |       1 |       1 | ,
 {11,12,13,14,15,16,17,18,19,20} |      12 |       2 | .
 {21,22,23,24,25,26,27,28,29,30} |      23 |       3 | -
 {31,32,33,34,35,26,37,38,39,40} |      34 |       4 | ok
(4 rows)

array_append

postgres=# select array_append(column1, column2) from arrays_values;
             array_append             
--------------------------------------
 {NULL,2,3,4,5,6,7,8,9,10,1}
 {11,12,13,14,15,16,17,18,NULL,20,12}
 {21,22,23,NULL,25,26,27,28,29,30,23}
 {31,32,33,34,35,NULL,37,38,39,40,34}
 {44}
 {41,42,43,44,45,46,47,48,49,50,NULL}
 {51,52,NULL,54,55,56,57,58,59,60,55}
 {61,62,63,64,65,66,67,68,69,70,66}
(8 rows)

postgres=# select array_append(column2, 100.1), array_append(column3, '.') from arrays;
      array_append      |   array_append   
------------------------+------------------
 {1.1,2.2,3.3,100.1}    | {L,o,r,e,m,.}
 {NULL,5.5,6.6,100.1}   | {i,p,NULL,u,m,.}
 {7.7,8.8,9.9,100.1}    | {d,NULL,l,o,r,.}
 {10.1,NULL,12.2,100.1} | {s,i,t,.}
 {13.3,14.4,15.5,100.1} | {a,m,e,t,.}
 {100.1}                | {",",.}
 {16.6,17.7,18.8,100.1} | {.}
(7 rows)

array_prepend

postgres=# select array_prepend(column2, column1) from arrays_values;
            array_prepend             
--------------------------------------
 {1,NULL,2,3,4,5,6,7,8,9,10}
 {12,11,12,13,14,15,16,17,18,NULL,20}
 {23,21,22,23,NULL,25,26,27,28,29,30}
 {34,31,32,33,34,35,NULL,37,38,39,40}
 {44}
 {NULL,41,42,43,44,45,46,47,48,49,50}
 {55,51,52,NULL,54,55,56,57,58,59,60}
 {66,61,62,63,64,65,66,67,68,69,70}
(8 rows)

postgres=# select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
     array_prepend      |  array_prepend   
------------------------+------------------
 {100.1,1.1,2.2,3.3}    | {.,L,o,r,e,m}
 {100.1,NULL,5.5,6.6}   | {.,i,p,NULL,u,m}
 {100.1,7.7,8.8,9.9}    | {.,d,NULL,l,o,r}
 {100.1,10.1,NULL,12.2} | {.,s,i,t}
 {100.1,13.3,14.4,15.5} | {.,a,m,e,t}
 {100.1}                | {.,","}
 {100.1,16.6,17.7,18.8} | {.}
(7 rows)

array_concat

postgres=# select array_cat(column1, column1), array_cat(column2, column2), array_cat(column3, column3) from arrays;
               array_cat               |            array_cat            |          array_cat          
---------------------------------------+---------------------------------+-----------------------------
 {{NULL,2},{3,NULL},{NULL,2},{3,NULL}} | {1.1,2.2,3.3,1.1,2.2,3.3}       | {L,o,r,e,m,L,o,r,e,m}
 {{3,4},{5,6},{3,4},{5,6}}             | {NULL,5.5,6.6,NULL,5.5,6.6}     | {i,p,NULL,u,m,i,p,NULL,u,m}
 {{5,6},{7,8},{5,6},{7,8}}             | {7.7,8.8,9.9,7.7,8.8,9.9}       | {d,NULL,l,o,r,d,NULL,l,o,r}
 {{7,NULL},{9,10},{7,NULL},{9,10}}     | {10.1,NULL,12.2,10.1,NULL,12.2} | {s,i,t,s,i,t}
                                       | {13.3,14.4,15.5,13.3,14.4,15.5} | {a,m,e,t,a,m,e,t}
 {{11,12},{13,14},{11,12},{13,14}}     |                                 | {",",","}
 {{15,16},{NULL,18},{15,16},{NULL,18}} | {16.6,17.7,18.8,16.6,17.7,18.8} | 
(7 rows)

postgres=# select array_cat(column1, array[array[1, 2], array[3, 4]]), array_cat(column2, array[1.1, 2.2, 3.3]), array_cat(column3, array['.', '.', '.']) from arrays;
            array_cat            |          array_cat           |      array_cat       
---------------------------------+------------------------------+----------------------
 {{NULL,2},{3,NULL},{1,2},{3,4}} | {1.1,2.2,3.3,1.1,2.2,3.3}    | {L,o,r,e,m,.,.,.}
 {{3,4},{5,6},{1,2},{3,4}}       | {NULL,5.5,6.6,1.1,2.2,3.3}   | {i,p,NULL,u,m,.,.,.}
 {{5,6},{7,8},{1,2},{3,4}}       | {7.7,8.8,9.9,1.1,2.2,3.3}    | {d,NULL,l,o,r,.,.,.}
 {{7,NULL},{9,10},{1,2},{3,4}}   | {10.1,NULL,12.2,1.1,2.2,3.3} | {s,i,t,.,.,.}
 {{1,2},{3,4}}                   | {13.3,14.4,15.5,1.1,2.2,3.3} | {a,m,e,t,.,.,.}
 {{11,12},{13,14},{1,2},{3,4}}   | {1.1,2.2,3.3}                | {",",.,.,.}
 {{15,16},{NULL,18},{1,2},{3,4}} | {16.6,17.7,18.8,1.1,2.2,3.3} | {.,.,.}
(7 rows)

array_position

postgres=# select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls;
 array_position | array_position 
----------------+----------------
              1 |              1
              2 |              2
              3 |              3
              4 |              4
(4 rows)

postgres=# select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls;
 array_position | array_position 
----------------+----------------
              3 |               
                |               
                |               
                |               
(4 rows)

array_positions

postgres=# select array_positions(column1, column2) from arrays_values_without_nulls;
 array_positions 
-----------------
 {1}
 {2}
 {3}
 {4}
(4 rows)

postgres=# select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls;
 array_positions | array_positions 
-----------------+-----------------
 {4}             | {1}
 {}              | {}
 {}              | {3}
 {}              | {}
(4 rows)

array_replace

postgres=# select array_replace(column1, column2, column3) from arrays_values;
            array_replace            
-------------------------------------
 {NULL,2,3,4,5,6,7,8,9,10}
 {11,2,13,14,15,16,17,18,NULL,20}
 {21,22,3,NULL,25,26,27,28,29,30}
 {31,32,33,4,35,NULL,37,38,39,40}
 
 {41,42,43,44,45,46,47,48,49,50}
 {51,52,NULL,54,NULL,56,57,58,59,60}
 {61,62,63,64,65,7,67,68,69,70}
(8 rows)

postgres=# select array_replace(column1, column2, 55), array_replace(column1, 22, column3), array_replace(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column2, column3) from arrays_values;
           array_replace           |           array_replace           |     array_replace      
-----------------------------------+-----------------------------------+------------------------
 {NULL,2,3,4,5,6,7,8,9,10}         | {NULL,2,3,4,5,6,7,8,9,10}         | {1,2,3,4,5,6,7,8,9,10}
 {11,55,13,14,15,16,17,18,NULL,20} | {11,12,13,14,15,16,17,18,NULL,20} | {1,2,3,4,5,6,7,8,9,10}
 {21,22,55,NULL,25,26,27,28,29,30} | {21,3,23,NULL,25,26,27,28,29,30}  | {1,2,3,4,5,6,7,8,9,10}
 {31,32,33,55,35,NULL,37,38,39,40} | {31,32,33,34,35,NULL,37,38,39,40} | {1,2,3,4,5,6,7,8,9,10}
                                   |                                   | {1,2,3,4,5,6,7,8,9,10}
 {41,42,43,44,45,46,47,48,49,50}   | {41,42,43,44,45,46,47,48,49,50}   | {1,2,3,4,5,6,7,8,9,10}
 {51,52,NULL,54,55,56,57,58,59,60} | {51,52,NULL,54,55,56,57,58,59,60} | {1,2,3,4,5,6,7,8,9,10}
 {61,62,63,64,65,55,67,68,69,70}   | {61,62,63,64,65,66,67,68,69,70}   | {1,2,3,4,5,6,7,8,9,10}
(8 rows)

array_to_string

postgres=# select array_to_string(column1, column4) from arrays_values;
          array_to_string           
------------------------------------
 2,3,4,5,6,7,8,9,10
 11.12.13.14.15.16.17.18.20
 21-22-23-25-26-27-28-29-30
 31ok32ok33ok34ok35ok37ok38ok39ok40
 
 41$42$43$44$45$46$47$48$49$50
 51^52^54^55^56^57^58^59^60
 
(8 rows)

postgres=# select array_to_string(column1, '_'), array_to_string(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column4) from a
rrays_values;
        array_to_string        |        array_to_string        
-------------------------------+-------------------------------
 2_3_4_5_6_7_8_9_10            | 1,2,3,4,5,6,7,8,9,10
 11_12_13_14_15_16_17_18_20    | 1.2.3.4.5.6.7.8.9.10
 21_22_23_25_26_27_28_29_30    | 1-2-3-4-5-6-7-8-9-10
 31_32_33_34_35_37_38_39_40    | 1ok2ok3ok4ok5ok6ok7ok8ok9ok10
                               | 1@2@3@4@5@6@7@8@9@10
 41_42_43_44_45_46_47_48_49_50 | 1$2$3$4$5$6$7$8$9$10
 51_52_54_55_56_57_58_59_60    | 1^2^3^4^5^6^7^8^9^10
 61_62_63_64_65_66_67_68_69_70 | 
(8 rows)

cardinality

postgres=# select cardinality(column1), cardinality(column2), cardinality(column3) from arrays;
 cardinality | cardinality | cardinality 
-------------+-------------+-------------
           4 |           3 |           5
           4 |           3 |           5
           4 |           3 |           5
           4 |           3 |           3
             |           3 |           4
           4 |             |           1
           4 |           3 |            
(7 rows)

trim_array

postgres=# select trim_array(column1, column3) from arrays_values;
        trim_array         
---------------------------
 {NULL,2,3,4,5,6,7,8,9}
 {11,12,13,14,15,16,17,18}
 {21,22,23,NULL,25,26,27}
 {31,32,33,34,35,NULL}
 
 {41,42,43,44}
 
 {61,62,63}
(8 rows)

postgres=# select trim_array(column1, 5), trim_array(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column3) from arrays_values;
     trim_array     |     trim_array      
--------------------+---------------------
 {NULL,2,3,4,5}     | {1,2,3,4,5,6,7,8,9}
 {11,12,13,14,15}   | {1,2,3,4,5,6,7,8}
 {21,22,23,NULL,25} | {1,2,3,4,5,6,7}
 {31,32,33,34,35}   | {1,2,3,4,5,6}
                    | {1,2,3,4,5}
 {41,42,43,44,45}   | {1,2,3,4}
 {51,52,NULL,54,55} | 
 {61,62,63,64,65}   | {1,2,3}
(8 rows)

array_length

postgres=# select array_length(column1, column3) from arrays_values;
 array_length 
--------------
           10
             
             
             
             
             
             
             
(8 rows)

postgres=# select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values;
 array_length | array_length 
--------------+--------------
            2 |           10
            2 |           10
              |           10
              |           10
              |             
              |           10
              |           10
              |           10
(8 rows)

array_dims

postgres=# select array_dims(column1), array_dims(column2), array_dims(column3) from arrays;
 array_dims | array_dims | array_dims 
------------+------------+------------
 [1:2][1:2] | [1:3]      | [1:5]
 [1:2][1:2] | [1:3]      | [1:5]
 [1:2][1:2] | [1:3]      | [1:5]
 [1:2][1:2] | [1:3]      | [1:3]
            | [1:3]      | [1:4]
 [1:2][1:2] |            | [1:1]
 [1:2][1:2] | [1:3]      | 
(7 rows)

array_ndims

postgres=# select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays;
 array_ndims | array_ndims | array_ndims 
-------------+-------------+-------------
           2 |           1 |           1
           2 |           1 |           1
           2 |           1 |           1
           2 |           1 |           1
             |           1 |           1
           2 |             |           1
           2 |           1 |            
(7 rows)

array_contains

postgres=# select column1 @> column1, column2 @> column2, column3 @> column3 from arrays;
 ?column? | ?column? | ?column? 
----------+----------+----------
 f        | t        | t
 t        | f        | f
 t        | t        | f
 f        | f        | t
          | t        | t
 t        |          | t
 f        | t        | 
(7 rows)

postgres=# select column1 @> array[1, 2], array[5, 6, 7, 8] @> column1 from arrays;
 ?column? | ?column? 
----------+----------
 f        | f
 f        | f
 f        | t
 f        | f
          | 
 f        | f
 f        | f
(7 rows)

Describe alternatives you've considered

No response

Additional context

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions