Skip to content

Commit cf7d687

Browse files
shruti2522 authored and emilk committed
GroupsAccumulator for Duration (apache#15322)
1 parent 68f2903 commit cf7d687

File tree

2 files changed

+92
-19
lines changed

2 files changed

+92
-19
lines changed

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,10 @@ use arrow::array::{
3232
};
3333
use arrow::compute;
3434
use arrow::datatypes::{
35-
DataType, Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type,
36-
Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit, UInt16Type, UInt32Type,
37-
UInt64Type, UInt8Type,
35+
DataType, Decimal128Type, Decimal256Type, DurationMicrosecondType,
36+
DurationMillisecondType, DurationNanosecondType, DurationSecondType, Float16Type,
37+
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit,
38+
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
3839
};
3940
use datafusion_common::stats::Precision;
4041
use datafusion_common::{
@@ -263,6 +264,7 @@ impl AggregateUDFImpl for Max {
263264
| Binary
264265
| LargeBinary
265266
| BinaryView
267+
| Duration(_)
266268
)
267269
}
268270

@@ -317,6 +319,18 @@ impl AggregateUDFImpl for Max {
317319
Timestamp(Nanosecond, _) => {
318320
primitive_max_accumulator!(data_type, i64, TimestampNanosecondType)
319321
}
322+
Duration(Second) => {
323+
primitive_max_accumulator!(data_type, i64, DurationSecondType)
324+
}
325+
Duration(Millisecond) => {
326+
primitive_max_accumulator!(data_type, i64, DurationMillisecondType)
327+
}
328+
Duration(Microsecond) => {
329+
primitive_max_accumulator!(data_type, i64, DurationMicrosecondType)
330+
}
331+
Duration(Nanosecond) => {
332+
primitive_max_accumulator!(data_type, i64, DurationNanosecondType)
333+
}
320334
Decimal128(_, _) => {
321335
primitive_max_accumulator!(data_type, i128, Decimal128Type)
322336
}
@@ -1090,6 +1104,7 @@ impl AggregateUDFImpl for Min {
10901104
| Binary
10911105
| LargeBinary
10921106
| BinaryView
1107+
| Duration(_)
10931108
)
10941109
}
10951110

@@ -1144,6 +1159,18 @@ impl AggregateUDFImpl for Min {
11441159
Timestamp(Nanosecond, _) => {
11451160
primitive_min_accumulator!(data_type, i64, TimestampNanosecondType)
11461161
}
1162+
Duration(Second) => {
1163+
primitive_min_accumulator!(data_type, i64, DurationSecondType)
1164+
}
1165+
Duration(Millisecond) => {
1166+
primitive_min_accumulator!(data_type, i64, DurationMillisecondType)
1167+
}
1168+
Duration(Microsecond) => {
1169+
primitive_min_accumulator!(data_type, i64, DurationMicrosecondType)
1170+
}
1171+
Duration(Nanosecond) => {
1172+
primitive_min_accumulator!(data_type, i64, DurationNanosecondType)
1173+
}
11471174
Decimal128(_, _) => {
11481175
primitive_min_accumulator!(data_type, i128, Decimal128Type)
11491176
}

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 62 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ SELECT approx_percentile_cont(c12, 0.95, c5) FROM aggregate_test_100
162162

163163
# Not supported over sliding windows
164164
query error This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented
165-
SELECT approx_percentile_cont(c3, 0.5) OVER (ROWS BETWEEN 4 PRECEDING AND CURRENT ROW)
165+
SELECT approx_percentile_cont(c3, 0.5) OVER (ROWS BETWEEN 4 PRECEDING AND CURRENT ROW)
166166
FROM aggregate_test_100
167167

168168
# array agg can use order by
@@ -2194,7 +2194,7 @@ drop table t;
21942194

21952195
# test count with largeutf8
21962196
statement ok
2197-
create table t (c string) as values
2197+
create table t (c string) as values
21982198
(arrow_cast('a', 'LargeUtf8')),
21992199
(arrow_cast('b', 'LargeUtf8')),
22002200
(arrow_cast(null, 'LargeUtf8')),
@@ -3807,6 +3807,52 @@ SELECT MIN(value), MAX(value) FROM timestampmicrosecond
38073807
statement ok
38083808
DROP TABLE timestampmicrosecond;
38093809

3810+
# min_duration, max_duration
3811+
statement ok
3812+
create table d
3813+
as values
3814+
(arrow_cast(1, 'Duration(Second)'), arrow_cast(2, 'Duration(Millisecond)'), arrow_cast(3, 'Duration(Microsecond)'), arrow_cast(4, 'Duration(Nanosecond)'), 1),
3815+
(arrow_cast(11, 'Duration(Second)'),arrow_cast(22, 'Duration(Millisecond)'), arrow_cast(33, 'Duration(Microsecond)'), arrow_cast(44, 'Duration(Nanosecond)'), 1);
3816+
3817+
query ????
3818+
SELECT min(column1), min(column2), min(column3), min(column4) FROM d;
3819+
----
3820+
0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs
3821+
3822+
query ????
3823+
SELECT max(column1), max(column2), max(column3), max(column4) FROM d;
3824+
----
3825+
0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs
3826+
3827+
# GROUP BY follows a different code path
3828+
query ????I
3829+
SELECT min(column1), min(column2), min(column3), min(column4), column5 FROM d GROUP BY column5;
3830+
----
3831+
0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs 1
3832+
3833+
query ????I
3834+
SELECT max(column1), max(column2), max(column3), max(column4), column5 FROM d GROUP BY column5;
3835+
----
3836+
0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs 1
3837+
3838+
statement ok
3839+
INSERT INTO d VALUES
3840+
(arrow_cast(3, 'Duration(Second)'), arrow_cast(1, 'Duration(Millisecond)'), arrow_cast(7, 'Duration(Microsecond)'), arrow_cast(2, 'Duration(Nanosecond)'), 1),
3841+
(arrow_cast(0, 'Duration(Second)'), arrow_cast(9, 'Duration(Millisecond)'), arrow_cast(5, 'Duration(Microsecond)'), arrow_cast(8, 'Duration(Nanosecond)'), 1);
3842+
3843+
query ????I
3844+
SELECT max(column1), max(column2), max(column3), max(column4), column5 FROM d GROUP BY column5 ORDER BY column5;
3845+
----
3846+
0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs 1
3847+
3848+
query ????I
3849+
SELECT min(column1), min(column2), min(column3), min(column4), column5 FROM d GROUP BY column5 ORDER BY column5;
3850+
----
3851+
0 days 0 hours 0 mins 0 secs 0 days 0 hours 0 mins 0.001 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000002 secs 1
3852+
3853+
statement ok
3854+
drop table d;
3855+
38103856
# max_bool
38113857
statement ok
38123858
CREATE TABLE max_bool (value BOOLEAN);
@@ -4647,7 +4693,7 @@ statement ok
46474693
create table t (c1 decimal(10, 0), c2 int) as values (null, null), (null, null), (null, null);
46484694

46494695
query RTIT
4650-
select
4696+
select
46514697
sum(c1), arrow_typeof(sum(c1)),
46524698
sum(c2), arrow_typeof(sum(c2))
46534699
from t;
@@ -5299,7 +5345,7 @@ NULL NULL 3 NULL 1 4 0 8 0
52995345

53005346
# regr_*() basic tests
53015347
query RRIRRRRRR
5302-
select
5348+
select
53035349
regr_slope(column2, column1),
53045350
regr_intercept(column2, column1),
53055351
regr_count(column2, column1),
@@ -5314,7 +5360,7 @@ from (values (1,2), (2,4), (3,6));
53145360
2 0 3 1 2 4 2 8 4
53155361

53165362
query RRIRRRRRR
5317-
select
5363+
select
53185364
regr_slope(c12, c11),
53195365
regr_intercept(c12, c11),
53205366
regr_count(c12, c11),
@@ -5332,7 +5378,7 @@ from aggregate_test_100;
53325378

53335379
# regr_*() functions ignore NULLs
53345380
query RRIRRRRRR
5335-
select
5381+
select
53365382
regr_slope(column2, column1),
53375383
regr_intercept(column2, column1),
53385384
regr_count(column2, column1),
@@ -5347,7 +5393,7 @@ from (values (1,NULL), (2,4), (3,6));
53475393
2 0 2 1 2.5 5 0.5 2 1
53485394

53495395
query RRIRRRRRR
5350-
select
5396+
select
53515397
regr_slope(column2, column1),
53525398
regr_intercept(column2, column1),
53535399
regr_count(column2, column1),
@@ -5362,7 +5408,7 @@ from (values (1,NULL), (NULL,4), (3,6));
53625408
NULL NULL 1 NULL 3 6 0 0 0
53635409

53645410
query RRIRRRRRR
5365-
select
5411+
select
53665412
regr_slope(column2, column1),
53675413
regr_intercept(column2, column1),
53685414
regr_count(column2, column1),
@@ -5377,8 +5423,8 @@ from (values (1,NULL), (NULL,4), (NULL,NULL));
53775423
NULL NULL 0 NULL NULL NULL NULL NULL NULL
53785424

53795425
query TRRIRRRRRR rowsort
5380-
select
5381-
column3,
5426+
select
5427+
column3,
53825428
regr_slope(column2, column1),
53835429
regr_intercept(column2, column1),
53845430
regr_count(column2, column1),
@@ -5402,7 +5448,7 @@ statement ok
54025448
set datafusion.execution.batch_size = 1;
54035449

54045450
query RRIRRRRRR
5405-
select
5451+
select
54065452
regr_slope(c12, c11),
54075453
regr_intercept(c12, c11),
54085454
regr_count(c12, c11),
@@ -5420,7 +5466,7 @@ statement ok
54205466
set datafusion.execution.batch_size = 2;
54215467

54225468
query RRIRRRRRR
5423-
select
5469+
select
54245470
regr_slope(c12, c11),
54255471
regr_intercept(c12, c11),
54265472
regr_count(c12, c11),
@@ -5438,7 +5484,7 @@ statement ok
54385484
set datafusion.execution.batch_size = 3;
54395485

54405486
query RRIRRRRRR
5441-
select
5487+
select
54425488
regr_slope(c12, c11),
54435489
regr_intercept(c12, c11),
54445490
regr_count(c12, c11),
@@ -5592,7 +5638,7 @@ CREATE TABLE float_table (
55925638

55935639
# Test string_agg with largeutf8
55945640
statement ok
5595-
create table string_agg_large_utf8 (c string) as values
5641+
create table string_agg_large_utf8 (c string) as values
55965642
(arrow_cast('a', 'LargeUtf8')),
55975643
(arrow_cast('b', 'LargeUtf8')),
55985644
(arrow_cast('c', 'LargeUtf8'))
@@ -5647,7 +5693,7 @@ select count(*) from (select count(*) a, count(*) b from (select 1));
56475693

56485694
# UTF8 string matters for string to &[u8] conversion, add it to prevent regression
56495695
statement ok
5650-
create table distinct_count_string_table as values
5696+
create table distinct_count_string_table as values
56515697
(1, 'a', 'longstringtest_a', '台灣'),
56525698
(2, 'b', 'longstringtest_b1', '日本'),
56535699
(2, 'b', 'longstringtest_b2', '中國'),
@@ -6625,7 +6671,7 @@ group1 0.0003
66256671
# median with all nulls
66266672
statement ok
66276673
create table group_median_all_nulls(
6628-
a STRING NOT NULL,
6674+
a STRING NOT NULL,
66296675
b INT
66306676
) AS VALUES
66316677
( 'group0', NULL),

0 commit comments

Comments
 (0)