Skip to content

Commit ea525fe

Browse files
yaooqinn authored and cloud-fan committed
[SPARK-31597][SQL] extracting day from intervals should be interval.days + days in interval.microsecond
### What changes were proposed in this pull request? Following the suggestion from cloud-fan in #28222 (comment), I checked both Presto and PostgreSQL: one implements intervals in the ANSI style (year-month/day-time), and the other uses a mixed, non-ANSI style. When extracting the day from an interval value, both add the whole days contained in the time part of the interval to the interval's days. ```sql presto> SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:00' as timestamp))); _col0 ------- 14 (1 row) Query 20200428_135239_00000_ahn7x, FINISHED, 1 node Splits: 17 total, 17 done (100.00%) 0:01 [0 rows, 0B] [0 rows/s, 0B/s] presto> SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:01' as timestamp))); _col0 ------- 13 (1 row) Query 20200428_135246_00001_ahn7x, FINISHED, 1 node Splits: 17 total, 17 done (100.00%) 0:00 [0 rows, 0B] [0 rows/s, 0B/s] presto> ``` ```sql postgres=# SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:00' as timestamp))); date_part ----------- 14 (1 row) postgres=# SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:01' as timestamp))); date_part ----------- 13 ``` Before this change, Spark returned 0 for both queries: ``` spark-sql> SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:01' as timestamp))); 0 spark-sql> SELECT EXTRACT(DAY FROM (cast('2020-01-15 00:00:00' as timestamp) - cast('2020-01-01 00:00:00' as timestamp))); 0 ``` In the ANSI standard, a day is exactly 24 hours, so we don't need to worry about the conceptual day for interval extraction. The conceptual day only matters when the interval is added to a zoned timestamp value. ### Why are the changes needed? To both satisfy the ANSI standard and match the common behavior of modern SQL platforms. ### Does this PR introduce any user-facing change? No, it is new in 3.0. ### How was this patch tested? Added more unit tests. Closes #28396 from yaooqinn/SPARK-31597. 
Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 6bc8d84 commit ea525fe

File tree

3 files changed

+22
-19
lines changed

3 files changed

+22
-19
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
package org.apache.spark.sql.catalyst.util
1919

20-
import java.math.BigDecimal
2120
import java.util.concurrent.TimeUnit
2221

2322
import scala.util.control.NonFatal
@@ -55,11 +54,12 @@ object IntervalUtils {
5554
}
5655

5756
def getDays(interval: CalendarInterval): Int = {
58-
interval.days
57+
val daysInMicroseconds = (interval.microseconds / MICROS_PER_DAY).toInt
58+
Math.addExact(interval.days, daysInMicroseconds)
5959
}
6060

6161
def getHours(interval: CalendarInterval): Long = {
62-
interval.microseconds / MICROS_PER_HOUR
62+
(interval.microseconds % MICROS_PER_DAY) / MICROS_PER_HOUR
6363
}
6464

6565
def getMinutes(interval: CalendarInterval): Byte = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
6868
// Years and months must not be taken into account
6969
checkEvaluation(ExtractIntervalDays("100 year 10 months 5 days"), 5)
7070
checkEvaluation(ExtractIntervalDays(largeInterval), 31)
71+
checkEvaluation(ExtractIntervalDays("25 hours"), 1)
7172
}
7273

7374
test("hours") {
@@ -81,6 +82,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
8182
// Minutes should be taken into account
8283
checkEvaluation(ExtractIntervalHours("10 hours 100 minutes"), 11L)
8384
checkEvaluation(ExtractIntervalHours(largeInterval), 11L)
85+
checkEvaluation(ExtractIntervalHours("25 hours"), 1L)
86+
8487
}
8588

8689
test("minutes") {

sql/core/src/test/resources/sql-tests/results/extract.sql.out

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -135,23 +135,23 @@ select extract(day from c), extract(day from i) from t
135135
-- !query schema
136136
struct<extract('day' FROM t.`c`):int,extract('day' FROM t.`i`):int>
137137
-- !query output
138-
6 30
138+
6 31
139139

140140

141141
-- !query
142142
select extract(d from c), extract(d from i) from t
143143
-- !query schema
144144
struct<extract('d' FROM t.`c`):int,extract('d' FROM t.`i`):int>
145145
-- !query output
146-
6 30
146+
6 31
147147

148148

149149
-- !query
150150
select extract(days from c), extract(days from i) from t
151151
-- !query schema
152152
struct<extract('days' FROM t.`c`):int,extract('days' FROM t.`i`):int>
153153
-- !query output
154-
6 30
154+
6 31
155155

156156

157157
-- !query
@@ -199,39 +199,39 @@ select extract(hour from c), extract(hour from i) from t
199199
-- !query schema
200200
struct<extract('hour' FROM t.`c`):int,extract('hour' FROM t.`i`):bigint>
201201
-- !query output
202-
7 40
202+
7 16
203203

204204

205205
-- !query
206206
select extract(h from c), extract(h from i) from t
207207
-- !query schema
208208
struct<extract('h' FROM t.`c`):int,extract('h' FROM t.`i`):bigint>
209209
-- !query output
210-
7 40
210+
7 16
211211

212212

213213
-- !query
214214
select extract(hours from c), extract(hours from i) from t
215215
-- !query schema
216216
struct<extract('hours' FROM t.`c`):int,extract('hours' FROM t.`i`):bigint>
217217
-- !query output
218-
7 40
218+
7 16
219219

220220

221221
-- !query
222222
select extract(hr from c), extract(hr from i) from t
223223
-- !query schema
224224
struct<extract('hr' FROM t.`c`):int,extract('hr' FROM t.`i`):bigint>
225225
-- !query output
226-
7 40
226+
7 16
227227

228228

229229
-- !query
230230
select extract(hrs from c), extract(hrs from i) from t
231231
-- !query schema
232232
struct<extract('hrs' FROM t.`c`):int,extract('hrs' FROM t.`i`):bigint>
233233
-- !query output
234-
7 40
234+
7 16
235235

236236

237237
-- !query
@@ -457,23 +457,23 @@ select date_part('day', c), date_part('day', i) from t
457457
-- !query schema
458458
struct<date_part('day', t.`c`):int,date_part('day', t.`i`):int>
459459
-- !query output
460-
6 30
460+
6 31
461461

462462

463463
-- !query
464464
select date_part('d', c), date_part('d', i) from t
465465
-- !query schema
466466
struct<date_part('d', t.`c`):int,date_part('d', t.`i`):int>
467467
-- !query output
468-
6 30
468+
6 31
469469

470470

471471
-- !query
472472
select date_part('days', c), date_part('days', i) from t
473473
-- !query schema
474474
struct<date_part('days', t.`c`):int,date_part('days', t.`i`):int>
475475
-- !query output
476-
6 30
476+
6 31
477477

478478

479479
-- !query
@@ -521,39 +521,39 @@ select date_part('hour', c), date_part('hour', i) from t
521521
-- !query schema
522522
struct<date_part('hour', t.`c`):int,date_part('hour', t.`i`):bigint>
523523
-- !query output
524-
7 40
524+
7 16
525525

526526

527527
-- !query
528528
select date_part('h', c), date_part('h', i) from t
529529
-- !query schema
530530
struct<date_part('h', t.`c`):int,date_part('h', t.`i`):bigint>
531531
-- !query output
532-
7 40
532+
7 16
533533

534534

535535
-- !query
536536
select date_part('hours', c), date_part('hours', i) from t
537537
-- !query schema
538538
struct<date_part('hours', t.`c`):int,date_part('hours', t.`i`):bigint>
539539
-- !query output
540-
7 40
540+
7 16
541541

542542

543543
-- !query
544544
select date_part('hr', c), date_part('hr', i) from t
545545
-- !query schema
546546
struct<date_part('hr', t.`c`):int,date_part('hr', t.`i`):bigint>
547547
-- !query output
548-
7 40
548+
7 16
549549

550550

551551
-- !query
552552
select date_part('hrs', c), date_part('hrs', i) from t
553553
-- !query schema
554554
struct<date_part('hrs', t.`c`):int,date_part('hrs', t.`i`):bigint>
555555
-- !query output
556-
7 40
556+
7 16
557557

558558

559559
-- !query

0 commit comments

Comments
 (0)