Skip to content

Commit 697083c

Browse files
yaooqinn and cloud-fan
authored and committed
[SPARK-31469][SQL] Make extract interval field ANSI compliant
### What changes were proposed in this pull request? Currently, we can extract `millennium/century/decade/year/quarter/month/week/day/hour/minute/second (with fractions)/millisecond/microseconds` and `epoch` from interval values. When getting `millennium/century/decade/year`, the result is how many of that unit the interval's `months` part can be converted to. The content of `millennium/century/decade` will overlap `year` and each other. When getting `month/day` and so on, the result is the integral remainder of the previous unit. Here all the units, including `year`, are individual. So when extracting `year`, `month`, `day`, `hour`, `minute`, `second`, which are the ANSI primary datetime units, the semantic is `extracting`, but the others might be better described as `transforming`. When getting `epoch`, we have to treat a month as 30 days, which deviates from the natural calendar rules we use. To avoid ambiguity, I suggest we should only support those extract fields defined by ANSI, along with their abbreviations. ### Why are the changes needed? Extracting `millennium`, `century`, etc. does not obey the meaning of extracting, and they are neither very useful nor worth maintaining. `extract` is an ANSI standard expression and `date_part` is its pg-specific alias function. The currently supported extract fields are fully borrowed from PostgreSQL. Looking at other systems like Presto/Hive, they don't support those ambiguous fields either. e.g. Hive 2.2.x also takes it from PostgreSQL but without introducing those ambiguous fields https://issues.apache.org/jira/secure/attachment/12828349/HIVE-14579 e.g.
presto ```sql presto> select extract(quater from interval '10-0' year to month); Query 20200417_094723_00020_m8xq4 failed: line 1:8: Invalid EXTRACT field: quater select extract(quater from interval '10-0' year to month) presto> select extract(decade from interval '10-0' year to month); Query 20200417_094737_00021_m8xq4 failed: line 1:8: Invalid EXTRACT field: decade select extract(decade from interval '10-0' year to month) ``` ### Does this PR introduce any user-facing change? Yes, as we already have preview versions, this PR will remove support for extracting `millennium/century/decade/quarter/week/millisecond/microseconds` and `epoch` from intervals with the `date_part` function. ### How was this patch tested? Removed the tests that exercised the dropped fields. Closes #28242 from yaooqinn/SPARK-31469. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent f1489e6 commit 697083c

File tree

10 files changed

+120
-540
lines changed

10 files changed

+120
-540
lines changed

common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,9 @@
1919

2020
public class DateTimeConstants {
2121

22-
public static final int YEARS_PER_DECADE = 10;
23-
public static final int YEARS_PER_CENTURY = 100;
24-
public static final int YEARS_PER_MILLENNIUM = 1000;
25-
26-
public static final byte MONTHS_PER_QUARTER = 3;
2722
public static final int MONTHS_PER_YEAR = 12;
2823

2924
public static final byte DAYS_PER_WEEK = 7;
30-
public static final long DAYS_PER_MONTH = 30L;
3125

3226
public static final long HOURS_PER_DAY = 24L;
3327

@@ -47,9 +41,6 @@ public class DateTimeConstants {
4741
public static final long MICROS_PER_MINUTE = SECONDS_PER_MINUTE * MICROS_PER_SECOND;
4842
public static final long MICROS_PER_HOUR = MINUTES_PER_HOUR * MICROS_PER_MINUTE;
4943
public static final long MICROS_PER_DAY = HOURS_PER_DAY * MICROS_PER_HOUR;
50-
public static final long MICROS_PER_MONTH = DAYS_PER_MONTH * MICROS_PER_DAY;
51-
/* 365.25 days per year assumes leap year every four years */
52-
public static final long MICROS_PER_YEAR = (36525L * MICROS_PER_DAY) / 100;
5344

5445
public static final long NANOS_PER_MICROS = 1000L;
5546
public static final long NANOS_PER_MILLIS = MICROS_PER_MILLIS * NANOS_PER_MICROS;

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,19 +2131,12 @@ object DatePart {
21312131
"MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
21322132
"EPOCH"]
21332133
Supported string values of `field` for intervals are:
2134-
["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"),
2135-
"CENTURY", ("CENTURIES", "C", "CENT"),
2136-
"DECADE", ("DECADES", "DEC", "DECS"),
2137-
"YEAR", ("Y", "YEARS", "YR", "YRS"),
2138-
"QUARTER", ("QTR"),
2139-
"MONTH", ("MON", "MONS", "MONTHS"),
2140-
"DAY", ("D", "DAYS"),
2141-
"HOUR", ("H", "HOURS", "HR", "HRS"),
2142-
"MINUTE", ("M", "MIN", "MINS", "MINUTES"),
2143-
"SECOND", ("S", "SEC", "SECONDS", "SECS"),
2144-
"MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"),
2145-
"MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
2146-
"EPOCH"]
2134+
["YEAR", ("Y", "YEARS", "YR", "YRS"),
2135+
"MONTH", ("MON", "MONS", "MONTHS"),
2136+
"DAY", ("D", "DAYS"),
2137+
"HOUR", ("H", "HOURS", "HR", "HRS"),
2138+
"MINUTE", ("M", "MIN", "MINS", "MINUTES"),
2139+
"SECOND", ("S", "SEC", "SECONDS", "SECS")]
21472140
* source - a date/timestamp or interval column from where `field` should be extracted
21482141
""",
21492142
examples = """

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -45,21 +45,9 @@ abstract class ExtractIntervalPart(
4545
}
4646
}
4747

48-
case class ExtractIntervalMillenniums(child: Expression)
49-
extends ExtractIntervalPart(child, IntegerType, getMillenniums, "getMillenniums")
50-
51-
case class ExtractIntervalCenturies(child: Expression)
52-
extends ExtractIntervalPart(child, IntegerType, getCenturies, "getCenturies")
53-
54-
case class ExtractIntervalDecades(child: Expression)
55-
extends ExtractIntervalPart(child, IntegerType, getDecades, "getDecades")
56-
5748
case class ExtractIntervalYears(child: Expression)
5849
extends ExtractIntervalPart(child, IntegerType, getYears, "getYears")
5950

60-
case class ExtractIntervalQuarters(child: Expression)
61-
extends ExtractIntervalPart(child, ByteType, getQuarters, "getQuarters")
62-
6351
case class ExtractIntervalMonths(child: Expression)
6452
extends ExtractIntervalPart(child, ByteType, getMonths, "getMonths")
6553

@@ -75,38 +63,18 @@ case class ExtractIntervalMinutes(child: Expression)
7563
case class ExtractIntervalSeconds(child: Expression)
7664
extends ExtractIntervalPart(child, DecimalType(8, 6), getSeconds, "getSeconds")
7765

78-
case class ExtractIntervalMilliseconds(child: Expression)
79-
extends ExtractIntervalPart(child, DecimalType(8, 3), getMilliseconds, "getMilliseconds")
80-
81-
case class ExtractIntervalMicroseconds(child: Expression)
82-
extends ExtractIntervalPart(child, LongType, getMicroseconds, "getMicroseconds")
83-
84-
// Number of seconds in 10000 years is 315576000001 (30 days per one month)
85-
// which is 12 digits + 6 digits for the fractional part of seconds.
86-
case class ExtractIntervalEpoch(child: Expression)
87-
extends ExtractIntervalPart(child, DecimalType(18, 6), getEpoch, "getEpoch")
88-
8966
object ExtractIntervalPart {
9067

9168
def parseExtractField(
9269
extractField: String,
9370
source: Expression,
9471
errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match {
95-
case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => ExtractIntervalMillenniums(source)
96-
case "CENTURY" | "CENTURIES" | "C" | "CENT" => ExtractIntervalCenturies(source)
97-
case "DECADE" | "DECADES" | "DEC" | "DECS" => ExtractIntervalDecades(source)
9872
case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => ExtractIntervalYears(source)
99-
case "QUARTER" | "QTR" => ExtractIntervalQuarters(source)
10073
case "MONTH" | "MON" | "MONS" | "MONTHS" => ExtractIntervalMonths(source)
10174
case "DAY" | "D" | "DAYS" => ExtractIntervalDays(source)
10275
case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => ExtractIntervalHours(source)
10376
case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => ExtractIntervalMinutes(source)
10477
case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => ExtractIntervalSeconds(source)
105-
case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" =>
106-
ExtractIntervalMilliseconds(source)
107-
case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" =>
108-
ExtractIntervalMicroseconds(source)
109-
case "EPOCH" => ExtractIntervalEpoch(source)
11078
case _ => errorHandleFunc
11179
}
11280
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -50,26 +50,10 @@ object IntervalUtils {
5050
interval.months / MONTHS_PER_YEAR
5151
}
5252

53-
def getMillenniums(interval: CalendarInterval): Int = {
54-
getYears(interval) / YEARS_PER_MILLENNIUM
55-
}
56-
57-
def getCenturies(interval: CalendarInterval): Int = {
58-
getYears(interval) / YEARS_PER_CENTURY
59-
}
60-
61-
def getDecades(interval: CalendarInterval): Int = {
62-
getYears(interval) / YEARS_PER_DECADE
63-
}
64-
6553
def getMonths(interval: CalendarInterval): Byte = {
6654
(interval.months % MONTHS_PER_YEAR).toByte
6755
}
6856

69-
def getQuarters(interval: CalendarInterval): Byte = {
70-
(getMonths(interval) / MONTHS_PER_QUARTER + 1).toByte
71-
}
72-
7357
def getDays(interval: CalendarInterval): Int = {
7458
interval.days
7559
}
@@ -82,25 +66,8 @@ object IntervalUtils {
8266
((interval.microseconds % MICROS_PER_HOUR) / MICROS_PER_MINUTE).toByte
8367
}
8468

85-
def getMicroseconds(interval: CalendarInterval): Long = {
86-
interval.microseconds % MICROS_PER_MINUTE
87-
}
88-
8969
def getSeconds(interval: CalendarInterval): Decimal = {
90-
Decimal(getMicroseconds(interval), 8, 6)
91-
}
92-
93-
def getMilliseconds(interval: CalendarInterval): Decimal = {
94-
Decimal(getMicroseconds(interval), 8, 3)
95-
}
96-
97-
// Returns total number of seconds with microseconds fractional part in the given interval.
98-
def getEpoch(interval: CalendarInterval): Decimal = {
99-
var result = interval.microseconds
100-
result += MICROS_PER_DAY * interval.days
101-
result += MICROS_PER_YEAR * (interval.months / MONTHS_PER_YEAR)
102-
result += MICROS_PER_MONTH * (interval.months % MONTHS_PER_YEAR)
103-
Decimal(result, 18, 6)
70+
Decimal(interval.microseconds % MICROS_PER_MINUTE, 8, 6)
10471
}
10572

10673
private def toLongWithRange(

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -34,42 +34,6 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
3434
Literal(stringToInterval( "interval " + s))
3535
}
3636

37-
test("millenniums") {
38-
checkEvaluation(ExtractIntervalMillenniums("0 years"), 0)
39-
checkEvaluation(ExtractIntervalMillenniums("9999 years"), 9)
40-
checkEvaluation(ExtractIntervalMillenniums("1000 years"), 1)
41-
checkEvaluation(ExtractIntervalMillenniums("-2000 years"), -2)
42-
// Microseconds part must not be taken into account
43-
checkEvaluation(ExtractIntervalMillenniums("999 years 400 days"), 0)
44-
// Millennium must be taken from years and months
45-
checkEvaluation(ExtractIntervalMillenniums("999 years 12 months"), 1)
46-
checkEvaluation(ExtractIntervalMillenniums("1000 years -1 months"), 0)
47-
}
48-
49-
test("centuries") {
50-
checkEvaluation(ExtractIntervalCenturies("0 years"), 0)
51-
checkEvaluation(ExtractIntervalCenturies("9999 years"), 99)
52-
checkEvaluation(ExtractIntervalCenturies("1000 years"), 10)
53-
checkEvaluation(ExtractIntervalCenturies("-2000 years"), -20)
54-
// Microseconds part must not be taken into account
55-
checkEvaluation(ExtractIntervalCenturies("99 years 400 days"), 0)
56-
// Century must be taken from years and months
57-
checkEvaluation(ExtractIntervalCenturies("99 years 12 months"), 1)
58-
checkEvaluation(ExtractIntervalCenturies("100 years -1 months"), 0)
59-
}
60-
61-
test("decades") {
62-
checkEvaluation(ExtractIntervalDecades("0 years"), 0)
63-
checkEvaluation(ExtractIntervalDecades("9999 years"), 999)
64-
checkEvaluation(ExtractIntervalDecades("1000 years"), 100)
65-
checkEvaluation(ExtractIntervalDecades("-2000 years"), -200)
66-
// Microseconds part must not be taken into account
67-
checkEvaluation(ExtractIntervalDecades("9 years 400 days"), 0)
68-
// Decade must be taken from years and months
69-
checkEvaluation(ExtractIntervalDecades("9 years 12 months"), 1)
70-
checkEvaluation(ExtractIntervalDecades("10 years -1 months"), 0)
71-
}
72-
7337
test("years") {
7438
checkEvaluation(ExtractIntervalYears("0 years"), 0)
7539
checkEvaluation(ExtractIntervalYears("9999 years"), 9999)
@@ -82,19 +46,6 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
8246
checkEvaluation(ExtractIntervalYears("10 years -1 months"), 9)
8347
}
8448

85-
test("quarters") {
86-
checkEvaluation(ExtractIntervalQuarters("0 months"), 1.toByte)
87-
checkEvaluation(ExtractIntervalQuarters("1 months"), 1.toByte)
88-
checkEvaluation(ExtractIntervalQuarters("-1 months"), 1.toByte)
89-
checkEvaluation(ExtractIntervalQuarters("2 months"), 1.toByte)
90-
checkEvaluation(ExtractIntervalQuarters("-2 months"), 1.toByte)
91-
checkEvaluation(ExtractIntervalQuarters("1 years -1 months"), 4.toByte)
92-
checkEvaluation(ExtractIntervalQuarters("-1 years 1 months"), -2.toByte)
93-
checkEvaluation(ExtractIntervalQuarters("2 years 3 months"), 2.toByte)
94-
checkEvaluation(ExtractIntervalQuarters("-2 years -3 months"), 0.toByte)
95-
checkEvaluation(ExtractIntervalQuarters("9999 years"), 1.toByte)
96-
}
97-
9849
test("months") {
9950
checkEvaluation(ExtractIntervalMonths("0 year"), 0.toByte)
10051
for (m <- -24 to 24) {
@@ -158,46 +109,6 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
158109
checkEvaluation(ExtractIntervalSeconds("61 seconds 1 microseconds"), Decimal(1000001, 8, 6))
159110
}
160111

161-
test("milliseconds") {
162-
checkEvaluation(ExtractIntervalMilliseconds("0 milliseconds"), Decimal(0, 8, 3))
163-
checkEvaluation(ExtractIntervalMilliseconds("1 milliseconds"), Decimal(1.0, 8, 3))
164-
checkEvaluation(ExtractIntervalMilliseconds("-1 milliseconds"), Decimal(-1.0, 8, 3))
165-
checkEvaluation(
166-
ExtractIntervalMilliseconds("1 second 999 milliseconds"),
167-
Decimal(1999.0, 8, 3))
168-
checkEvaluation(
169-
ExtractIntervalMilliseconds("999 milliseconds 1 microsecond"),
170-
Decimal(999.001, 8, 3))
171-
checkEvaluation(
172-
ExtractIntervalMilliseconds("-1 second -999 milliseconds"),
173-
Decimal(-1999.0, 8, 3))
174-
// Years and months must not be taken into account
175-
checkEvaluation(ExtractIntervalMilliseconds("100 year 1 millisecond"), Decimal(1.0, 8, 3))
176-
checkEvaluation(ExtractIntervalMilliseconds(largeInterval), Decimal(59999.999, 8, 3))
177-
}
178-
179-
test("microseconds") {
180-
checkEvaluation(ExtractIntervalMicroseconds("0 microseconds"), 0L)
181-
checkEvaluation(ExtractIntervalMicroseconds("1 microseconds"), 1L)
182-
checkEvaluation(ExtractIntervalMicroseconds("-1 microseconds"), -1L)
183-
checkEvaluation(ExtractIntervalMicroseconds("1 second 999 microseconds"), 1000999L)
184-
checkEvaluation(ExtractIntervalMicroseconds("999 milliseconds 1 microseconds"), 999001L)
185-
checkEvaluation(ExtractIntervalMicroseconds("-1 second -999 microseconds"), -1000999L)
186-
// Years and months must not be taken into account
187-
checkEvaluation(ExtractIntervalMicroseconds("11 year 1 microseconds"), 1L)
188-
checkEvaluation(ExtractIntervalMicroseconds(largeInterval), 59999999L)
189-
}
190-
191-
test("epoch") {
192-
checkEvaluation(ExtractIntervalEpoch("0 months"), Decimal(0.0, 18, 6))
193-
checkEvaluation(ExtractIntervalEpoch("10000 years"), Decimal(315576000000.0, 18, 6))
194-
checkEvaluation(ExtractIntervalEpoch("1 year"), Decimal(31557600.0, 18, 6))
195-
checkEvaluation(ExtractIntervalEpoch("-1 year"), Decimal(-31557600.0, 18, 6))
196-
checkEvaluation(
197-
ExtractIntervalEpoch("1 second 1 millisecond 1 microsecond"),
198-
Decimal(1.001001, 18, 6))
199-
}
200-
201112
test("multiply") {
202113
def check(
203114
interval: String,

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
213213
val rebased = rebaseGregorianToJulianMicros(zid, micros)
214214
val rebasedAndOptimized = rebaseGregorianToJulianMicros(micros)
215215
assert(rebasedAndOptimized === rebased)
216-
micros += (MICROS_PER_MONTH * (0.5 + Math.random())).toLong
216+
micros += (MICROS_PER_DAY * 30 * (0.5 + Math.random())).toLong
217217
} while (micros <= end)
218218
}
219219
}
@@ -233,7 +233,7 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
233233
val rebased = rebaseJulianToGregorianMicros(zid, micros)
234234
val rebasedAndOptimized = rebaseJulianToGregorianMicros(micros)
235235
assert(rebasedAndOptimized === rebased)
236-
micros += (MICROS_PER_MONTH * (0.5 + Math.random())).toLong
236+
micros += (MICROS_PER_DAY * 30 * (0.5 + Math.random())).toLong
237237
} while (micros <= end)
238238
}
239239
}

sql/core/src/test/resources/sql-tests/inputs/date_part.sql

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -71,30 +71,12 @@ select date_part(null, c) from t;
7171

7272
CREATE TEMPORARY VIEW t2 AS select interval 1010 year 9 month 8 day 7 hour 6 minute 5 second 4 millisecond 3 microsecond as c;
7373

74-
select date_part('millennium', c) from t2;
75-
select date_part('millennia', c) from t2;
76-
select date_part('mil', c) from t2;
77-
select date_part('mils', c) from t2;
78-
79-
select date_part('century', c) from t2;
80-
select date_part('centuries', c) from t2;
81-
select date_part('c', c) from t2;
82-
select date_part('cent', c) from t2;
83-
84-
select date_part('decade', c) from t2;
85-
select date_part('decades', c) from t2;
86-
select date_part('dec', c) from t2;
87-
select date_part('decs', c) from t2;
88-
8974
select date_part('year', c) from t2;
9075
select date_part('y', c) from t2;
9176
select date_part('years', c) from t2;
9277
select date_part('yr', c) from t2;
9378
select date_part('yrs', c) from t2;
9479

95-
select date_part('quarter', c) from t2;
96-
select date_part('qtr', c) from t2;
97-
9880
select date_part('month', c) from t2;
9981
select date_part('mon', c) from t2;
10082
select date_part('mons', c) from t2;
@@ -122,22 +104,6 @@ select date_part('sec', c) from t2;
122104
select date_part('seconds', c) from t2;
123105
select date_part('secs', c) from t2;
124106

125-
select date_part('milliseconds', c) from t2;
126-
select date_part('msec', c) from t2;
127-
select date_part('msecs', c) from t2;
128-
select date_part('millisecon', c) from t2;
129-
select date_part('mseconds', c) from t2;
130-
select date_part('ms', c) from t2;
131-
132-
select date_part('microseconds', c) from t2;
133-
select date_part('usec', c) from t2;
134-
select date_part('usecs', c) from t2;
135-
select date_part('useconds', c) from t2;
136-
select date_part('microsecon', c) from t2;
137-
select date_part('us', c) from t2;
138-
139-
select date_part('epoch', c) from t2;
140-
141107
select date_part('not_supported', c) from t2;
142108

143109
select date_part(c, c) from t2;

0 commit comments

Comments
 (0)