
Commit ff13f57

gengliangwang authored and cloud-fan committed
[SPARK-20044][SQL] Add new function DATE_FROM_UNIX_DATE and UNIX_DATE
### What changes were proposed in this pull request?

Add new functions DATE_FROM_UNIX_DATE and UNIX_DATE for conversion between the Date type and numeric types.

### Why are the changes needed?

1. Explicit conversion between the Date type and numeric types is disallowed in ANSI mode. We need to provide new functions for users to complete the conversion.
2. We have already introduced new functions from BigQuery for conversion between the Timestamp type and numeric types: TIMESTAMP_SECONDS, TIMESTAMP_MILLIS, TIMESTAMP_MICROS, UNIX_SECONDS, UNIX_MILLIS, and UNIX_MICROS. It makes sense to add functions for conversion between the Date type and numeric types as well.

### Does this PR introduce _any_ user-facing change?

Yes, two new datetime functions are added.

### How was this patch tested?

Unit tests.

Closes #30588 from gengliangwang/dateToNumber.

Authored-by: Gengliang Wang <gengliang.wang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 3b2ff16 · commit ff13f57
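For quick orientation, here is a minimal usage sketch of the two new functions. This is a sketch only: it assumes a local SparkSession built from a branch containing this commit, and the app name and master are illustrative; the expected values match the golden-file results further down.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: app name and master are illustrative, not part of this patch.
val spark = SparkSession.builder().appName("unix-date-demo").master("local[*]").getOrCreate()

// DATE_FROM_UNIX_DATE(n): interpret an Int as days since 1970-01-01 and return a DATE.
// UNIX_DATE(d): the inverse, returning the number of days since 1970-01-01 as an Int.
spark.sql("SELECT DATE_FROM_UNIX_DATE(1000) AS d, UNIX_DATE(DATE'2020-12-04') AS n").show()
// +----------+-----+
// |         d|    n|
// +----------+-----+
// |1972-09-27|18600|
// +----------+-----+
```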

File tree: 8 files changed, +130 / -5 lines changed


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 2 additions & 0 deletions
```diff
@@ -427,6 +427,8 @@ object FunctionRegistry {
     expression[MakeInterval]("make_interval"),
     expression[DatePart]("date_part"),
     expression[Extract]("extract"),
+    expression[DateFromUnixDate]("date_from_unix_date"),
+    expression[UnixDate]("unix_date"),
     expression[SecondsToTimestamp]("timestamp_seconds"),
     expression[MillisToTimestamp]("timestamp_millis"),
     expression[MicrosToTimestamp]("timestamp_micros"),
```
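Registering the expressions under these names is what makes them resolvable from SQL. As a sanity check (not part of the patch, and reusing the hypothetical `spark` session from the earlier snippet), the registration and the @ExpressionDescription metadata added below can be inspected with DESCRIBE FUNCTION:

```scala
// Sketch only: reuses the `spark` session from the earlier snippet.
// DESCRIBE FUNCTION EXTENDED prints the usage and examples attached via @ExpressionDescription.
spark.sql("DESCRIBE FUNCTION EXTENDED date_from_unix_date").show(truncate = false)
spark.sql("DESCRIBE FUNCTION EXTENDED unix_date").show(truncate = false)
```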

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 46 additions & 0 deletions
```diff
@@ -400,6 +400,52 @@ case class DayOfYear(child: Expression) extends GetDateField {
   override val funcName = "getDayInYear"
 }
 
+@ExpressionDescription(
+  usage = "_FUNC_(days) - Create date from the number of days since 1970-01-01.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(1);
+       1970-01-02
+  """,
+  group = "datetime_funcs",
+  since = "3.1.0")
+case class DateFromUnixDate(child: Expression) extends UnaryExpression
+  with ImplicitCastInputTypes with NullIntolerant {
+  override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType)
+
+  override def dataType: DataType = DateType
+
+  override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int]
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+    defineCodeGen(ctx, ev, c => c)
+
+  override def prettyName: String = "date_from_unix_date"
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the number of days since 1970-01-01.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(DATE("1970-01-02"));
+       1
+  """,
+  group = "datetime_funcs",
+  since = "3.1.0")
+case class UnixDate(child: Expression) extends UnaryExpression
+  with ExpectsInputTypes with NullIntolerant {
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int]
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+    defineCodeGen(ctx, ev, c => c)
+
+  override def prettyName: String = "unix_date"
+}
+
 abstract class IntegralToTimestampBase extends UnaryExpression
   with ExpectsInputTypes with NullIntolerant {
 
```
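Both expressions look like no-ops because Spark stores DateType values internally as an Int counting days since 1970-01-01; the conversion therefore just passes the underlying value through, which is also why the generated code is the identity `c => c`. A small REPL-style sketch (Catalyst internals only, not the public SQL surface):

```scala
import java.time.LocalDate
import org.apache.spark.sql.catalyst.expressions.{DateFromUnixDate, Literal, UnixDate}

// Sketch only: this exercises internal Catalyst APIs, not user-facing ones.
// DateType is an Int of days since the epoch, so both conversions are pass-through.
val toDate = DateFromUnixDate(Literal(3)).eval()                  // 3, rendered externally as 1970-01-04
val toDays = UnixDate(Literal(LocalDate.ofEpochDay(3))).eval()    // 3
```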

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 24 additions & 0 deletions
```diff
@@ -1245,6 +1245,30 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkResult(Int.MinValue.toLong - 100)
   }
 
+  test("DATE_FROM_UNIX_DATE") {
+    def testIntegralFunc(value: Number): Unit = {
+      checkEvaluation(
+        DateFromUnixDate(Literal(value.intValue())),
+        LocalDate.ofEpochDay(value.intValue()))
+    }
+    // test null input
+    checkEvaluation(DateFromUnixDate(Literal(null, IntegerType)), null)
+    // test integral input
+    testIntegralInput(testIntegralFunc)
+  }
+
+  test("UNIX_DATE") {
+    def testIntegralFunc(value: Number): Unit = {
+      checkEvaluation(
+        UnixDate(Literal(LocalDate.ofEpochDay(value.intValue()))),
+        value.intValue())
+    }
+    // test null input
+    checkEvaluation(UnixDate(Literal(null, DateType)), null)
+    // test various inputs
+    testIntegralInput(testIntegralFunc)
+  }
+
   test("UNIX_SECONDS") {
     checkEvaluation(UnixSeconds(Literal(null, TimestampType)), null)
     var timestamp = Literal(new Timestamp(0L))
```

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 3 additions & 1 deletion
```diff
@@ -1,6 +1,6 @@
 <!-- Automatically generated by ExpressionsSchemaSuite -->
 ## Summary
-- Number of queries: 345
+- Number of queries: 347
 - Number of expressions that missing example: 13
 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
 ## Schema of Built-in Functions
@@ -91,6 +91,7 @@
 | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct<date_add(CAST(2016-07-30 AS DATE), 1):date> |
 | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct<datediff(CAST(2009-07-31 AS DATE), CAST(2009-07-30 AS DATE)):int> |
 | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct<date_format(CAST(2016-04-08 AS TIMESTAMP), y):string> |
+| org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct<date_from_unix_date(1):date> |
 | org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
 | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(CAST(2016-07-30 AS DATE), 1):date> |
 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(CAST(2009-07-30 AS DATE)):int> |
@@ -289,6 +290,7 @@
 | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
 | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> |
 | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
+| org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct<unix_date(CAST(1970-01-02 AS DATE)):int> |
 | org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_micros(CAST(1970-01-01 00:00:01Z AS TIMESTAMP)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_millis(CAST(1970-01-01 00:00:01Z AS TIMESTAMP)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_seconds(CAST(1970-01-01 00:00:01Z AS TIMESTAMP)):bigint> |
```

sql/core/src/test/resources/sql-tests/inputs/datetime.sql

Lines changed: 4 additions & 1 deletion
```diff
@@ -18,7 +18,10 @@ select TIMESTAMP_SECONDS(0.1234567d), TIMESTAMP_SECONDS(FLOAT(0.1234567));
 select UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_SECONDS(null);
 select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null);
 select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null);
-
+-- DATE_FROM_UNIX_DATE
+select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null);
+-- UNIX_DATE
+select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null);
 -- [SPARK-16836] current_date and current_timestamp literals
 select current_date = current_date(), current_timestamp = current_timestamp();
 
```

sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out

Lines changed: 17 additions & 1 deletion
```diff
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 120
+-- Number of queries: 122
 
 
 -- !query
@@ -111,6 +111,22 @@ struct<unix_micros(CAST(2020-12-01 14:30:08Z AS TIMESTAMP)):bigint,unix_micros(C
 1606833008000000 1606833008999999 NULL
 
 
+-- !query
+select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null)
+-- !query schema
+struct<date_from_unix_date(0):date,date_from_unix_date(1000):date,date_from_unix_date(CAST(NULL AS INT)):date>
+-- !query output
+1970-01-01 1972-09-27 NULL
+
+
+-- !query
+select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null)
+-- !query schema
+struct<unix_date(CAST(1970-01-01 AS DATE)):int,unix_date(CAST(2020-12-04 AS DATE)):int,unix_date(CAST(NULL AS DATE)):int>
+-- !query output
+0 18600 NULL
+
+
 -- !query
 select current_date = current_date(), current_timestamp = current_timestamp()
 -- !query schema
```

sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out

Lines changed: 17 additions & 1 deletion
```diff
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 120
+-- Number of queries: 122
 
 
 -- !query
@@ -111,6 +111,22 @@ struct<unix_micros(CAST(2020-12-01 14:30:08Z AS TIMESTAMP)):bigint,unix_micros(C
 1606833008000000 1606833008999999 NULL
 
 
+-- !query
+select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null)
+-- !query schema
+struct<date_from_unix_date(0):date,date_from_unix_date(1000):date,date_from_unix_date(CAST(NULL AS INT)):date>
+-- !query output
+1970-01-01 1972-09-27 NULL
+
+
+-- !query
+select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null)
+-- !query schema
+struct<unix_date(CAST(1970-01-01 AS DATE)):int,unix_date(CAST(2020-12-04 AS DATE)):int,unix_date(CAST(NULL AS DATE)):int>
+-- !query output
+0 18600 NULL
+
+
 -- !query
 select current_date = current_date(), current_timestamp = current_timestamp()
 -- !query schema
```

sql/core/src/test/resources/sql-tests/results/datetime.sql.out

Lines changed: 17 additions & 1 deletion
```diff
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 120
+-- Number of queries: 122
 
 
 -- !query
@@ -111,6 +111,22 @@ struct<unix_micros(CAST(2020-12-01 14:30:08Z AS TIMESTAMP)):bigint,unix_micros(C
 1606833008000000 1606833008999999 NULL
 
 
+-- !query
+select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null)
+-- !query schema
+struct<date_from_unix_date(0):date,date_from_unix_date(1000):date,date_from_unix_date(CAST(NULL AS INT)):date>
+-- !query output
+1970-01-01 1972-09-27 NULL
+
+
+-- !query
+select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null)
+-- !query schema
+struct<unix_date(CAST(1970-01-01 AS DATE)):int,unix_date(CAST(2020-12-04 AS DATE)):int,unix_date(CAST(NULL AS DATE)):int>
+-- !query output
+0 18600 NULL
+
+
 -- !query
 select current_date = current_date(), current_timestamp = current_timestamp()
 -- !query schema
```
