Skip to content

Commit 051e691

Browse files
MaxGekk authored and HyukjinKwon committed
[SPARK-28141][SQL] Support special date values
### What changes were proposed in this pull request?

Added support for special string values of the `DATE` type. They are simply notational shorthands that are converted to ordinary date values when read. The following string values are supported:

- `epoch [zoneId]` - `1970-01-01`
- `today [zoneId]` - the current date in the time zone specified by `spark.sql.session.timeZone`.
- `yesterday [zoneId]` - the current date - 1
- `tomorrow [zoneId]` - the current date + 1
- `now` - the date on which the current query is run. It has the same meaning as `today`.

For example:

```sql
spark-sql> SELECT date 'tomorrow' - date 'yesterday';
2
```

### Why are the changes needed?

To maintain feature parity with PostgreSQL, see [8.5.1.4. Special Values](https://www.postgresql.org/docs/12/datatype-datetime.html)

### Does this PR introduce any user-facing change?

Previously, the parser failed on the special values with the error:

```sql
spark-sql> select date 'today';
Error in query: Cannot parse the DATE value: today(line 1, pos 7)
```

After the changes, the special values are converted to the appropriate dates:

```sql
spark-sql> select date 'today';
2019-09-06
```

### How was this patch tested?

- Added tests to `DateFormatterSuite` to check parsing special values from regular strings.
- Tests in `DateTimeUtilsSuite` check parsing those values from `UTF8String`.
- Uncommented tests in `date.sql`.

Closes #25708 from MaxGekk/datetime-special-values.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
1 parent e2c4787 commit 051e691

File tree

23 files changed

+400
-225
lines changed

23 files changed

+400
-225
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ object CatalogColumnStat extends Logging {
491491
dataType match {
492492
case BooleanType => s.toBoolean
493493
case DateType if version == 1 => DateTimeUtils.fromJavaDate(java.sql.Date.valueOf(s))
494-
case DateType => DateFormatter().parse(s)
494+
case DateType => DateFormatter(ZoneOffset.UTC).parse(s)
495495
case TimestampType if version == 1 =>
496496
DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf(s))
497497
case TimestampType => getTimestampFormatter().parse(s)
@@ -516,7 +516,7 @@ object CatalogColumnStat extends Logging {
516516
*/
517517
def toExternalString(v: Any, colName: String, dataType: DataType): String = {
518518
val externalValue = dataType match {
519-
case DateType => DateFormatter().format(v.asInstanceOf[Int])
519+
case DateType => DateFormatter(ZoneOffset.UTC).format(v.asInstanceOf[Int])
520520
case TimestampType => getTimestampFormatter().format(v.asInstanceOf[Long])
521521
case BooleanType | _: IntegralType | FloatType | DoubleType => v
522522
case _: DecimalType => v.asInstanceOf[Decimal].toJavaBigDecimal

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ class UnivocityGenerator(
4545
options.timestampFormat,
4646
options.zoneId,
4747
options.locale)
48-
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
48+
private val dateFormatter = DateFormatter(
49+
options.dateFormat,
50+
options.zoneId,
51+
options.locale)
4952

5053
private def makeConverter(dataType: DataType): ValueConverter = dataType match {
5154
case DateType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,10 @@ class UnivocityParser(
7878
options.timestampFormat,
7979
options.zoneId,
8080
options.locale)
81-
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
81+
private val dateFormatter = DateFormatter(
82+
options.dateFormat,
83+
options.zoneId,
84+
options.locale)
8285

8386
// Retrieve the raw record string.
8487
private def getCurrentInput: UTF8String = {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ object Cast {
106106
* * Cast.castToTimestamp
107107
*/
108108
def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match {
109-
case (StringType, TimestampType) => true
109+
case (StringType, TimestampType | DateType) => true
110110
case (DateType, TimestampType) => true
111111
case (TimestampType, StringType) => true
112112
case (TimestampType, DateType) => true
@@ -287,7 +287,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
287287
// [[func]] assumes the input is no longer null because eval already does the null check.
288288
@inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T])
289289

290-
private lazy val dateFormatter = DateFormatter()
290+
private lazy val dateFormatter = DateFormatter(zoneId)
291291
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
292292
private val failOnIntegralTypeOverflow = SQLConf.get.ansiEnabled
293293

@@ -469,7 +469,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
469469
// DateConverter
470470
private[this] def castToDate(from: DataType): Any => Any = from match {
471471
case StringType =>
472-
buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull)
472+
buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull)
473473
case TimestampType =>
474474
// throw valid precision more than seconds, according to Hive.
475475
// Timestamp.nanos is in 0 to 999,999,999, no more than a second.
@@ -1056,28 +1056,35 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
10561056

10571057
private[this] def castToDateCode(
10581058
from: DataType,
1059-
ctx: CodegenContext): CastFunction = from match {
1060-
case StringType =>
1061-
val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
1062-
(c, evPrim, evNull) => code"""
1063-
scala.Option<Integer> $intOpt =
1064-
org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c);
1065-
if ($intOpt.isDefined()) {
1066-
$evPrim = ((Integer) $intOpt.get()).intValue();
1067-
} else {
1068-
$evNull = true;
1069-
}
1070-
"""
1071-
case TimestampType =>
1059+
ctx: CodegenContext): CastFunction = {
1060+
def getZoneId() = {
10721061
val zoneIdClass = classOf[ZoneId]
1073-
val zid = JavaCode.global(
1062+
JavaCode.global(
10741063
ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
10751064
zoneIdClass)
1076-
(c, evPrim, evNull) =>
1077-
code"""$evPrim =
1078-
org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);"""
1079-
case _ =>
1080-
(c, evPrim, evNull) => code"$evNull = true;"
1065+
}
1066+
from match {
1067+
case StringType =>
1068+
val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
1069+
val zid = getZoneId()
1070+
(c, evPrim, evNull) =>
1071+
code"""
1072+
scala.Option<Integer> $intOpt =
1073+
org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid);
1074+
if ($intOpt.isDefined()) {
1075+
$evPrim = ((Integer) $intOpt.get()).intValue();
1076+
} else {
1077+
$evNull = true;
1078+
}
1079+
"""
1080+
case TimestampType =>
1081+
val zid = getZoneId()
1082+
(c, evPrim, evNull) =>
1083+
code"""$evPrim =
1084+
org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);"""
1085+
case _ =>
1086+
(c, evPrim, evNull) => code"$evNull = true;"
1087+
}
10811088
}
10821089

10831090
private[this] def changePrecision(d: ExprValue, decimalType: DecimalType,

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@ case class CurrentDate(timeZoneId: Option[String] = None)
7676
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
7777
copy(timeZoneId = Option(timeZoneId))
7878

79-
override def eval(input: InternalRow): Any = {
80-
localDateToDays(LocalDate.now(zoneId))
81-
}
79+
override def eval(input: InternalRow): Any = currentDate(zoneId)
8280

8381
override def prettyName: String = "current_date"
8482
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
371371
case _ => v + "D"
372372
}
373373
case (v: Decimal, t: DecimalType) => v + "BD"
374-
case (v: Int, DateType) => s"DATE '${DateFormatter().format(v)}'"
374+
case (v: Int, DateType) =>
375+
val formatter = DateFormatter(DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
376+
s"DATE '${formatter.format(v)}'"
375377
case (v: Long, TimestampType) =>
376378
val formatter = TimestampFormatter.getFractionFormatter(
377379
DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,10 @@ private[sql] class JacksonGenerator(
8181
options.timestampFormat,
8282
options.zoneId,
8383
options.locale)
84-
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
84+
private val dateFormatter = DateFormatter(
85+
options.dateFormat,
86+
options.zoneId,
87+
options.locale)
8588

8689
private def makeWriter(dataType: DataType): ValueWriter = dataType match {
8790
case NullType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,10 @@ class JacksonParser(
5959
options.timestampFormat,
6060
options.zoneId,
6161
options.locale)
62-
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
62+
private val dateFormatter = DateFormatter(
63+
options.dateFormat,
64+
options.zoneId,
65+
options.locale)
6366

6467
/**
6568
* Create a converter which converts the JSON documents held by the `JsonParser`

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1734,7 +1734,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
17341734
}
17351735
try {
17361736
valueType match {
1737-
case "DATE" => toLiteral(stringToDate, DateType)
1737+
case "DATE" =>
1738+
toLiteral(stringToDate(_, getZoneId(SQLConf.get.sessionLocalTimeZone)), DateType)
17381739
case "TIMESTAMP" =>
17391740
val zoneId = getZoneId(SQLConf.get.sessionLocalTimeZone)
17401741
toLiteral(stringToTimestamp(_, zoneId), TimestampType)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,30 @@
1717

1818
package org.apache.spark.sql.catalyst.util
1919

20-
import java.time.LocalDate
20+
import java.time.{LocalDate, ZoneId}
2121
import java.util.Locale
2222

23+
import DateTimeUtils.{convertSpecialDate, localDateToDays}
24+
2325
sealed trait DateFormatter extends Serializable {
2426
def parse(s: String): Int // returns days since epoch
2527
def format(days: Int): String
2628
}
2729

2830
class Iso8601DateFormatter(
2931
pattern: String,
32+
zoneId: ZoneId,
3033
locale: Locale) extends DateFormatter with DateTimeFormatterHelper {
3134

3235
@transient
3336
private lazy val formatter = getOrCreateFormatter(pattern, locale)
3437

3538
override def parse(s: String): Int = {
36-
val localDate = LocalDate.parse(s, formatter)
37-
DateTimeUtils.localDateToDays(localDate)
39+
val specialDate = convertSpecialDate(s.trim, zoneId)
40+
specialDate.getOrElse {
41+
val localDate = LocalDate.parse(s, formatter)
42+
localDateToDays(localDate)
43+
}
3844
}
3945

4046
override def format(days: Int): String = {
@@ -46,11 +52,13 @@ object DateFormatter {
4652
val defaultPattern: String = "uuuu-MM-dd"
4753
val defaultLocale: Locale = Locale.US
4854

49-
def apply(format: String, locale: Locale): DateFormatter = {
50-
new Iso8601DateFormatter(format, locale)
55+
def apply(format: String, zoneId: ZoneId, locale: Locale): DateFormatter = {
56+
new Iso8601DateFormatter(format, zoneId, locale)
5157
}
5258

53-
def apply(format: String): DateFormatter = apply(format, defaultLocale)
59+
def apply(format: String, zoneId: ZoneId): DateFormatter = {
60+
apply(format, zoneId, defaultLocale)
61+
}
5462

55-
def apply(): DateFormatter = apply(defaultPattern)
63+
def apply(zoneId: ZoneId): DateFormatter = apply(defaultPattern, zoneId)
5664
}

0 commit comments

Comments
 (0)