-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-28141][SQL] Support special date values #25708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c5034a8
695a6c6
ebc5b02
2663542
edda1f2
8d023ec
775c7ec
be787f4
f356b72
864e456
e237b27
0055a8a
4fb8834
9a2349b
6da7905
d57e11e
33befed
e2275d4
dabb6ec
ce7e04d
0fd86a0
92c5509
254567d
d61fdc7
03d3126
106524b
0b0e5d4
ff92531
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,7 +106,7 @@ object Cast { | |
| * * Cast.castToTimestamp | ||
| */ | ||
| def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match { | ||
| case (StringType, TimestampType) => true | ||
| case (StringType, TimestampType | DateType) => true | ||
| case (DateType, TimestampType) => true | ||
| case (TimestampType, StringType) => true | ||
| case (TimestampType, DateType) => true | ||
|
|
@@ -287,7 +287,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
| // [[func]] assumes the input is no longer null because eval already does the null check. | ||
| @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T]) | ||
|
|
||
| private lazy val dateFormatter = DateFormatter() | ||
| private lazy val dateFormatter = DateFormatter(zoneId) | ||
| private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) | ||
| private val failOnIntegralTypeOverflow = SQLConf.get.failOnIntegralTypeOverflow | ||
|
|
||
|
|
@@ -469,7 +469,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
| // DateConverter | ||
| private[this] def castToDate(from: DataType): Any => Any = from match { | ||
| case StringType => | ||
| buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull) | ||
| buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) | ||
| case TimestampType => | ||
| // throw valid precision more than seconds, according to Hive. | ||
| // Timestamp.nanos is in 0 to 999,999,999, no more than a second. | ||
|
|
@@ -1056,28 +1056,35 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
|
|
||
| private[this] def castToDateCode( | ||
| from: DataType, | ||
| ctx: CodegenContext): CastFunction = from match { | ||
| case StringType => | ||
| val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) | ||
| (c, evPrim, evNull) => code""" | ||
| scala.Option<Integer> $intOpt = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c); | ||
| if ($intOpt.isDefined()) { | ||
| $evPrim = ((Integer) $intOpt.get()).intValue(); | ||
| } else { | ||
| $evNull = true; | ||
| } | ||
| """ | ||
| case TimestampType => | ||
| ctx: CodegenContext): CastFunction = { | ||
| def getZoneId() = { | ||
| val zoneIdClass = classOf[ZoneId] | ||
| val zid = JavaCode.global( | ||
| JavaCode.global( | ||
| ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), | ||
| zoneIdClass) | ||
| (c, evPrim, evNull) => | ||
| code"""$evPrim = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);""" | ||
| case _ => | ||
| (c, evPrim, evNull) => code"$evNull = true;" | ||
| } | ||
| from match { | ||
| case StringType => | ||
| val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) | ||
| val zid = getZoneId() | ||
| (c, evPrim, evNull) => | ||
| code""" | ||
| scala.Option<Integer> $intOpt = | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. btw, (this is not related to this pr though), |
||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); | ||
| if ($intOpt.isDefined()) { | ||
| $evPrim = ((Integer) $intOpt.get()).intValue(); | ||
| } else { | ||
| $evNull = true; | ||
| } | ||
| """ | ||
| case TimestampType => | ||
| val zid = getZoneId() | ||
| (c, evPrim, evNull) => | ||
| code"""$evPrim = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);""" | ||
| case _ => | ||
| (c, evPrim, evNull) => code"$evNull = true;" | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just put the gen'd code for other reviewers; |
||
|
|
||
| private[this] def changePrecision(d: ExprValue, decimalType: DecimalType, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,7 +24,6 @@ import java.time.temporal.{ChronoField, ChronoUnit, IsoFields} | |
| import java.util.{Locale, TimeZone} | ||
| import java.util.concurrent.TimeUnit._ | ||
|
|
||
| import scala.util.Try | ||
| import scala.util.control.NonFatal | ||
|
|
||
| import org.apache.spark.sql.types.Decimal | ||
|
|
@@ -379,14 +378,16 @@ object DateTimeUtils { | |
| * `yyyy-[m]m-[d]d *` | ||
| * `yyyy-[m]m-[d]dT*` | ||
| */ | ||
| def stringToDate(s: UTF8String): Option[SQLDate] = { | ||
| def stringToDate(s: UTF8String, zoneId: ZoneId): Option[SQLDate] = { | ||
| if (s == null) { | ||
| return None | ||
| } | ||
| val segments: Array[Int] = Array[Int](1, 1, 1) | ||
| var i = 0 | ||
| var currentSegmentValue = 0 | ||
| val bytes = s.trim.getBytes | ||
| val specialDate = convertSpecialDate(bytes, zoneId) | ||
| if (specialDate.isDefined) return specialDate | ||
| var j = 0 | ||
| while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) { | ||
| val b = bytes(j) | ||
|
|
@@ -855,6 +856,8 @@ object DateTimeUtils { | |
|
|
||
| def currentTimestamp(): SQLTimestamp = instantToMicros(Instant.now()) | ||
|
|
||
| def currentDate(zoneId: ZoneId): SQLDate = localDateToDays(LocalDate.now(zoneId)) | ||
|
|
||
| private def today(zoneId: ZoneId): ZonedDateTime = { | ||
| Instant.now().atZone(zoneId).`with`(LocalTime.MIDNIGHT) | ||
| } | ||
|
|
@@ -915,4 +918,28 @@ object DateTimeUtils { | |
| None | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: add a blank line.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added and renamed |
||
| * Converts notational shorthands that are converted to ordinary dates. | ||
| * @param input - a trimmed string | ||
| * @param zoneId - zone identifier used to get the current date. | ||
| * @return some of days since the epoch if the conversion completed successfully otherwise None. | ||
| */ | ||
| def convertSpecialDate(input: String, zoneId: ZoneId): Option[SQLDate] = { | ||
| extractSpecialValue(input, zoneId).flatMap { | ||
| case "epoch" => Some(0) | ||
| case "now" | "today" => Some(currentDate(zoneId)) | ||
| case "tomorrow" => Some(Math.addExact(currentDate(zoneId), 1)) | ||
| case "yesterday" => Some(Math.subtractExact(currentDate(zoneId), 1)) | ||
| case _ => None | ||
| } | ||
| } | ||
|
|
||
| private def convertSpecialDate(bytes: Array[Byte], zoneId: ZoneId): Option[SQLDate] = { | ||
| if (bytes.length > 0 && Character.isAlphabetic(bytes(0))) { | ||
| convertSpecialDate(new String(bytes, StandardCharsets.UTF_8), zoneId) | ||
| } else { | ||
| None | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.