
Commit 60be6d2

MaxGekk authored and srowen committed
[SPARK-27109][SQL] Refactoring of TimestampFormatter and DateFormatter
## What changes were proposed in this pull request?

In the PR, I propose to refactor the `parse()` method of `Iso8601DateFormatter`/`Iso8601TimestampFormatter` and the `toInstantWithZoneId()` method of `DateTimeFormatterHelper` to achieve the following:

- Avoid unnecessary conversion of the parsed input to `java.time.Instant` before converting it to micros and days. The necessary information already exists in `ZonedDateTime`, and micros/days can be extracted from it directly.
- Avoid an additional extraction of `LocalTime` from the parsed object, more precisely a double query of `TemporalQueries.localTime` on the `temporalAccessor`.
- Avoid an additional extraction of the zone id from the parsed object, in particular a double query of `TemporalQueries.offset()`.
- Use `ZoneOffset.UTC` instead of `ZoneId.of("UTC")` in `DateFormatter`, which avoids looking up the zone offset by zone id.

## How was this patch tested?

By the existing test suites `DateTimeUtilsSuite`, `TimestampFormatterSuite` and `DateFormatterSuite`.

Closes #24030 from MaxGekk/query-localtime.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
1 parent 1029bf9 commit 60be6d2
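
For context, here is a minimal, standalone java.time sketch of the conversion the description refers to; it is an illustration only, not the Spark code itself, and the pattern and sample timestamp are made up. The old path materialized a `java.time.Instant` from the parsed value before extracting epoch micros; the new path reads epoch seconds and the micro-of-second field directly from the `ZonedDateTime`.

import java.time.{Instant, LocalDateTime, ZonedDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField.MICRO_OF_SECOND
import java.util.concurrent.TimeUnit.SECONDS

object ConversionSketch {
  def main(args: Array[String]): Unit = {
    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS")
    val parsed = formatter.parse("2019-03-08 12:34:56.123456")
    val zdt = ZonedDateTime.of(LocalDateTime.from(parsed), ZoneOffset.UTC)

    // Old-style path: build an Instant first, then convert it to microseconds.
    val instant = Instant.from(zdt)
    val oldMicros = Math.addExact(SECONDS.toMicros(instant.getEpochSecond), instant.getNano / 1000L)

    // New-style path: take epoch seconds and micro-of-second straight from the ZonedDateTime.
    val newMicros = Math.addExact(SECONDS.toMicros(zdt.toEpochSecond), zdt.get(MICRO_OF_SECOND).toLong)

    assert(oldMicros == newMicros) // both yield 1552048496123456
  }
}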

4 files changed (+33, -33 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala

Lines changed: 8 additions & 10 deletions
@@ -17,10 +17,9 @@
 
 package org.apache.spark.sql.catalyst.util
 
-import java.time.{Instant, ZoneId}
+import java.time.{Instant, ZoneOffset}
 import java.util.Locale
-
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToDays
+import java.util.concurrent.TimeUnit.SECONDS
 
 sealed trait DateFormatter extends Serializable {
   def parse(s: String): Int // returns days since epoch
@@ -33,18 +32,17 @@ class Iso8601DateFormatter(
 
   @transient
   private lazy val formatter = getOrCreateFormatter(pattern, locale)
-  private val UTC = ZoneId.of("UTC")
 
-  private def toInstant(s: String): Instant = {
-    val temporalAccessor = formatter.parse(s)
-    toInstantWithZoneId(temporalAccessor, UTC)
+  override def parse(s: String): Int = {
+    val parsed = formatter.parse(s)
+    val zonedDateTime = toZonedDateTime(parsed, ZoneOffset.UTC)
+    val seconds = zonedDateTime.toEpochSecond
+    SECONDS.toDays(seconds).toInt
   }
 
-  override def parse(s: String): Int = instantToDays(toInstant(s))
-
   override def format(days: Int): String = {
     val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY)
-    formatter.withZone(UTC).format(instant)
+    formatter.withZone(ZoneOffset.UTC).format(instant)
   }
 }
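
As a usage illustration of the new date path, here is a hedged, self-contained sketch; the object name, helper name, pattern and sample dates are assumptions for the example, not part of the patch. It parses a date-only string, anchors it at midnight UTC, and turns epoch seconds into days since the epoch.

import java.time.{LocalTime, ZonedDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
import java.time.temporal.TemporalQueries
import java.util.concurrent.TimeUnit.SECONDS

object EpochDaysSketch {
  // Illustrative counterpart of the refactored parse: date-only input,
  // time defaults to midnight, and the zone is fixed to UTC.
  def toEpochDays(s: String, formatter: DateTimeFormatter): Int = {
    val parsed = formatter.parse(s)
    val localDate = parsed.query(TemporalQueries.localDate)
    val zdt = ZonedDateTime.of(localDate, LocalTime.MIDNIGHT, ZoneOffset.UTC)
    SECONDS.toDays(zdt.toEpochSecond).toInt
  }

  def main(args: Array[String]): Unit = {
    val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd")
    println(toEpochDays("1970-01-02", fmt)) // 1
    println(toEpochDays("2019-03-09", fmt)) // 17964
  }
}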

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 12 additions & 10 deletions
@@ -28,16 +28,18 @@ import com.google.common.cache.CacheBuilder
 import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
 
 trait DateTimeFormatterHelper {
-  protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = {
-    val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) {
-      LocalTime.ofNanoOfDay(0)
-    } else {
-      LocalTime.from(temporalAccessor)
-    }
-    val localDate = LocalDate.from(temporalAccessor)
-    val localDateTime = LocalDateTime.of(localDate, localTime)
-    val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
-    Instant.from(zonedDateTime)
+  // Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros
+  // if they do not exist in the parsed object.
+  protected def toZonedDateTime(
+      temporalAccessor: TemporalAccessor,
+      zoneId: ZoneId): ZonedDateTime = {
+    // Parsed input might not have a time-related part. In that case, the time component is set to zeros.
+    val parsedLocalTime = temporalAccessor.query(TemporalQueries.localTime)
+    val localTime = if (parsedLocalTime == null) LocalTime.MIDNIGHT else parsedLocalTime
+    // Parsed input must have a date component. At least, the year must be present in temporalAccessor.
+    val localDate = temporalAccessor.query(TemporalQueries.localDate)
+
+    ZonedDateTime.of(localDate, localTime, zoneId)
   }
 
   // Gets a formatter from the cache or creates new one. The buildFormatter method can be called
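
The null check above is easiest to see in isolation. Here is a small sketch, independent of Spark (the object and value names are illustrative), showing that `TemporalQueries.localTime` yields null for a date-only parse, which is exactly the case the `LocalTime.MIDNIGHT` fallback covers.

import java.time.LocalTime
import java.time.format.DateTimeFormatter
import java.time.temporal.TemporalQueries

object LocalTimeQuerySketch {
  def main(args: Array[String]): Unit = {
    // A date-only pattern parses no time fields, so the localTime query returns null.
    val dateOnly = DateTimeFormatter.ofPattern("yyyy-MM-dd").parse("2019-03-09")
    println(dateOnly.query(TemporalQueries.localTime)) // null

    // A date-time pattern makes the same query return the parsed LocalTime.
    val dateTime = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").parse("2019-03-09 01:02")
    println(dateTime.query(TemporalQueries.localTime)) // 01:02

    // The fallback mirrors the helper: default a missing time to midnight.
    val localTime = Option(dateOnly.query(TemporalQueries.localTime)).getOrElse(LocalTime.MIDNIGHT)
    println(localTime) // 00:00
  }
}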

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala

Lines changed: 13 additions & 12 deletions
@@ -20,10 +20,10 @@ package org.apache.spark.sql.catalyst.util
 import java.text.ParseException
 import java.time._
 import java.time.format.DateTimeParseException
+import java.time.temporal.ChronoField.MICRO_OF_SECOND
 import java.time.temporal.TemporalQueries
 import java.util.{Locale, TimeZone}
-
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
+import java.util.concurrent.TimeUnit.SECONDS
 
 sealed trait TimestampFormatter extends Serializable {
   /**
@@ -48,21 +48,22 @@ class Iso8601TimestampFormatter(
     locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper {
   @transient
   protected lazy val formatter = getOrCreateFormatter(pattern, locale)
+  private val timeZoneId = timeZone.toZoneId
 
-  private def toInstant(s: String): Instant = {
-    val temporalAccessor = formatter.parse(s)
-    if (temporalAccessor.query(TemporalQueries.offset()) == null) {
-      toInstantWithZoneId(temporalAccessor, timeZone.toZoneId)
-    } else {
-      Instant.from(temporalAccessor)
-    }
-  }
+  override def parse(s: String): Long = {
+    val parsed = formatter.parse(s)
+    val parsedZoneId = parsed.query(TemporalQueries.zone())
+    val zoneId = if (parsedZoneId == null) timeZoneId else parsedZoneId
+    val zonedDateTime = toZonedDateTime(parsed, zoneId)
+    val epochSeconds = zonedDateTime.toEpochSecond
+    val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
 
-  override def parse(s: String): Long = instantToMicros(toInstant(s))
+    Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
+  }
 
   override def format(us: Long): String = {
     val instant = DateTimeUtils.microsToInstant(us)
-    formatter.withZone(timeZone.toZoneId).format(instant)
+    formatter.withZone(timeZoneId).format(instant)
   }
 }
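
A standalone sketch of the new timestamp path follows; the pattern, sample input, helper name, and default zone are illustrative assumptions rather than the Spark implementation. It falls back to a caller-supplied zone when the input carries none, then combines epoch seconds with the micro-of-second field.

import java.time.{LocalTime, ZonedDateTime, ZoneId, ZoneOffset}
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField.MICRO_OF_SECOND
import java.time.temporal.TemporalQueries
import java.util.concurrent.TimeUnit.SECONDS

object EpochMicrosSketch {
  // Illustrative counterpart of the refactored timestamp parse.
  def toEpochMicros(s: String, formatter: DateTimeFormatter, defaultZone: ZoneId): Long = {
    val parsed = formatter.parse(s)
    // Prefer the zone carried by the input; otherwise use the caller-supplied default.
    val zoneId = Option(parsed.query(TemporalQueries.zone())).getOrElse(defaultZone)
    val localTime = Option(parsed.query(TemporalQueries.localTime)).getOrElse(LocalTime.MIDNIGHT)
    val localDate = parsed.query(TemporalQueries.localDate)
    val zdt = ZonedDateTime.of(localDate, localTime, zoneId)
    Math.addExact(SECONDS.toMicros(zdt.toEpochSecond), zdt.get(MICRO_OF_SECOND).toLong)
  }

  def main(args: Array[String]): Unit = {
    val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS")
    // No zone in the input, so it is interpreted in the default zone (UTC here).
    println(toEpochMicros("1970-01-01 00:00:01.000002", fmt, ZoneOffset.UTC)) // 1000002
  }
}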

sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala

Lines changed: 0 additions & 1 deletion
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.util
 
 import java.time.LocalDate
-import java.util.Locale
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.plans.SQLHelper
