Skip to content

[SPARK-26546][SQL] Caching of java.time.format.DateTimeFormatter #23462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Iso8601DateFormatter(
locale: Locale) extends DateFormatter with DateTimeFormatterHelper {

@transient
private lazy val formatter = buildFormatter(pattern, locale)
private lazy val formatter = getOrCreateFormatter(pattern, locale)
private val UTC = ZoneId.of("UTC")

private def toInstant(s: String): Instant = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,46 @@ import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverSt
import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
import java.util.Locale

import com.google.common.cache.CacheBuilder

import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._

trait DateTimeFormatterHelper {
protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = {
val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) {
LocalTime.ofNanoOfDay(0)
} else {
LocalTime.from(temporalAccessor)
}
val localDate = LocalDate.from(temporalAccessor)
val localDateTime = LocalDateTime.of(localDate, localTime)
val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
Instant.from(zonedDateTime)
}

// Gets a formatter from the cache or creates new one. The buildFormatter method can be called
// a few times with the same parameters in parallel if the cache does not contain values
// associated to those parameters. Since the formatter is immutable, it does not matter.
// In this way, synchronised is intentionally omitted in this method to make parallel calls
// less synchronised.
// The Cache.get method is not used here to avoid creation of additional instances of Callable.
protected def getOrCreateFormatter(pattern: String, locale: Locale): DateTimeFormatter = {
val key = (pattern, locale)
var formatter = cache.getIfPresent(key)
if (formatter == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add a comment to say that, we intentionally drop the synchronized here, as the worst case is we create the same formatter more than once, which doesn't matter.

without the comment, I'm afraid people may open PRs to add the synchronized later, as they don't know the context.

formatter = buildFormatter(pattern, locale)
cache.put(key, formatter)
}
formatter
}
}

protected def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = {
private object DateTimeFormatterHelper {
val cache = CacheBuilder.newBuilder()
.maximumSize(128)
.build[(String, Locale), DateTimeFormatter]()

def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = {
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.appendPattern(pattern)
Expand All @@ -38,16 +75,4 @@ trait DateTimeFormatterHelper {
.withChronology(IsoChronology.INSTANCE)
.withResolverStyle(ResolverStyle.STRICT)
}

protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = {
val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) {
LocalTime.ofNanoOfDay(0)
} else {
LocalTime.from(temporalAccessor)
}
val localDate = LocalDate.from(temporalAccessor)
val localDateTime = LocalDateTime.of(localDate, localTime)
val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
Instant.from(zonedDateTime)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Iso8601TimestampFormatter(
timeZone: TimeZone,
locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper {
@transient
private lazy val formatter = buildFormatter(pattern, locale)
private lazy val formatter = getOrCreateFormatter(pattern, locale)

private def toInstant(s: String): Instant = {
val temporalAccessor = formatter.parse(s)
Expand Down