Skip to content

Commit 20d411b

Browse files
Davies Liuhvanhovell
authored andcommitted
[SPARK-16078][SQL] from_utc_timestamp/to_utc_timestamp should not depends on local timezone
## What changes were proposed in this pull request? Currently, we use local timezone to parse or format a timestamp (TimestampType), then use Long as the microseconds since epoch UTC. In from_utc_timestamp() and to_utc_timestamp(), we did not consider the local timezone, they could return different results with different local timezone. This PR will do the conversion based on human time (in local timezone), it should return same result in whatever timezone. But because the mapping from absolute timestamp to human time is not exactly one-to-one mapping, it will still return wrong result in some timezone (also in the begging or ending of DST). This PR is kind of the best effort fix. In long term, we should make the TimestampType be timezone aware to fix this totally. ## How was this patch tested? Tested these function in all timezone. Author: Davies Liu <davies@databricks.com> Closes #13784 from davies/convert_tz.
1 parent 43b04b7 commit 20d411b

File tree

3 files changed

+73
-36
lines changed

3 files changed

+73
-36
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -730,16 +730,17 @@ case class FromUTCTimestamp(left: Expression, right: Expression)
730730
""".stripMargin)
731731
} else {
732732
val tzTerm = ctx.freshName("tz")
733+
val utcTerm = ctx.freshName("utc")
733734
val tzClass = classOf[TimeZone].getName
734735
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $tzClass.getTimeZone("$tz");""")
736+
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""")
735737
val eval = left.genCode(ctx)
736738
ev.copy(code = s"""
737739
|${eval.code}
738740
|boolean ${ev.isNull} = ${eval.isNull};
739741
|long ${ev.value} = 0;
740742
|if (!${ev.isNull}) {
741-
| ${ev.value} = ${eval.value} +
742-
| ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
743+
| ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm);
743744
|}
744745
""".stripMargin)
745746
}
@@ -869,16 +870,17 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
869870
""".stripMargin)
870871
} else {
871872
val tzTerm = ctx.freshName("tz")
873+
val utcTerm = ctx.freshName("utc")
872874
val tzClass = classOf[TimeZone].getName
873875
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $tzClass.getTimeZone("$tz");""")
876+
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""")
874877
val eval = left.genCode(ctx)
875878
ev.copy(code = s"""
876879
|${eval.code}
877880
|boolean ${ev.isNull} = ${eval.isNull};
878881
|long ${ev.value} = 0;
879882
|if (!${ev.isNull}) {
880-
| ${ev.value} = ${eval.value} -
881-
| ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
883+
| ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm);
882884
|}
883885
""".stripMargin)
884886
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -885,24 +885,46 @@ object DateTimeUtils {
885885
guess
886886
}
887887

888+
/**
889+
* Convert the timestamp `ts` from one timezone to another.
890+
*
891+
* TODO: Because of DST, the conversion between UTC and human time is not exactly one-to-one
892+
* mapping, the conversion here may return wrong result, we should make the timestamp
893+
* timezone-aware.
894+
*/
895+
def convertTz(ts: SQLTimestamp, fromZone: TimeZone, toZone: TimeZone): SQLTimestamp = {
896+
// We always use local timezone to parse or format a timestamp
897+
val localZone = threadLocalLocalTimeZone.get()
898+
val utcTs = if (fromZone.getID == localZone.getID) {
899+
ts
900+
} else {
901+
// get the human time using local time zone, that actually is in fromZone.
902+
val localTs = ts + localZone.getOffset(ts / 1000L) * 1000L // in fromZone
903+
localTs - getOffsetFromLocalMillis(localTs / 1000L, fromZone) * 1000L
904+
}
905+
if (toZone.getID == localZone.getID) {
906+
utcTs
907+
} else {
908+
val localTs2 = utcTs + toZone.getOffset(utcTs / 1000L) * 1000L // in toZone
909+
// treat it as local timezone, convert to UTC (we could get the expected human time back)
910+
localTs2 - getOffsetFromLocalMillis(localTs2 / 1000L, localZone) * 1000L
911+
}
912+
}
913+
888914
/**
889915
* Returns a timestamp of given timezone from utc timestamp, with the same string
890916
* representation in their timezone.
891917
*/
892918
def fromUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
893-
val tz = TimeZone.getTimeZone(timeZone)
894-
val offset = tz.getOffset(time / 1000L)
895-
time + offset * 1000L
919+
convertTz(time, TimeZoneGMT, TimeZone.getTimeZone(timeZone))
896920
}
897921

898922
/**
899923
* Returns a utc timestamp from a given timestamp from a given timezone, with the same
900924
* string representation in their timezone.
901925
*/
902926
def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
903-
val tz = TimeZone.getTimeZone(timeZone)
904-
val offset = getOffsetFromLocalMillis(time / 1000L, tz)
905-
time - offset * 1000L
927+
convertTz(time, TimeZone.getTimeZone(timeZone), TimeZoneGMT)
906928
}
907929

908930
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -488,39 +488,52 @@ class DateTimeUtilsSuite extends SparkFunSuite {
488488
assert(toJavaTimestamp(fromUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), tz)).toString
489489
=== expected)
490490
}
491-
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
492-
test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
493-
test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
494-
test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
495-
496-
// Daylight Saving Time
497-
test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
498-
test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
499-
test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
500-
test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
501-
test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
491+
for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
492+
DateTimeTestUtils.withDefaultTimeZone(tz) {
493+
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
494+
test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
495+
test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
496+
test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
497+
}
498+
}
499+
500+
DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
501+
// Daylight Saving Time
502+
test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
503+
test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
504+
test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
505+
test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
506+
test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
507+
}
502508
}
503509

504510
test("to UTC timestamp") {
505511
def test(utc: String, tz: String, expected: String): Unit = {
506512
assert(toJavaTimestamp(toUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), tz)).toString
507513
=== expected)
508514
}
509-
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
510-
test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
511-
test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
512-
test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
513-
514-
// Daylight Saving Time
515-
test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
516-
// 2016-03-13 02:00:00 PST does not exists
517-
test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
518-
test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
519-
test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
520-
// 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
521-
test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
522-
test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
523-
test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
515+
516+
for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
517+
DateTimeTestUtils.withDefaultTimeZone(tz) {
518+
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
519+
test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
520+
test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
521+
test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
522+
}
523+
}
524+
525+
DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
526+
// Daylight Saving Time
527+
test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
528+
// 2016-03-13 02:00:00 PST does not exists
529+
test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
530+
test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
531+
test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
532+
// 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
533+
test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
534+
test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
535+
test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
536+
}
524537
}
525538

526539
test("daysToMillis and millisToDays") {

0 commit comments

Comments
 (0)