
Commit 17a5007

MaxGekk authored and cloud-fan committed
[SPARK-30865][SQL][SS] Refactor DateTimeUtils
### What changes were proposed in this pull request?

1. Move TimeZoneUTC and TimeZoneGMT to DateTimeTestUtils
2. Remove TimeZoneGMT
3. Use ZoneId.systemDefault() instead of defaultTimeZone().toZoneId
4. Alias SQLDate & SQLTimestamp to the internal types of DateType and TimestampType
5. Avoid one `*` in `DateTimeUtils.fromJulianDay()`
6. Use toTotalMonths in `DateTimeUtils.subtractDates()`
7. Remove `julianCommonEraStart`, `timestampToString()`, `microsToEpochDays()`, `epochDaysToMicros()`, `instantToDays()` from `DateTimeUtils`
8. Make splitDate() private
9. Remove `def daysToMicros(days: Int): Long` and `def microsToDays(micros: Long): Int`

### Why are the changes needed?

This simplifies the common code related to date-time operations and should improve maintainability. In particular:

1. TimeZoneUTC and TimeZoneGMT are moved to DateTimeTestUtils because they are used only in tests
2. TimeZoneGMT can be removed because it is equal to TimeZoneUTC
3. After the PR #27494, Spark expressions and DateTimeUtils functions switched completely to ZoneId instead of TimeZone, so `defaultTimeZone()` with `TimeZone` as its return type is no longer needed
4. SQLDate and SQLTimestamp can be explicitly aliased to the internal types of DateType and TimestampType instead of declaring this in a comment
5. One `*` is avoided in `DateTimeUtils.fromJulianDay()`
6. toTotalMonths is used in `DateTimeUtils.subtractDates()`

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

By existing test suites

Closes #27617 from MaxGekk/move-time-zone-consts.

Lead-authored-by: Max Gekk <max.gekk@gmail.com>
Co-authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
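Items 4 and 6 are easiest to see in miniature. The sketch below is illustrative only — the names and bodies are simplified stand-ins, not the actual `DateTimeUtils` code (the real `subtractDates` returns a full interval, not just a month count):

```scala
import java.time.{LocalDate, Period}

object DateTimeUtilsSketch {
  // Item 4: alias the internal representations explicitly instead of
  // documenting them in a comment.
  type SQLDate = Int        // internal type of DateType: days since 1970-01-01
  type SQLTimestamp = Long  // internal type of TimestampType: microseconds since the epoch

  // Item 6: a Period already carries years and months; toTotalMonths folds
  // them into a single month count, so subtractDates-style code no longer
  // needs to combine the two fields by hand.
  def monthsBetween(end: SQLDate, start: SQLDate): Long = {
    val period = Period.between(LocalDate.ofEpochDay(start), LocalDate.ofEpochDay(end))
    period.toTotalMonths // == period.getYears * 12 + period.getMonths
  }
}
```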
1 parent 8750363 · commit 17a5007

File tree

22 files changed: +241 -274 lines


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 6 additions & 7 deletions
@@ -303,7 +303,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
     case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d)))
     case TimestampType => buildCast[Long](_,
-      t => UTF8String.fromString(DateTimeUtils.timestampToString(timestampFormatter, t)))
+      t => UTF8String.fromString(timestampFormatter.format(t)))
     case ArrayType(et, _) =>
       buildCast[ArrayData](_, array => {
         val builder = new UTF8StringBuilder
@@ -443,7 +443,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     case ByteType =>
       buildCast[Byte](_, b => longToTimestamp(b.toLong))
     case DateType =>
-      buildCast[Int](_, d => epochDaysToMicros(d, zoneId))
+      buildCast[Int](_, d => daysToMicros(d, zoneId))
     // TimestampWritable.decimalToTimestamp
     case DecimalType() =>
       buildCast[Decimal](_, d => decimalToTimestamp(d))
@@ -480,7 +480,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     case TimestampType =>
       // throw valid precision more than seconds, according to Hive.
      // Timestamp.nanos is in 0 to 999,999,999, no more than a second.
-      buildCast[Long](_, t => microsToEpochDays(t, zoneId))
+      buildCast[Long](_, t => microsToDays(t, zoneId))
   }

   // IntervalConverter
@@ -1034,8 +1034,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     val tf = JavaCode.global(
       ctx.addReferenceObj("timestampFormatter", timestampFormatter),
       timestampFormatter.getClass)
-    (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString(
-      org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString($tf, $c));"""
+    (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));"""
   case CalendarIntervalType =>
     (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());"""
   case ArrayType(et, _) =>
@@ -1120,7 +1119,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     val zid = getZoneId()
     (c, evPrim, evNull) =>
       code"""$evPrim =
-        org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);"""
+        org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToDays($c, $zid);"""
   case _ =>
     (c, evPrim, evNull) => code"$evNull = true;"
 }
@@ -1247,7 +1246,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
       zoneIdClass)
     (c, evPrim, evNull) =>
       code"""$evPrim =
-        org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros($c, $zid);"""
+        org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMicros($c, $zid);"""
   case DecimalType() =>
     (c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};"
   case DoubleType =>
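The renames above (`epochDaysToMicros` → `daysToMicros`, `microsToEpochDays` → `microsToDays`) don't change semantics: the helpers still convert between Catalyst's internal date type (days since the epoch) and timestamp type (microseconds since the epoch), anchoring the date at local midnight in the given zone. A rough standalone sketch of those semantics — not Spark's implementation:

```scala
import java.time.{Instant, LocalDate, ZoneId}
import java.time.temporal.ChronoUnit
import java.util.concurrent.TimeUnit

object ConversionSketch {
  // daysToMicros: interpret the day count as local midnight in `zoneId`
  // and return that instant as microseconds since the epoch.
  def daysToMicros(days: Int, zoneId: ZoneId): Long = {
    val instant = LocalDate.ofEpochDay(days).atStartOfDay(zoneId).toInstant
    TimeUnit.SECONDS.toMicros(instant.getEpochSecond) + instant.getNano / 1000
  }

  // microsToDays: map the instant back to a local date in `zoneId`
  // and return its epoch-day count.
  def microsToDays(micros: Long, zoneId: ZoneId): Int = {
    val instant = Instant.EPOCH.plus(micros, ChronoUnit.MICROS)
    instant.atZone(zoneId).toLocalDate.toEpochDay.toInt
  }
}
```

For example, `ConversionSketch.daysToMicros(0, ZoneId.of("UTC"))` is `0L` (epoch day zero at UTC midnight), while a non-UTC zone shifts the result by that zone's offset.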

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 6 additions & 6 deletions
@@ -895,7 +895,7 @@ abstract class ToTimestamp
     } else {
       left.dataType match {
         case DateType =>
-          epochDaysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor
+          daysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor
         case TimestampType =>
           t.asInstanceOf[Long] / downScaleFactor
         case StringType =>
@@ -975,7 +975,7 @@ abstract class ToTimestamp
        boolean ${ev.isNull} = ${eval1.isNull};
        $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)};
        if (!${ev.isNull}) {
-         ${ev.value} = $dtu.epochDaysToMicros(${eval1.value}, $zid) / $downScaleFactor;
+         ${ev.value} = $dtu.daysToMicros(${eval1.value}, $zid) / $downScaleFactor;
        }""")
     }
   }
@@ -1242,10 +1242,10 @@ case class DateAddInterval(
     if (ansiEnabled || itvl.microseconds == 0) {
       DateTimeUtils.dateAddInterval(start.asInstanceOf[Int], itvl)
     } else {
-      val startTs = DateTimeUtils.epochDaysToMicros(start.asInstanceOf[Int], zoneId)
+      val startTs = DateTimeUtils.daysToMicros(start.asInstanceOf[Int], zoneId)
       val resultTs = DateTimeUtils.timestampAddInterval(
         startTs, itvl.months, itvl.days, itvl.microseconds, zoneId)
-      DateTimeUtils.microsToEpochDays(resultTs, zoneId)
+      DateTimeUtils.microsToDays(resultTs, zoneId)
     }
   }

@@ -1261,10 +1261,10 @@ case class DateAddInterval(
       |if ($i.microseconds == 0) {
       |  ${ev.value} = $dtu.dateAddInterval($sd, $i);
       |} else {
-      |  long $startTs = $dtu.epochDaysToMicros($sd, $zid);
+      |  long $startTs = $dtu.daysToMicros($sd, $zid);
       |  long $resultTs =
       |    $dtu.timestampAddInterval($startTs, $i.months, $i.days, $i.microseconds, $zid);
-      |  ${ev.value} = $dtu.microsToEpochDays($resultTs, $zid);
+      |  ${ev.value} = $dtu.microsToDays($resultTs, $zid);
       |}
       |""".stripMargin
   })
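The DateAddInterval change keeps the existing two-path logic: pure month/day intervals go through dateAddInterval directly, while intervals with a microsecond component round-trip through the timestamp domain via the renamed helpers. A hypothetical composition using the ConversionSketch helpers above — Spark's `timestampAddInterval` is not reproduced here, only taken as a parameter:

```scala
import java.time.ZoneId

// Illustrative shape of the non-ANSI path in DateAddInterval: promote the
// date to a timestamp at local midnight, add the interval in the timestamp
// domain, then truncate back to a date in the same zone.
def dateAddWithMicros(
    startDays: Int,
    months: Int,
    days: Int,
    micros: Long,
    zoneId: ZoneId)(
    timestampAddInterval: (Long, Int, Int, Long, ZoneId) => Long): Int = {
  val startTs = ConversionSketch.daysToMicros(startDays, zoneId)
  val resultTs = timestampAddInterval(startTs, months, days, micros, zoneId)
  ConversionSketch.microsToDays(resultTs, zoneId)
}
```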
