Skip to content

Commit 1d20d13

Browse files
MaxGekk authored and HyukjinKwon committed
[SPARK-25496][SQL] Deprecate from_utc_timestamp and to_utc_timestamp
## What changes were proposed in this pull request?

In this PR, I propose to deprecate the `from_utc_timestamp()` and `to_utc_timestamp()` functions and to disable them by default. The functions can be re-enabled via the SQL config `spark.sql.legacy.utcTimestampFunc.enabled`. By default, any call of these functions throws an analysis exception.

One of the reasons for the deprecation is that the functions violate the semantics of `TimestampType`, which is the number of microseconds since the epoch in the UTC time zone. Shifting microseconds since the epoch by a time zone offset doesn't make sense because the result no longer represents microseconds since the epoch in the UTC time zone, and so cannot be considered a `TimestampType` value.

## How was this patch tested?

The changes were tested by `DateExpressionsSuite` and `DateFunctionsSuite`.

Closes #24195 from MaxGekk/conv-utc-timestamp-deprecate.

Lead-authored-by: Maxim Gekk <max.gekk@gmail.com>
Co-authored-by: Maxim Gekk <maxim.gekk@databricks.com>
Co-authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent b8b5acd commit 1d20d13

File tree

11 files changed

+246
-152
lines changed

11 files changed

+246
-152
lines changed

R/pkg/R/functions.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2459,6 +2459,7 @@ setMethod("schema_of_csv", signature(x = "characterOrColumn"),
24592459
#' @note from_utc_timestamp since 1.5.0
24602460
setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
24612461
function(y, x) {
2462+
.Deprecated(msg = "from_utc_timestamp is deprecated. See SPARK-25496.")
24622463
jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x)
24632464
column(jc)
24642465
})
@@ -2517,6 +2518,7 @@ setMethod("next_day", signature(y = "Column", x = "character"),
25172518
#' @note to_utc_timestamp since 1.5.0
25182519
setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
25192520
function(y, x) {
2521+
.Deprecated(msg = "to_utc_timestamp is deprecated. See SPARK-25496.")
25202522
jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x)
25212523
column(jc)
25222524
})

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,10 +1905,20 @@ test_that("date functions on a DataFrame", {
19051905
df2 <- createDataFrame(l2)
19061906
expect_equal(collect(select(df2, minute(df2$b)))[, 1], c(34, 24))
19071907
expect_equal(collect(select(df2, second(df2$b)))[, 1], c(0, 34))
1908-
expect_equal(collect(select(df2, from_utc_timestamp(df2$b, "JST")))[, 1],
1909-
c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
1910-
expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1],
1911-
c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
1908+
conf <- callJMethod(sparkSession, "conf")
1909+
isUtcTimestampFuncEnabled <- callJMethod(conf, "get", "spark.sql.legacy.utcTimestampFunc.enabled")
1910+
callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", "true")
1911+
tryCatch({
1912+
# Both from_utc_timestamp and to_utc_timestamp are deprecated as of SPARK-25496
1913+
expect_equal(suppressWarnings(collect(select(df2, from_utc_timestamp(df2$b, "JST"))))[, 1],
1914+
c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
1915+
expect_equal(suppressWarnings(collect(select(df2, to_utc_timestamp(df2$b, "JST"))))[, 1],
1916+
c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
1917+
},
1918+
finally = {
1919+
# Reverting the conf back
1920+
callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", isUtcTimestampFuncEnabled)
1921+
})
19121922
expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0)
19131923
expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
19141924
expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0)

python/pyspark/sql/functions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,7 +1306,10 @@ def from_utc_timestamp(timestamp, tz):
13061306
[Row(local_time=datetime.datetime(1997, 2, 28, 2, 30))]
13071307
>>> df.select(from_utc_timestamp(df.ts, df.tz).alias('local_time')).collect()
13081308
[Row(local_time=datetime.datetime(1997, 2, 28, 19, 30))]
1309+
1310+
.. note:: Deprecated in 3.0. See SPARK-25496
13091311
"""
1312+
warnings.warn("Deprecated in 3.0. See SPARK-25496", DeprecationWarning)
13101313
sc = SparkContext._active_spark_context
13111314
if isinstance(tz, Column):
13121315
tz = _to_java_column(tz)
@@ -1340,7 +1343,10 @@ def to_utc_timestamp(timestamp, tz):
13401343
[Row(utc_time=datetime.datetime(1997, 2, 28, 18, 30))]
13411344
>>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect()
13421345
[Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))]
1346+
1347+
.. note:: Deprecated in 3.0. See SPARK-25496
13431348
"""
1349+
warnings.warn("Deprecated in 3.0. See SPARK-25496", DeprecationWarning)
13441350
sc = SparkContext._active_spark_context
13451351
if isinstance(tz, Column):
13461352
tz = _to_java_column(tz)
@@ -3191,9 +3197,13 @@ def _test():
31913197
globs['sc'] = sc
31923198
globs['spark'] = spark
31933199
globs['df'] = spark.createDataFrame([Row(name='Alice', age=2), Row(name='Bob', age=5)])
3200+
3201+
spark.conf.set("spark.sql.legacy.utcTimestampFunc.enabled", "true")
31943202
(failure_count, test_count) = doctest.testmod(
31953203
pyspark.sql.functions, globs=globs,
31963204
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
3205+
spark.conf.unset("spark.sql.legacy.utcTimestampFunc.enabled")
3206+
31973207
spark.stop()
31983208
if failure_count:
31993209
sys.exit(-1)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ import scala.util.control.NonFatal
2626

2727
import org.apache.commons.lang3.StringEscapeUtils
2828

29+
import org.apache.spark.sql.AnalysisException
2930
import org.apache.spark.sql.catalyst.InternalRow
3031
import org.apache.spark.sql.catalyst.expressions.codegen._
3132
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
3233
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter}
3334
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
35+
import org.apache.spark.sql.internal.SQLConf
3436
import org.apache.spark.sql.types._
3537
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
3638

@@ -1021,6 +1023,11 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S
10211023
case class FromUTCTimestamp(left: Expression, right: Expression)
10221024
extends BinaryExpression with ImplicitCastInputTypes {
10231025

1026+
if (!SQLConf.get.utcTimestampFuncEnabled) {
1027+
throw new AnalysisException(s"The $prettyName function has been disabled since Spark 3.0. " +
1028+
s"Set ${SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key} to true to enable this function.")
1029+
}
1030+
10241031
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType)
10251032
override def dataType: DataType = TimestampType
10261033
override def prettyName: String = "from_utc_timestamp"
@@ -1227,6 +1234,11 @@ case class MonthsBetween(
12271234
case class ToUTCTimestamp(left: Expression, right: Expression)
12281235
extends BinaryExpression with ImplicitCastInputTypes {
12291236

1237+
if (!SQLConf.get.utcTimestampFuncEnabled) {
1238+
throw new AnalysisException(s"The $prettyName function has been disabled since Spark 3.0. " +
1239+
s"Set ${SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key} to true to enable this function.")
1240+
}
1241+
12301242
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType)
12311243
override def dataType: DataType = TimestampType
12321244
override def prettyName: String = "to_utc_timestamp"

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,6 +1723,12 @@ object SQLConf {
17231723
"and java.sql.Date are used for the same purpose.")
17241724
.booleanConf
17251725
.createWithDefault(false)
1726+
1727+
val UTC_TIMESTAMP_FUNC_ENABLED = buildConf("spark.sql.legacy.utcTimestampFunc.enabled")
1728+
.doc("The configuration property enables the to_utc_timestamp() " +
1729+
"and from_utc_timestamp() functions.")
1730+
.booleanConf
1731+
.createWithDefault(false)
17261732
}
17271733

17281734
/**
@@ -1916,6 +1922,8 @@ class SQLConf extends Serializable with Logging {
19161922

19171923
def datetimeJava8ApiEnabled: Boolean = getConf(DATETIME_JAVA8API_ENABLED)
19181924

1925+
def utcTimestampFuncEnabled: Boolean = getConf(UTC_TIMESTAMP_FUNC_ENABLED)
1926+
19191927
/**
19201928
* Returns the [[Resolver]] for the current configuration, which can be used to determine if two
19211929
* identifiers are equal.

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.sql.Timestamp
2121

22-
import org.apache.log4j.{Appender, AppenderSkeleton, Logger}
22+
import org.apache.log4j.AppenderSkeleton
2323
import org.apache.log4j.spi.LoggingEvent
2424

2525
import org.apache.spark.SparkFunSuite
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
3131
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
3232
import org.apache.spark.sql.catalyst.expressions.objects._
3333
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils}
34+
import org.apache.spark.sql.internal.SQLConf
3435
import org.apache.spark.sql.types._
3536
import org.apache.spark.unsafe.types.UTF8String
3637
import org.apache.spark.util.ThreadUtils
@@ -189,36 +190,42 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
189190
}
190191

191192
test("SPARK-17702: split wide constructor into blocks due to JVM code size limit") {
192-
val length = 5000
193-
val expressions = Seq.fill(length) {
194-
ToUTCTimestamp(
195-
Literal.create(Timestamp.valueOf("2015-07-24 00:00:00"), TimestampType),
196-
Literal.create("PST", StringType))
197-
}
198-
val plan = GenerateMutableProjection.generate(expressions)
199-
val actual = plan(new GenericInternalRow(length)).toSeq(expressions.map(_.dataType))
200-
val expected = Seq.fill(length)(
201-
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-07-24 07:00:00")))
202-
203-
if (actual != expected) {
204-
fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected")
193+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
194+
val length = 5000
195+
val expressions = Seq.fill(length) {
196+
ToUTCTimestamp(
197+
Literal.create(Timestamp.valueOf("2015-07-24 00:00:00"), TimestampType),
198+
Literal.create("PST", StringType))
199+
}
200+
val plan = GenerateMutableProjection.generate(expressions)
201+
val actual = plan(new GenericInternalRow(length)).toSeq(expressions.map(_.dataType))
202+
val expected = Seq.fill(length)(
203+
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-07-24 07:00:00")))
204+
205+
if (actual != expected) {
206+
fail(
207+
s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected")
208+
}
205209
}
206210
}
207211

208212
test("SPARK-22226: group splitted expressions into one method per nested class") {
209-
val length = 10000
210-
val expressions = Seq.fill(length) {
211-
ToUTCTimestamp(
212-
Literal.create(Timestamp.valueOf("2017-10-10 00:00:00"), TimestampType),
213-
Literal.create("PST", StringType))
214-
}
215-
val plan = GenerateMutableProjection.generate(expressions)
216-
val actual = plan(new GenericInternalRow(length)).toSeq(expressions.map(_.dataType))
217-
val expected = Seq.fill(length)(
218-
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2017-10-10 07:00:00")))
219-
220-
if (actual != expected) {
221-
fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected")
213+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
214+
val length = 10000
215+
val expressions = Seq.fill(length) {
216+
ToUTCTimestamp(
217+
Literal.create(Timestamp.valueOf("2017-10-10 00:00:00"), TimestampType),
218+
Literal.create("PST", StringType))
219+
}
220+
val plan = GenerateMutableProjection.generate(expressions)
221+
val actual = plan(new GenericInternalRow(length)).toSeq(expressions.map(_.dataType))
222+
val expected = Seq.fill(length)(
223+
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2017-10-10 07:00:00")))
224+
225+
if (actual != expected) {
226+
fail(
227+
s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected")
228+
}
222229
}
223230
}
224231

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@ import java.util.concurrent.TimeUnit
2525
import java.util.concurrent.TimeUnit._
2626

2727
import org.apache.spark.SparkFunSuite
28+
import org.apache.spark.sql.AnalysisException
2829
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
2930
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter}
3031
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
3132
import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT
33+
import org.apache.spark.sql.internal.SQLConf
3234
import org.apache.spark.sql.types._
3335
import org.apache.spark.unsafe.types.CalendarInterval
3436

@@ -816,21 +818,29 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
816818
NonFoldableLiteral.create(tz, StringType)),
817819
if (expected != null) Timestamp.valueOf(expected) else null)
818820
}
819-
test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00")
820-
test("2015-01-24 00:00:00", "PST", "2015-01-24 08:00:00")
821-
test(null, "UTC", null)
822-
test("2015-07-24 00:00:00", null, null)
823-
test(null, null, null)
821+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
822+
test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00")
823+
test("2015-01-24 00:00:00", "PST", "2015-01-24 08:00:00")
824+
test(null, "UTC", null)
825+
test("2015-07-24 00:00:00", null, null)
826+
test(null, null, null)
827+
}
828+
val msg = intercept[AnalysisException] {
829+
test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00")
830+
}.getMessage
831+
assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key))
824832
}
825833

826834
test("to_utc_timestamp - invalid time zone id") {
827-
Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz =>
828-
val msg = intercept[java.time.DateTimeException] {
829-
GenerateUnsafeProjection.generate(
830-
ToUTCTimestamp(
831-
Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal(invalidTz)) :: Nil)
832-
}.getMessage
833-
assert(msg.contains(invalidTz))
835+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
836+
Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz =>
837+
val msg = intercept[java.time.DateTimeException] {
838+
GenerateUnsafeProjection.generate(
839+
ToUTCTimestamp(
840+
Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal(invalidTz)) :: Nil)
841+
}.getMessage
842+
assert(msg.contains(invalidTz))
843+
}
834844
}
835845
}
836846

@@ -847,19 +857,28 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
847857
NonFoldableLiteral.create(tz, StringType)),
848858
if (expected != null) Timestamp.valueOf(expected) else null)
849859
}
850-
test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00")
851-
test("2015-01-24 00:00:00", "PST", "2015-01-23 16:00:00")
852-
test(null, "UTC", null)
853-
test("2015-07-24 00:00:00", null, null)
854-
test(null, null, null)
860+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
861+
test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00")
862+
test("2015-01-24 00:00:00", "PST", "2015-01-23 16:00:00")
863+
test(null, "UTC", null)
864+
test("2015-07-24 00:00:00", null, null)
865+
test(null, null, null)
866+
}
867+
val msg = intercept[AnalysisException] {
868+
test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00")
869+
}.getMessage
870+
assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key))
855871
}
856872

857873
test("from_utc_timestamp - invalid time zone id") {
858-
Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz =>
859-
val msg = intercept[java.time.DateTimeException] {
860-
GenerateUnsafeProjection.generate(FromUTCTimestamp(Literal(0), Literal(invalidTz)) :: Nil)
861-
}.getMessage
862-
assert(msg.contains(invalidTz))
874+
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
875+
Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz =>
876+
val msg = intercept[java.time.DateTimeException] {
877+
GenerateUnsafeProjection.generate(
878+
FromUTCTimestamp(Literal(0), Literal(invalidTz)) :: Nil)
879+
}.getMessage
880+
assert(msg.contains(invalidTz))
881+
}
863882
}
864883
}
865884
}

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2988,6 +2988,7 @@ object functions {
29882988
* @group datetime_funcs
29892989
* @since 1.5.0
29902990
*/
2991+
@deprecated("This function is deprecated and will be removed in future versions.", "3.0.0")
29912992
def from_utc_timestamp(ts: Column, tz: String): Column = withExpr {
29922993
FromUTCTimestamp(ts.expr, Literal(tz))
29932994
}
@@ -2999,6 +3000,7 @@ object functions {
29993000
* @group datetime_funcs
30003001
* @since 2.4.0
30013002
*/
3003+
@deprecated("This function is deprecated and will be removed in future versions.", "3.0.0")
30023004
def from_utc_timestamp(ts: Column, tz: Column): Column = withExpr {
30033005
FromUTCTimestamp(ts.expr, tz.expr)
30043006
}
@@ -3017,6 +3019,7 @@ object functions {
30173019
* @group datetime_funcs
30183020
* @since 1.5.0
30193021
*/
3022+
@deprecated("This function is deprecated and will be removed in future versions.", "3.0.0")
30203023
def to_utc_timestamp(ts: Column, tz: String): Column = withExpr {
30213024
ToUTCTimestamp(ts.expr, Literal(tz))
30223025
}
@@ -3028,6 +3031,7 @@ object functions {
30283031
* @group datetime_funcs
30293032
* @since 2.4.0
30303033
*/
3034+
@deprecated("This function is deprecated and will be removed in future versions.", "3.0.0")
30313035
def to_utc_timestamp(ts: Column, tz: Column): Column = withExpr {
30323036
ToUTCTimestamp(ts.expr, tz.expr)
30333037
}

0 commit comments

Comments
 (0)