
Commit 03cb4d9

uros-db authored and MaxGekk committed
[SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time functions in Scala
### What changes were proposed in this pull request?

Implement the `to_time` and `try_to_time` functions in the Scala API.

### Why are the changes needed?

Expand API support for the `ToTime` and `TryToTime` expressions.

### Does this PR introduce _any_ user-facing change?

Yes, the new functions are now available in the Scala API.

### How was this patch tested?

Added appropriate Scala function tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51575 from uros-db/scala-try_to_time.

Authored-by: Uros Bojanic <uros.bojanic@databricks.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent e888e37 · commit 03cb4d9
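The two new functions pair an error-raising parser with a null-returning one: `to_time` fails on malformed input, while `try_to_time` yields null. A minimal usage sketch, assuming an active `SparkSession` in scope as `spark` (the column and DataFrame names here are illustrative, not from the patch):

```scala
import org.apache.spark.sql.functions.{col, to_time, try_to_time}
import spark.implicits._ // assumes `spark` is an active SparkSession

val df = Seq("23:59:59.999999", "invalid_time").toDF("s")

// try_to_time: valid strings parse to TIME values; "invalid_time" becomes null.
df.select(try_to_time(col("s"))).show()

// to_time raises CANNOT_PARSE_TIME on "invalid_time", so filter it out first.
df.filter(col("s") =!= "invalid_time").select(to_time(col("s"))).show()
```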

3 files changed: 246 additions, 2 deletions

python/pyspark/sql/tests/test_functions.py (3 additions, 1 deletion)

```diff
@@ -83,7 +83,9 @@ def test_function_parity(self):
         # Functions that we expect to be missing in python until they are added to pyspark
         expected_missing_in_py = set(
             # TODO(SPARK-52888): Implement the make_time function in Python
-            ["make_time"]
+            # TODO(SPARK-52890): Implement the to_time function in Python
+            # TODO(SPARK-52891): Implement the try_to_time function in Python
+            ["make_time", "to_time", "try_to_time"]
         )
 
         self.assertEqual(
```

sql/api/src/main/scala/org/apache/spark/sql/functions.scala (70 additions, 0 deletions)

```diff
@@ -5699,6 +5699,41 @@ object functions {
   def unix_timestamp(s: Column, p: String): Column =
     Column.fn("unix_timestamp", s, lit(p))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column): Column = {
+    Column.fn("to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column, format: Column): Column = {
+    Column.fn("to_time", str, format)
+  }
+
   /**
    * Converts to a timestamp by casting rules to `TimestampType`.
    *
@@ -5731,6 +5766,41 @@ object functions {
    */
   def to_timestamp(s: Column, fmt: String): Column = Column.fn("to_timestamp", s, lit(fmt))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column): Column = {
+    Column.fn("try_to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column, format: Column): Column = {
+    Column.fn("try_to_time", str, format)
+  }
+
   /**
    * Parses the `s` with the `format` to a timestamp. The function always returns null on an
    * invalid input with`/`without ANSI SQL mode enabled. The result data type is consistent with
```
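Note that both new functions take only `Column` arguments; unlike `to_timestamp`, no `String`-format overload is added, so a literal pattern must be wrapped in `lit`. A hedged sketch (names illustrative, assuming an active `SparkSession` as `spark`):

```scala
import org.apache.spark.sql.functions.{col, lit, to_time, try_to_time}
import spark.implicits._ // assumes `spark` is an active SparkSession

val df = Seq("23.59.59").toDF("s")

// The format argument is a Column, so literal patterns go through lit(...).
df.select(to_time(col("s"), lit("HH.mm.ss"))).show()
df.select(try_to_time(col("s"), lit("HH.mm.ss"))).show()
```

Because each overload forwards straight to `Column.fn`, the definitions live in the shared `sql/api` module and resolve to the same named SQL expressions at analysis time.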

sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala (173 additions, 1 deletion)

```diff
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import java.time.LocalTime
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkDateTimeException}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
@@ -161,6 +161,178 @@ abstract class TimeFunctionsSuiteBase extends QueryTest with SharedSparkSession
     checkAnswer(result1, expected)
     checkAnswer(result2, expected)
   }
+
+  test("SPARK-52883: to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str)"
+    )
+    val result2 = df.select(
+      to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq("invalid_time").toDF("str")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH:mm:ss.SSSSSS'")
+    )
+  }
+
+  test("SPARK-52883: to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false),
+      StructField("format", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.S"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str, format)"
+    )
+    val result2 = df.select(
+      to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq(("invalid_time", "HH.mm.ss")).toDF("str", "format")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"), col("format"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH.mm.ss'")
+    )
+  }
+
+  test("SPARK-52884: try_to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999"),
+      Row("invalid_time"),
+      Row(null)
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
+
+  test("SPARK-52884: try_to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType),
+      StructField("format", StringType)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.SSS"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS"),
+      Row("invalid_time", "HH.mm.ss"),
+      Row("00.00.00", "invalid_format"),
+      Row("invalid_time", "invalid_format"),
+      Row("00:00:00", "HH.mm.ss"),
+      Row("abc", "HH.mm.ss"),
+      Row("00:00:00", null),
+      Row(null, "HH.mm.ss")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str, format)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null,
+      null,
+      null,
+      null,
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
 }
 
 // This class is used to run the same tests with ANSI mode enabled explicitly.
```
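The trailing context line above references the subclass that re-runs this suite with ANSI mode on. That subclass sits outside the diff's context window; a minimal sketch of the usual Spark test pattern (the class name is hypothetical):

```scala
// Hypothetical name: the real subclass is not visible in this diff.
class TimeFunctionsAnsiSuite extends TimeFunctionsSuiteBase {
  // Re-run every test above with ANSI SQL mode enabled explicitly.
  override protected def sparkConf: SparkConf =
    super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "true")
}
```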
