Skip to content

Commit 2822526

Browse files
committed
[SPARK-51556][SQL] Add the try_to_time function
### What changes were proposed in this pull request? In the PR, I propose a new SQL function `try_to_time` which is similar to `to_time` but returns NULL instead of raising an error in the cases of malformed input strings or strings that do not match the given pattern. #### Syntax ```sql try_to_time(str[, format]) ``` #### Arguments - `str`: A STRING expression representing a time. - `format`: An optional format STRING expression. #### Returns A TIME. - If `format` is supplied, it must conform with Datetime patterns. - If `format` is not supplied, the function is a synonym for `cast(expr AS TIME)`. - If `format` is malformed or its application does not result in a well formed time, the function returns NULL. To return an error instead of NULL in case of a malformed `str` use `to_time`. #### Examples ```sql > SELECT try_to_time('00:12:00'); 00:12:00 > SELECT try_to_time("02-69", "HH-mm"); NULL ``` ### Why are the changes needed? To improve user experience with Spark SQL. If a user needs to process wrongly formatted time values in a string format, and ignore errors, she/he can use `try_to_time` instead of `to_time`. ### Does this PR introduce _any_ user-facing change? Yes, it extends Spark SQL with a new function. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *ExpressionInfoSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z time.sql" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#50335 from MaxGekk/try_to_time. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent 9287760 commit 2822526

File tree

7 files changed

+209
-1
lines changed

7 files changed

+209
-1
lines changed

docs/sql-ref-ansi-compliance.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t
384384
- `try_make_timestamp_ltz`: identical to the function `make_timestamp_ltz`, except that it returns `NULL` result instead of throwing an exception on error.
385385
- `try_make_timestamp_ntz`: identical to the function `make_timestamp_ntz`, except that it returns `NULL` result instead of throwing an exception on error.
386386
- `try_make_interval`: identical to the function `make_interval`, except that it returns `NULL` result instead of throwing an exception on invalid interval.
387+
- `try_to_time`: identical to the function `to_time`, except that it returns `NULL` result instead of throwing an exception on string parsing error.
387388

388389
### SQL Keywords (optional, disabled by default)
389390

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ object FunctionRegistry {
463463
expressionBuilder("try_sum", TrySumExpressionBuilder, setAlias = true),
464464
expression[TryToBinary]("try_to_binary"),
465465
expressionBuilder("try_to_timestamp", TryToTimestampExpressionBuilder, setAlias = true),
466+
expressionBuilder("try_to_time", TryToTimeExpressionBuilder, setAlias = true),
466467
expression[TryAesDecrypt]("try_aes_decrypt"),
467468
expression[TryReflect]("try_reflect"),
468469
expression[TryUrlDecode]("try_url_decode"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.time.DateTimeException
2121

22+
import org.apache.spark.sql.catalyst.analysis.ExpressionBuilder
2223
import org.apache.spark.sql.catalyst.expressions.objects.Invoke
2324
import org.apache.spark.sql.catalyst.util.TimeFormatter
24-
import org.apache.spark.sql.errors.QueryExecutionErrors
25+
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
2526
import org.apache.spark.sql.internal.types.StringTypeWithCollation
2627
import org.apache.spark.sql.types.{AbstractDataType, ObjectType, TimeType}
2728
import org.apache.spark.unsafe.types.UTF8String
@@ -120,3 +121,42 @@ case class ToTimeParser(fmt: Option[String]) {
120121
}
121122
}
122123
}
124+
125+
/**
 * Parses a column to a time based on the supplied format.
 *
 * Expression builder for the `try_to_time` SQL function: the NULL-on-error
 * counterpart of `to_time`, implemented by wrapping `ToTime` in `TryEval`
 * so parse failures yield NULL instead of raising an exception.
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """
    _FUNC_(str[, format]) - Parses the `str` expression with the `format` expression to a time.
      If `format` is malformed or its application does not result in a well formed time, the function
      returns NULL. By default, it follows casting rules to a time if the `format` is omitted.
  """,
  arguments = """
    Arguments:
      * str - A string to be parsed to time.
      * format - Time format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
                 time format patterns.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('00:12:00.001');
       00:12:00.001
      > SELECT _FUNC_('12.10.05.999999', 'HH.mm.ss.SSSSSS');
       12:10:05.999999
      > SELECT _FUNC_('foo', 'HH:mm:ss');
       NULL
  """,
  group = "datetime_funcs",
  since = "4.1.0")
// scalastyle:on line.size.limit
object TryToTimeExpressionBuilder extends ExpressionBuilder {
  override def build(funcName: String, expressions: Seq[Expression]): Expression = {
    val numArgs = expressions.length
    // Accept `try_to_time(str)` or `try_to_time(str, format)`; any other arity
    // is a compile-time (analysis) error, matching TryToTimestampExpressionBuilder.
    if (numArgs == 1 || numArgs == 2) {
      // `drop(1).lastOption` is None for the 1-arg form, Some(format) for the 2-arg form.
      TryEval(ToTime(expressions.head, expressions.drop(1).lastOption))
    } else {
      throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(1, 2), numArgs)
    }
  }
}

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@
367367
| org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct<try_subtract(2, 1):int> |
368368
| org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | SELECT try_to_binary('abc', 'utf-8') | struct<try_to_binary(abc, utf-8):binary> |
369369
| org.apache.spark.sql.catalyst.expressions.TryToNumber | try_to_number | SELECT try_to_number('454', '999') | struct<try_to_number(454, 999):decimal(3,0)> |
370+
| org.apache.spark.sql.catalyst.expressions.TryToTimeExpressionBuilder | try_to_time | SELECT try_to_time('00:12:00.001') | struct<try_to_time(to_time(00:12:00.001)):time(6)> |
370371
| org.apache.spark.sql.catalyst.expressions.TryToTimestampExpressionBuilder | try_to_timestamp | SELECT try_to_timestamp('2016-12-31 00:12:00') | struct<try_to_timestamp(2016-12-31 00:12:00):timestamp> |
371372
| org.apache.spark.sql.catalyst.expressions.TryUrlDecode | try_url_decode | SELECT try_url_decode('https%3A%2F%2Fspark.apache.org') | struct<try_url_decode(https%3A%2F%2Fspark.apache.org):string> |
372373
| org.apache.spark.sql.catalyst.expressions.TryValidateUTF8 | try_validate_utf8 | SELECT try_validate_utf8('Spark') | struct<try_validate_utf8(Spark):string> |

sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,78 @@ select to_time("13-60", "HH-mm")
4444
-- !query analysis
4545
Project [to_time(13-60, Some(HH-mm)) AS to_time(13-60, HH-mm)#x]
4646
+- OneRowRelation
47+
48+
49+
-- !query
50+
select try_to_time(null), try_to_time('00:12:00'), try_to_time('01:02:03', 'HH:mm:ss')
51+
-- !query analysis
52+
Project [try_to_time(to_time(null, None)) AS try_to_time(to_time(NULL))#x, try_to_time(to_time(00:12:00, None)) AS try_to_time(to_time(00:12:00))#x, try_to_time(to_time(01:02:03, Some(HH:mm:ss))) AS try_to_time(to_time(01:02:03, HH:mm:ss))#x]
53+
+- OneRowRelation
54+
55+
56+
-- !query
57+
select try_to_time(1)
58+
-- !query analysis
59+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
60+
{
61+
"errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
62+
"sqlState" : "42K09",
63+
"messageParameters" : {
64+
"inputSql" : "\"1\"",
65+
"inputType" : "\"INT\"",
66+
"paramIndex" : "first",
67+
"requiredType" : "\"STRING\"",
68+
"sqlExpr" : "\"to_time(1)\""
69+
},
70+
"queryContext" : [ {
71+
"objectType" : "",
72+
"objectName" : "",
73+
"startIndex" : 8,
74+
"stopIndex" : 21,
75+
"fragment" : "try_to_time(1)"
76+
} ]
77+
}
78+
79+
80+
-- !query
81+
select try_to_time('12:48:31 abc')
82+
-- !query analysis
83+
Project [try_to_time(to_time(12:48:31 abc, None)) AS try_to_time(to_time(12:48:31 abc))#x]
84+
+- OneRowRelation
85+
86+
87+
-- !query
88+
select try_to_time('10:11:12.', 'HH:mm:ss.SSSSSS')
89+
-- !query analysis
90+
Project [try_to_time(to_time(10:11:12., Some(HH:mm:ss.SSSSSS))) AS try_to_time(to_time(10:11:12., HH:mm:ss.SSSSSS))#x]
91+
+- OneRowRelation
92+
93+
94+
-- !query
95+
select try_to_time("02-69", "HH-mm")
96+
-- !query analysis
97+
Project [try_to_time(to_time(02-69, Some(HH-mm))) AS try_to_time(to_time(02-69, HH-mm))#x]
98+
+- OneRowRelation
99+
100+
101+
-- !query
102+
select try_to_time('11:12:13', 'HH:mm:ss', 'SSSSSS')
103+
-- !query analysis
104+
org.apache.spark.sql.AnalysisException
105+
{
106+
"errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
107+
"sqlState" : "42605",
108+
"messageParameters" : {
109+
"actualNum" : "3",
110+
"docroot" : "https://spark.apache.org/docs/latest",
111+
"expectedNum" : "[1, 2]",
112+
"functionName" : "`try_to_time`"
113+
},
114+
"queryContext" : [ {
115+
"objectType" : "",
116+
"objectName" : "",
117+
"startIndex" : 8,
118+
"stopIndex" : 52,
119+
"fragment" : "try_to_time('11:12:13', 'HH:mm:ss', 'SSSSSS')"
120+
} ]
121+
}

sql/core/src/test/resources/sql-tests/inputs/time.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,10 @@ select to_time(time_str, fmt_str) from time_view;
1111
select to_time("11", "HH");
1212
-- invalid: there is no 13 hours
1313
select to_time("13-60", "HH-mm");
14+
15+
select try_to_time(null), try_to_time('00:12:00'), try_to_time('01:02:03', 'HH:mm:ss');
16+
select try_to_time(1);
17+
select try_to_time('12:48:31 abc');
18+
select try_to_time('10:11:12.', 'HH:mm:ss.SSSSSS');
19+
select try_to_time("02-69", "HH-mm");
20+
select try_to_time('11:12:13', 'HH:mm:ss', 'SSSSSS');

sql/core/src/test/resources/sql-tests/results/time.sql.out

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,86 @@ org.apache.spark.SparkDateTimeException
5353
"input" : "'13-60'"
5454
}
5555
}
56+
57+
58+
-- !query
59+
select try_to_time(null), try_to_time('00:12:00'), try_to_time('01:02:03', 'HH:mm:ss')
60+
-- !query schema
61+
struct<try_to_time(to_time(NULL)):time(6),try_to_time(to_time(00:12:00)):time(6),try_to_time(to_time(01:02:03, HH:mm:ss)):time(6)>
62+
-- !query output
63+
NULL 00:12:00 01:02:03
64+
65+
66+
-- !query
67+
select try_to_time(1)
68+
-- !query schema
69+
struct<>
70+
-- !query output
71+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
72+
{
73+
"errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
74+
"sqlState" : "42K09",
75+
"messageParameters" : {
76+
"inputSql" : "\"1\"",
77+
"inputType" : "\"INT\"",
78+
"paramIndex" : "first",
79+
"requiredType" : "\"STRING\"",
80+
"sqlExpr" : "\"to_time(1)\""
81+
},
82+
"queryContext" : [ {
83+
"objectType" : "",
84+
"objectName" : "",
85+
"startIndex" : 8,
86+
"stopIndex" : 21,
87+
"fragment" : "try_to_time(1)"
88+
} ]
89+
}
90+
91+
92+
-- !query
93+
select try_to_time('12:48:31 abc')
94+
-- !query schema
95+
struct<try_to_time(to_time(12:48:31 abc)):time(6)>
96+
-- !query output
97+
NULL
98+
99+
100+
-- !query
101+
select try_to_time('10:11:12.', 'HH:mm:ss.SSSSSS')
102+
-- !query schema
103+
struct<try_to_time(to_time(10:11:12., HH:mm:ss.SSSSSS)):time(6)>
104+
-- !query output
105+
NULL
106+
107+
108+
-- !query
109+
select try_to_time("02-69", "HH-mm")
110+
-- !query schema
111+
struct<try_to_time(to_time(02-69, HH-mm)):time(6)>
112+
-- !query output
113+
NULL
114+
115+
116+
-- !query
117+
select try_to_time('11:12:13', 'HH:mm:ss', 'SSSSSS')
118+
-- !query schema
119+
struct<>
120+
-- !query output
121+
org.apache.spark.sql.AnalysisException
122+
{
123+
"errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
124+
"sqlState" : "42605",
125+
"messageParameters" : {
126+
"actualNum" : "3",
127+
"docroot" : "https://spark.apache.org/docs/latest",
128+
"expectedNum" : "[1, 2]",
129+
"functionName" : "`try_to_time`"
130+
},
131+
"queryContext" : [ {
132+
"objectType" : "",
133+
"objectName" : "",
134+
"startIndex" : 8,
135+
"stopIndex" : 52,
136+
"fragment" : "try_to_time('11:12:13', 'HH:mm:ss', 'SSSSSS')"
137+
} ]
138+
}

0 commit comments

Comments
 (0)