Skip to content

Commit 21db2f8

Browse files
MaxGekkdongjoon-hyun
authored andcommitted
[SPARK-29237][SQL] Prevent real function names in expression example template
### What changes were proposed in this pull request? In the PR, I propose to replace function names in some expression examples by `_FUNC_`, and add a test to check that `_FUNC_` always present in all examples. ### Why are the changes needed? Binding of a function name to an expression is performed in `FunctionRegistry` which is single source of truth. Expression examples should avoid using function name directly because this can make the examples invalid in the future. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Added new test to `SQLQuerySuite` which analyses expression example, and check presence of `_FUNC_`. Closes #25924 from MaxGekk/fix-func-examples. Authored-by: Maxim Gekk <max.gekk@gmail.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
1 parent a1b90bf commit 21db2f8

File tree

3 files changed

+32
-1
lines changed

3 files changed

+32
-1
lines changed

sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions;
1919

20+
import com.google.common.annotations.VisibleForTesting;
21+
2022
/**
2123
* Expression information, will be used to describe a expression.
2224
*/
@@ -56,6 +58,11 @@ public String getArguments() {
5658
return arguments;
5759
}
5860

61+
@VisibleForTesting
62+
public String getOriginalExamples() {
63+
return examples;
64+
}
65+
5966
public String getExamples() {
6067
return replaceFunctionName(examples);
6168
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S
10971097
usage = "_FUNC_(timestamp, timezone) - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'.",
10981098
examples = """
10991099
Examples:
1100-
> SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');
1100+
> SELECT _FUNC_('2016-08-31', 'Asia/Seoul');
11011101
2016-08-31 09:00:00
11021102
""",
11031103
since = "1.5.0",

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,30 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
115115
}
116116
}
117117

118+
test("using _FUNC_ instead of function names in examples") {
119+
val exampleRe = "(>.*;)".r
120+
val ignoreSet = Set(
121+
// Examples for CaseWhen show simpler syntax:
122+
// `CASE WHEN ... THEN ... WHEN ... THEN ... END`
123+
"org.apache.spark.sql.catalyst.expressions.CaseWhen",
124+
// _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported
125+
"org.apache.spark.sql.catalyst.expressions.StringLocate",
126+
// _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
127+
"org.apache.spark.sql.catalyst.expressions.Remainder",
128+
// Examples demonstrate alternative names, see SPARK-20749
129+
"org.apache.spark.sql.catalyst.expressions.Length")
130+
spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
131+
val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
132+
val className = info.getClassName
133+
withClue(s"Expression class '$className'") {
134+
val exprExamples = info.getOriginalExamples
135+
if (!exprExamples.isEmpty && !ignoreSet.contains(className)) {
136+
assert(exampleRe.findAllIn(exprExamples).toSet.forall(_.contains("_FUNC_")))
137+
}
138+
}
139+
}
140+
}
141+
118142
test("SPARK-6743: no columns from cache") {
119143
Seq(
120144
(83, 0, 38),

0 commit comments

Comments
 (0)