Skip to content

Commit c36fecc

Browse files
committed
[SPARK-23327][SQL] Update the description and tests of three external API or functions
## What changes were proposed in this pull request? Update the descriptions and tests of three external APIs/functions: `createFunction`, `length`, and `repartitionByRange`. ## How was this patch tested? N/A Author: gatorsmile <gatorsmile@gmail.com> Closes #20495 from gatorsmile/updateFunc.
1 parent b96a083 commit c36fecc

File tree

8 files changed

+44
-28
lines changed

8 files changed

+44
-28
lines changed

R/pkg/R/functions.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,9 @@ setMethod("last_day",
10261026
})
10271027

10281028
#' @details
1029-
#' \code{length}: Computes the length of a given string or binary column.
1029+
#' \code{length}: Computes the character length of a string data or number of bytes
1030+
#' of a binary data. The length of string data includes the trailing spaces.
1031+
#' The length of binary data includes binary zeros.
10301032
#'
10311033
#' @rdname column_string_functions
10321034
#' @aliases length length,Column-method

python/pyspark/sql/functions.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,10 +1705,12 @@ def unhex(col):
17051705
@ignore_unicode_prefix
17061706
@since(1.5)
17071707
def length(col):
1708-
"""Calculates the length of a string or binary expression.
1708+
"""Computes the character length of string data or number of bytes of binary data.
1709+
The length of character data includes the trailing spaces. The length of binary data
1710+
includes binary zeros.
17091711
1710-
>>> spark.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
1711-
[Row(length=3)]
1712+
>>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
1713+
[Row(length=4)]
17121714
"""
17131715
sc = SparkContext._active_spark_context
17141716
return Column(sc._jvm.functions.length(_to_java_column(col)))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -988,8 +988,11 @@ class SessionCatalog(
988988
// -------------------------------------------------------
989989

990990
/**
991-
* Create a metastore function in the database specified in `funcDefinition`.
991+
* Create a function in the database specified in `funcDefinition`.
992992
* If no such database is specified, create it in the current database.
993+
*
994+
* @param ignoreIfExists: When true, ignore if the function with the specified name exists
995+
* in the specified database.
993996
*/
994997
def createFunction(funcDefinition: CatalogFunction, ignoreIfExists: Boolean): Unit = {
995998
val db = formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase))
@@ -1061,7 +1064,7 @@ class SessionCatalog(
10611064
}
10621065

10631066
/**
1064-
* Check if the specified function exists.
1067+
* Check if the function with the specified name exists
10651068
*/
10661069
def functionExists(name: FunctionIdentifier): Boolean = {
10671070
val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,19 +1653,19 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
16531653
* A function that returns the char length of the given string expression or
16541654
* number of bytes of the given binary expression.
16551655
*/
1656-
// scalastyle:off line.size.limit
16571656
@ExpressionDescription(
1658-
usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data.",
1657+
usage = "_FUNC_(expr) - Returns the character length of string data or number of bytes of " +
1658+
"binary data. The length of string data includes the trailing spaces. The length of binary " +
1659+
"data includes binary zeros.",
16591660
examples = """
16601661
Examples:
1661-
> SELECT _FUNC_('Spark SQL');
1662-
9
1663-
> SELECT CHAR_LENGTH('Spark SQL');
1664-
9
1665-
> SELECT CHARACTER_LENGTH('Spark SQL');
1666-
9
1662+
> SELECT _FUNC_('Spark SQL ');
1663+
10
1664+
> SELECT CHAR_LENGTH('Spark SQL ');
1665+
10
1666+
> SELECT CHARACTER_LENGTH('Spark SQL ');
1667+
10
16671668
""")
1668-
// scalastyle:on line.size.limit
16691669
case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
16701670
override def dataType: DataType = IntegerType
16711671
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
@@ -1687,7 +1687,7 @@ case class Length(child: Expression) extends UnaryExpression with ImplicitCastIn
16871687
* A function that returns the bit length of the given string or binary expression.
16881688
*/
16891689
@ExpressionDescription(
1690-
usage = "_FUNC_(expr) - Returns the bit length of `expr` or number of bits in binary data.",
1690+
usage = "_FUNC_(expr) - Returns the bit length of string data or number of bits of binary data.",
16911691
examples = """
16921692
Examples:
16931693
> SELECT _FUNC_('Spark SQL');
@@ -1716,7 +1716,8 @@ case class BitLength(child: Expression) extends UnaryExpression with ImplicitCas
17161716
* A function that returns the byte length of the given string or binary expression.
17171717
*/
17181718
@ExpressionDescription(
1719-
usage = "_FUNC_(expr) - Returns the byte length of `expr` or number of bytes in binary data.",
1719+
usage = "_FUNC_(expr) - Returns the byte length of string data or number of bytes of binary " +
1720+
"data.",
17201721
examples = """
17211722
Examples:
17221723
> SELECT _FUNC_('Spark SQL');

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2825,6 +2825,7 @@ class Dataset[T] private[sql](
28252825
*
28262826
* At least one partition-by expression must be specified.
28272827
* When no explicit sort order is specified, "ascending nulls first" is assumed.
2828+
* Note, the rows are not sorted in each partition of the resulting Dataset.
28282829
*
28292830
* @group typedrel
28302831
* @since 2.3.0
@@ -2848,6 +2849,7 @@ class Dataset[T] private[sql](
28482849
*
28492850
* At least one partition-by expression must be specified.
28502851
* When no explicit sort order is specified, "ascending nulls first" is assumed.
2852+
* Note, the rows are not sorted in each partition of the resulting Dataset.
28512853
*
28522854
* @group typedrel
28532855
* @since 2.3.0

sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,28 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
4040
* CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [databaseName.]functionName
4141
* AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']]
4242
* }}}
43+
*
44+
* @param ignoreIfExists: When true, ignore if the function with the specified name exists
45+
* in the specified database.
46+
* @param replace: When true, alter the function with the specified name
4347
*/
4448
case class CreateFunctionCommand(
4549
databaseName: Option[String],
4650
functionName: String,
4751
className: String,
4852
resources: Seq[FunctionResource],
4953
isTemp: Boolean,
50-
ifNotExists: Boolean,
54+
ignoreIfExists: Boolean,
5155
replace: Boolean)
5256
extends RunnableCommand {
5357

54-
if (ifNotExists && replace) {
58+
if (ignoreIfExists && replace) {
5559
throw new AnalysisException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE" +
5660
" is not allowed.")
5761
}
5862

5963
// Disallow to define a temporary function with `IF NOT EXISTS`
60-
if (ifNotExists && isTemp) {
64+
if (ignoreIfExists && isTemp) {
6165
throw new AnalysisException(
6266
"It is not allowed to define a TEMPORARY function with IF NOT EXISTS.")
6367
}
@@ -79,12 +83,12 @@ case class CreateFunctionCommand(
7983
// Handles `CREATE OR REPLACE FUNCTION AS ... USING ...`
8084
if (replace && catalog.functionExists(func.identifier)) {
8185
// alter the function in the metastore
82-
catalog.alterFunction(CatalogFunction(func.identifier, className, resources))
86+
catalog.alterFunction(func)
8387
} else {
8488
// For a permanent, we will store the metadata into underlying external catalog.
8589
// This function will be loaded into the FunctionRegistry when a query uses it.
8690
// We do not load it into FunctionRegistry right now.
87-
catalog.createFunction(CatalogFunction(func.identifier, className, resources), ifNotExists)
91+
catalog.createFunction(func, ignoreIfExists)
8892
}
8993
}
9094
Seq.empty[Row]

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2267,7 +2267,9 @@ object functions {
22672267
}
22682268

22692269
/**
2270-
* Computes the length of a given string or binary column.
2270+
* Computes the character length of a given string or number of bytes of a binary string.
2271+
* The length of character strings include the trailing spaces. The length of binary strings
2272+
* includes binary zeros.
22712273
*
22722274
* @group string_funcs
22732275
* @since 1.5.0

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,39 +236,39 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
236236
Seq(
237237
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
238238
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
239-
isTemp = true, ifNotExists = false, replace = false)
239+
isTemp = true, ignoreIfExists = false, replace = false)
240240
val expected2 = CreateFunctionCommand(
241241
Some("hello"),
242242
"world",
243243
"com.matthewrathbone.example.SimpleUDFExample",
244244
Seq(
245245
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
246246
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
247-
isTemp = false, ifNotExists = false, replace = false)
247+
isTemp = false, ignoreIfExists = false, replace = false)
248248
val expected3 = CreateFunctionCommand(
249249
None,
250250
"helloworld3",
251251
"com.matthewrathbone.example.SimpleUDFExample",
252252
Seq(
253253
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
254254
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
255-
isTemp = true, ifNotExists = false, replace = true)
255+
isTemp = true, ignoreIfExists = false, replace = true)
256256
val expected4 = CreateFunctionCommand(
257257
Some("hello"),
258258
"world1",
259259
"com.matthewrathbone.example.SimpleUDFExample",
260260
Seq(
261261
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
262262
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
263-
isTemp = false, ifNotExists = false, replace = true)
263+
isTemp = false, ignoreIfExists = false, replace = true)
264264
val expected5 = CreateFunctionCommand(
265265
Some("hello"),
266266
"world2",
267267
"com.matthewrathbone.example.SimpleUDFExample",
268268
Seq(
269269
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
270270
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
271-
isTemp = false, ifNotExists = true, replace = false)
271+
isTemp = false, ignoreIfExists = true, replace = false)
272272
comparePlans(parsed1, expected1)
273273
comparePlans(parsed2, expected2)
274274
comparePlans(parsed3, expected3)

0 commit comments

Comments
 (0)