From fc5bbce08557744973a2e0a7a43760e7d93e387e Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 13 Aug 2020 16:23:58 +0800 Subject: [PATCH] follow comment --- .../sql/catalyst/parser/PlanParserSuite.scala | 48 ++++++------- .../SparkScriptTransformationExec.scala | 2 + .../resources/sql-tests/inputs/transform.sql | 68 +++++++++---------- .../sql-tests/results/transform.sql.out | 68 +++++++++---------- .../HiveScriptTransformationExec.scala | 2 + 5 files changed, 96 insertions(+), 92 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 8819d24ce419b..319bd011a1a2c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1088,19 +1088,19 @@ class PlanParserSuite extends AnalysisTest { assertEqual( """ |SELECT TRANSFORM(a, b, c) - |ROW FORMAT DELIMITED - |FIELDS TERMINATED BY '\t' - |COLLECTION ITEMS TERMINATED BY '\u0002' - |MAP KEYS TERMINATED BY '\u0003' - |LINES TERMINATED BY '\n' - |NULL DEFINED AS 'null' - |USING 'cat' AS (a, b, c) - |ROW FORMAT DELIMITED - |FIELDS TERMINATED BY '\t' - |COLLECTION ITEMS TERMINATED BY '\u0004' - |MAP KEYS TERMINATED BY '\u0005' - |LINES TERMINATED BY '\n' - |NULL DEFINED AS 'NULL' + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0002' + | MAP KEYS TERMINATED BY '\u0003' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'null' + | USING 'cat' AS (a, b, c) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0004' + | MAP KEYS TERMINATED BY '\u0005' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'NULL' |FROM testData """.stripMargin, ScriptTransformation( @@ -1127,17 +1127,17 @@ class PlanParserSuite extends AnalysisTest { intercept( """ |SELECT TRANSFORM(a, b, c) - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' - |WITH SERDEPROPERTIES( - | "separatorChar" = "\t", - | "quoteChar" = "'", - | "escapeChar" = "\\") - |USING 'cat' AS (a, b, c) - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' - |WITH SERDEPROPERTIES( - | "separatorChar" = "\t", - | "quoteChar" = "'", - | "escapeChar" = "\\") + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") + | USING 'cat' AS (a, b, c) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") |FROM testData """.stripMargin, "TRANSFORM with serde is only supported in hive mode") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala index b87c20e6a5656..b2e54fa043a5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala @@ -33,6 +33,8 @@ import org.apache.spark.util.CircularBuffer * @param input the set of expression that should be passed to the script. * @param script the command that should be executed. * @param output the attributes that are produced by the script. + * @param child Child Operator + * @param ioschema The wrapper classes of input and output schema properties */ case class SparkScriptTransformationExec( input: Seq[Expression], diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index 8610e384d6fab..c85e9a0cf3b93 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -21,27 +21,27 @@ FROM t; -- common supported data types between no serde and serde transform SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( - SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) - USING 'cat' AS ( - a string, - b boolean, - c binary, - d tinyint, - e int, - f smallint, - g long, - h float, - i double, - j decimal(38, 18), - k timestamp, - l date) - FROM t + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t ) tmp; -- common supported data types between no serde and serde transform SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( - SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) - USING 'cat' AS ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( a string, b string, c string, @@ -54,7 +54,7 @@ SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( j string, k string, l string) - FROM t + FROM t ) tmp; -- SPARK-32388 handle schema less @@ -90,25 +90,25 @@ REDUCE a, b USING 'cat' AS (a, b) FROM t; -- transform with defined row format delimit SELECT TRANSFORM(a, b, c, null) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' USING 'cat' AS (a, b, c, d) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' FROM t; SELECT TRANSFORM(a, b, c, null) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' USING 'cat' AS (d) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '||' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '||' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' FROM t; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index 744d6384f9c45..bac4f08413e61 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -50,21 +50,21 @@ Subprocess exited with status 2. Error: python: can't open file 'some_non_existe -- !query SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( - SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) - USING 'cat' AS ( - a string, - b boolean, - c binary, - d tinyint, - e int, - f smallint, - g long, - h float, - i double, - j decimal(38, 18), - k timestamp, - l date) - FROM t + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t ) tmp -- !query schema struct @@ -76,8 +76,8 @@ struct @@ -188,15 +188,15 @@ struct -- !query SELECT TRANSFORM(a, b, c, null) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' USING 'cat' AS (a, b, c, d) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' FROM t -- !query schema struct @@ -207,15 +207,15 @@ struct -- !query SELECT TRANSFORM(a, b, c, null) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' USING 'cat' AS (d) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '||' -LINES TERMINATED BY '\n' -NULL DEFINED AS 'NULL' + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '||' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' FROM t -- !query schema struct diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 4096916a100c3..60c07d47c6bfe 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -45,6 +45,8 @@ import org.apache.spark.util.{CircularBuffer, Utils} * @param input the set of expression that should be passed to the script. * @param script the command that should be executed. * @param output the attributes that are produced by the script. + * @param child Child Operator + * @param ioschema The wrapper classes of input and output schema properties */ case class HiveScriptTransformationExec( input: Seq[Expression],