Commit a76062c

Updated according to review comments

Author: xy_xin
1 parent a58a87b · commit a76062c

7 files changed: +127 -30 lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
Lines changed: 2 additions & 2 deletions

@@ -215,7 +215,7 @@ statement
     | SET .*? #setConfiguration
     | RESET #resetConfiguration
     | DELETE FROM multipartIdentifier tableAlias whereClause #deleteFromTable
-    | UPDATE multipartIdentifier tableAlias setClause whereClause #updateTable
+    | UPDATE multipartIdentifier tableAlias setClause (whereClause)? #updateTable
     | unsupportedHiveNativeCommands .*? #failNativeCommand
     ;

@@ -480,7 +480,7 @@ setClause
     ;

 assign
-    : key=multipartIdentifier EQ value=valueExpression
+    : key=multipartIdentifier EQ value=expression
     ;

 whereClause
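With these two grammar changes, UPDATE's WHERE clause becomes optional and the right-hand side of an assignment is a full expression rather than only a valueExpression. A minimal sketch of what the revised rule now accepts, reusing the table and column names from the test suite below:

    // Both statements now parse; without a WHERE clause the update
    // applies to every row of the table.
    spark.sql("UPDATE testcat.ns1.ns2.tbl SET name='Robert', age=32 WHERE p = 2")
    spark.sql("UPDATE testcat.ns1.ns2.tbl SET name='Robert', age=32")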

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
Lines changed: 6 additions & 1 deletion

@@ -363,13 +363,18 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val sets = ctx.setClause().assign().asScala.map {
       kv => visitMultipartIdentifier(kv.key) -> expression(kv.value)
     }.toMap
+    val predicate = if (ctx.whereClause() != null) {
+      Some(expression(ctx.whereClause().booleanExpression()))
+    } else {
+      None
+    }

     UpdateTableStatement(
       tableId,
       tableAlias,
       sets.keys.toSeq,
       sets.values.toSeq,
-      expression(ctx.whereClause().booleanExpression()))
+      predicate)
   }

   /**
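The builder now checks for a missing whereClause context instead of unconditionally dereferencing it, mapping the absent clause to None. The same null-to-Option conversion can be written more compactly; a sketch, assuming the same ANTLR context and expression helper as above:

    // Option(...) turns a null ANTLR context into None, so an UPDATE
    // without WHERE yields predicate = None rather than an NPE.
    val predicate: Option[Expression] =
      Option(ctx.whereClause()).map(w => expression(w.booleanExpression()))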

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
Lines changed: 1 addition & 1 deletion

@@ -578,7 +578,7 @@ case class UpdateTable(
     child: LogicalPlan,
     attrs: Seq[Attribute],
     values: Seq[Expression],
-    condition: Expression) extends Command {
+    condition: Option[Expression]) extends Command {

   override def children: Seq[LogicalPlan] = child :: Nil
 }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/UpdateTableStatement.scala
Lines changed: 1 addition & 1 deletion

@@ -24,4 +24,4 @@ case class UpdateTableStatement(
     tableAlias: Option[String],
     attrs: Seq[Seq[String]],
     values: Seq[Expression],
-    condition: Expression) extends ParsedStatement
+    condition: Option[Expression]) extends ParsedStatement
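Both the parsed statement and the UpdateTable logical plan now carry the predicate as Option[Expression]. A hedged sketch of how the two UPDATE shapes map onto the statement node, following the argument order of the AstBuilder call above; the attribute and literal values are illustrative, not from the commit:

    // Unconditional UPDATE: condition = None.
    val unconditional = UpdateTableStatement(
      Seq("testcat", "ns1", "ns2", "tbl"),   // multipart table name
      None,                                  // no table alias
      Seq(Seq("name")),                      // attrs to set
      Seq(Literal("Robert")),                // new values
      None)                                  // no WHERE clause
    // Filtered UPDATE: condition = Some(predicate).
    val filtered = unconditional.copy(
      condition = Some(EqualTo(UnresolvedAttribute("p"), Literal(2))))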

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
Lines changed: 12 additions & 5 deletions

@@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan}
 import org.apache.spark.sql.execution.datasources.DataSourceStrategy
 import org.apache.spark.sql.execution.streaming.continuous.{ContinuousCoalesceExec, WriteToContinuousDataSource, WriteToContinuousDataSourceExec}
 import org.apache.spark.sql.sources
+import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.sources.v2.TableCapability
 import org.apache.spark.sql.sources.v2.reader._
 import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream}

@@ -246,6 +247,11 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper {
       DeleteFromTableExec(r.table.asDeletable, filters) :: Nil

     case UpdateTable(r: DataSourceV2Relation, attrs, values, condition) =>
+      val nested = attrs.asInstanceOf[Seq[Any]].filterNot(_.isInstanceOf[AttributeReference])
+      if (nested.nonEmpty) {
+        throw new RuntimeException(s"Update only support non-nested fields. Nested: $nested")
+      }
       val attrsNames = attrs.map(_.name)
       // fail if any updated value cannot be converted.
       val updatedValues = values.map {

@@ -254,11 +260,12 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper {
           s" cannot translate update set to source expression: $v"))
       }
       // fail if any filter cannot be converted. correctness depends on removing all matching data.
-      val filters = splitConjunctivePredicates(condition).map {
-        f => DataSourceStrategy.translateFilter(f).getOrElse(
-          throw new AnalysisException(s"Exec update failed:" +
-            s" cannot translate expression to source filter: $f"))
-      }.toArray
+      val filters = condition.map(
+        splitConjunctivePredicates(_).map {
+          f => DataSourceStrategy.translateFilter(f).getOrElse(
+            throw new AnalysisException(s"Exec update failed:" +
+              s" cannot translate expression to source filter: $f"))
+        }.toArray).getOrElse(Array.empty[Filter])
       UpdateTableExec(r.table.asUpdatable, attrsNames, updatedValues, filters)::Nil

     case WriteToContinuousDataSource(writer, query) =>
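An absent condition now becomes an empty pushed-down filter array, which for an updatable source means every row matches; a present condition is still split into conjuncts, each of which must translate to a source filter or planning fails. An equivalent unrolled sketch of the new expression:

    // Sketch: pattern-match form of condition.map(...).getOrElse(...) above.
    val filters: Array[Filter] = condition match {
      case None => Array.empty[Filter]   // no WHERE clause: match all rows
      case Some(cond) => splitConjunctivePredicates(cond).map { f =>
        DataSourceStrategy.translateFilter(f).getOrElse(
          throw new AnalysisException(
            s"Exec update failed: cannot translate expression to source filter: $f"))
      }.toArray
    }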

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2WriteSupportCheck.scala
Lines changed: 1 addition & 1 deletion

@@ -57,7 +57,7 @@ object V2WriteSupportCheck extends (LogicalPlan => Unit) {
     }

     case UpdateTable(_, _, _, condition) =>
-      if (SubqueryExpression.hasSubquery(condition)) {
+      if (condition.exists(SubqueryExpression.hasSubquery)) {
        failAnalysis(s"Update by condition with subquery is not supported: $condition")
      }
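Option.exists returns false for None, so an unconditional UPDATE now passes this check, while any predicate containing a subquery still fails analysis. A tiny sketch of the semantics being relied on:

    // None.exists(p) == false; Some(x).exists(p) == p(x)
    val noWhere: Option[Expression] = None
    assert(!noWhere.exists(SubqueryExpression.hasSubquery)) // unconditional UPDATE is allowed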

sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSuite.scala
Lines changed: 104 additions & 19 deletions

@@ -1770,49 +1770,134 @@ class DataSourceV2SQLSuite
   test("Update: basic") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)")
-      sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
-      sql(s"UPDATE $t SET data='d' WHERE id = 2")
-      checkAnswer(spark.table(t), Seq(
-        Row(2, "d", 2), Row(2, "d", 3), Row(3, "c", 3)))
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
+      sql(s"UPDATE $t SET name='Robert', age=32")
+      checkAnswer(spark.table(t),
+        Seq(Row(1, "Robert", 32, 1),
+          Row(2, "Robert", 32, 2),
+          Row(3, "Robert", 32, 3),
+          Row(4, "Robert", 32, 3)))
     }
   }

-  test("Update: alias") {
+  test("Update: update with where clause") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)")
-      sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
-      sql(s"UPDATE $t tbl SET tbl.data='d' WHERE id = 2")
-      checkAnswer(spark.table(t), Seq(
-        Row(2, "d", 2), Row(2, "d", 3), Row(3, "c", 3)))
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
+      sql(s"UPDATE $t SET name='Robert', age=32 where p=2")
+      checkAnswer(spark.table(t),
+        Seq(Row(1, "Herry", 26, 1),
+          Row(2, "Robert", 32, 2),
+          Row(3, "Lisa", 28, 3),
+          Row(4, "Frank", 33, 3)))
+    }
+  }
+
+  test("Update: update the partition key") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
+      sql(s"UPDATE $t SET p=4 where id=4")
+      checkAnswer(spark.table(t),
+        Seq(Row(1, "Herry", 26, 1),
+          Row(2, "Jack", 31, 2),
+          Row(3, "Lisa", 28, 3),
+          Row(4, "Frank", 33, 4)))
+    }
+  }
+
+  test("Update: update with aliased target table - 1") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
+      sql(s"UPDATE $t tbl SET tbl.name='Robert', tbl.age=32 where p=2")
+      checkAnswer(spark.table(t),
+        Seq(Row(1, "Herry", 26, 1),
+          Row(2, "Robert", 32, 2),
+          Row(3, "Lisa", 28, 3),
+          Row(4, "Frank", 33, 3)))
+    }
+  }
+
+  test("Update: update with aliased target table - 2") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
+      sql(s"UPDATE $t AS tbl SET tbl.name='Robert', tbl.age=32 where p=2")
+      checkAnswer(spark.table(t),
+        Seq(Row(1, "Herry", 26, 1),
+          Row(2, "Robert", 32, 2),
+          Row(3, "Lisa", 28, 3),
+          Row(4, "Frank", 33, 3)))
+    }
+  }
+
+  test("Update: update nested field") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id int, point struct<x: double, y: double>) USING foo")
+      sql(s"INSERT INTO $t SELECT 1, named_struct('x', 1.0D, 'y', 1.0D)")
+      checkAnswer(spark.table(t), Seq(Row(1, Row(1.0, 1.0))))
+
+      val exc = intercept[RuntimeException] {
+        sql(s"UPDATE $t tbl SET tbl.point.x='2.0D', tbl.point.y='3.0D'")
+      }
+
+      checkAnswer(spark.table(t), Seq(Row(1, Row(1.0, 1.0))))
+      assert(exc.getMessage.contains("Update only support non-nested fields."))
     }
   }

   test("Update: fail if the value expression in set clause cannot be converted") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)")
-      sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
       val exc = intercept[AnalysisException] {
-        sql(s"UPDATE $t tbl SET tbl.id=tbl.id + 1 WHERE id = 3")
+        sql(s"UPDATE $t tbl SET tbl.p=tbl.p + 1 WHERE id = 3")
       }

-      assert(spark.table(t).filter("id=3").select("data").head().getString(0) == "c")
+      assert(spark.table(t).filter("id=3").select("p").head().getInt(0) == 3)
       assert(exc.getMessage.contains("Exec update failed: "))
     }
   }

   test("Update: fail if has subquery") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)")
-      sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
+      sql(s"CREATE TABLE $t (id bigint, name string, age int, p int)" +
+        " USING foo" +
+        " PARTITIONED BY (id, p)")
+      sql(s"INSERT INTO $t VALUES (1L, 'Herry', 26, 1)," +
+        s" (2L, 'Jack', 31, 2), (3L, 'Lisa', 28, 3), (4L, 'Frank', 33, 3)")
       val exc = intercept[AnalysisException] {
-        sql(s"UPDATE $t SET data='d' WHERE id IN (SELECT id FROM $t)")
+        sql(s"UPDATE $t SET name='Robert' WHERE id IN (SELECT id FROM $t)")
       }

-      assert(spark.table(t).filter("id=3").select("data").head().getString(0) == "c")
+      assert(spark.table(t).filter("id=4").select("name").head().getString(0) == "Frank")
       assert(exc.getMessage.contains("Update by condition with subquery is not supported"))
     }
   }
