Move API back to Dataset
szehon-ho committed Aug 7, 2024
1 parent 056492b commit 8f4b958
Showing 4 changed files with 42 additions and 65 deletions.
3 changes: 0 additions & 3 deletions CheckConnectJvmClientCompatibility.scala
@@ -304,11 +304,8 @@ object CheckConnectJvmClientCompatibility {
        "org.apache.spark.sql.PartitionTransform$ExtractTransform"),

      // Update Writer
-     ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SparkSession.update"),
      ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWriter"),
      ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWriter$"),
-     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithAssignment"),
-     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithAssignment$"),
      ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithCondition"),
      ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithCondition$")) ++
      mergeIntoWriterExcludeRules
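The three removed rules correspond to the SparkSession.update entry point and the UpdateWithAssignment step, both of which disappear in this commit; the UpdateWriter and UpdateWithCondition exclusions survive because those classes still exist in sql/core but not in the Connect Scala client. As a hedged sketch of how such a MiMa rule list is declared on its own (the object and val names below are hypothetical; only the ProblemFilters calls mirror the file above):

// Sketch only: a standalone MiMa exclusion list in the style of
// CheckConnectJvmClientCompatibility; object and val names are hypothetical.
import com.typesafe.tools.mima.core.{MissingClassProblem, ProblemFilter, ProblemFilters}

object UpdateWriterExcludeRules {
  // Each rule tells the binary-compatibility checker to ignore a class that
  // exists in sql/core but is intentionally absent from the Connect client.
  val rules: Seq[ProblemFilter] = Seq(
    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWriter"),
    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWriter$"),
    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithCondition"),
    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UpdateWithCondition$"))
}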
28 changes: 28 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -4136,6 +4136,34 @@ class Dataset[T] private[sql](
     new MergeIntoWriter[T](table, this, condition)
   }

+  /**
+   * Update rows in a table that match a condition.
+   *
+   * Scala Example:
+   * {{{
+   *   spark.table("source")
+   *     .update(Map("salary" -> lit(200)))
+   *     .where($"salary" === 100)
+   *     .execute()
+   * }}}
+   *
+   * @param assignments A Map of column names to Column expressions representing the updates
+   *                    to be applied.
+   * @since 4.0.0
+   */
+  def update(assignments: Map[String, Column]): UpdateWriter[T] = {
+    if (isStreaming) {
+      throw new AnalysisException(
+        errorClass = "CALL_ON_STREAMING_DATASET_UNSUPPORTED",
+        messageParameters = Map("methodName" -> toSQLId("update")))
+    }
+    new UpdateWriter[T](this, assignments)
+  }
+
   /**
    * Interface for saving the content of the streaming Dataset out into external storage.
    *
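For orientation, here is a minimal end-to-end sketch of the relocated API at a call site. The employees table and its salary column are assumptions for illustration; the fluent chain itself mirrors the Scaladoc above.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.lit

object DatasetUpdateSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("dataset-update-sketch").getOrCreate()
    import spark.implicits._

    // update() now lives on Dataset, so resolve the target table first,
    // then pass the assignments directly instead of a separate set(...) step.
    spark.table("employees")
      .update(Map("salary" -> lit(200)))
      .where($"salary" === 100) // optional filter; non-matching rows are untouched
      .execute()

    // Without where(), execute() applies the assignments to every row.
    spark.table("employees")
      .update(Map("salary" -> lit(300)))
      .execute()

    spark.stop()
  }
}

Compared with the SparkSession.update(tableName) entry point removed below, this keeps the builder generic in T and drops the intermediate UpdateWithAssignment stage.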
31 changes: 0 additions & 31 deletions sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -43,7 +43,6 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
 import org.apache.spark.sql.connector.ExternalCommandRunner
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution._
@@ -833,36 +832,6 @@
     ret
   }

-  /**
-   * Update rows in a table that match a condition.
-   *
-   * Scala Example:
-   * {{{
-   *   spark.update("source")
-   *     .set(
-   *       Map("salary" -> lit(200))
-   *     )
-   *     .where($"salary" === 100)
-   *     .execute()
-   *
-   * }}}
-   * @param tableName is either a qualified or unqualified name that designates a table or view.
-   *                  If a database is specified, it identifies the table/view from the database.
-   *                  Otherwise, it first attempts to find a temporary view with the given name
-   *                  and then match the table/view from the current database.
-   *                  Note that, the global temporary view database is also valid here.
-   * @since 4.0.0
-   */
-  def update(tableName: String): UpdateWriter = {
-    val tableDF = table(tableName)
-    if (tableDF.isStreaming) {
-      throw new AnalysisException(
-        errorClass = "CALL_ON_STREAMING_DATASET_UNSUPPORTED",
-        messageParameters = Map("methodName" -> toSQLId("update")))
-    }
-    new UpdateWriter(tableDF)
-  }
-
   // scalastyle:off
   // Disable style checker so "implicits" object can start with lowercase i
   /**
45 changes: 14 additions & 31 deletions sql/core/src/main/scala/org/apache/spark/sql/UpdateWriter.scala
@@ -22,71 +22,54 @@ import org.apache.spark.sql.catalyst.plans.logical.{Assignment, UpdateTable}
 import org.apache.spark.sql.functions.expr

 /**
  * `UpdateWriter` provides methods to define and execute an update action on a target table.
+ * This class defines methods to specify a condition on an update operation
+ * or to execute it directly.
  *
- * @param tableDF DataFrame representing table to update.
- *
- * @since 4.0.0
- */
-@Experimental
-class UpdateWriter (tableDF: DataFrame) {
-
-  /**
-   * @param assignments A Map of column names to Column expressions representing the updates
-   *                    to be applied.
-   */
-  def set(assignments: Map[String, Column]): UpdateWithAssignment = {
-    new UpdateWithAssignment(tableDF, assignments)
-  }
-}
-
-/**
- * A class for defining a condition on an update operation or directly executing it.
- *
- * @param tableDF DataFrame representing table to update.
- * @param assignment A Map of column names to Column expressions representing the updates
+ * @param dataset Dataset representing the table to update.
+ * @param assignments A Map of column names to Column expressions representing the updates
  *                    to be applied.
+ * @tparam T the type of the Dataset
  *
  * @since 4.0.0
  */
 @Experimental
-class UpdateWithAssignment(tableDF: DataFrame, assignment: Map[String, Column]) {
+class UpdateWriter[T](dataset: Dataset[T], assignments: Map[String, Column]) {

   /**
    * Limits the update to rows matching the specified condition.
    *
    * @param condition the update condition
    * @return
    */
-  def where(condition: Column): UpdateWithCondition = {
-    new UpdateWithCondition(tableDF, assignment, Some(condition))
+  def where(condition: Column): UpdateWithCondition[T] = {
+    new UpdateWithCondition(dataset, assignments, Some(condition))
   }

   /**
    * Executes the update operation.
    */
   def execute(): Unit = {
-    new UpdateWithCondition(tableDF, assignment, None)
+    new UpdateWithCondition(dataset, assignments, None).execute()
   }
 }

 /**
  * A class for executing an update operation.
  *
- * @param tableDF DataFrame representing table to update.
+ * @param dataset Dataset representing the table to update.
  * @param assignments A Map of column names to Column expressions representing the updates
  *                    to be applied.
  * @param condition the update condition
  * @since 4.0.0
  */
 @Experimental
-class UpdateWithCondition(
-    tableDF: DataFrame,
+class UpdateWithCondition[T](
+    dataset: Dataset[T],
     assignments: Map[String, Column],
     condition: Option[Column]) {

-  private val sparkSession = tableDF.sparkSession
-  private val logicalPlan = tableDF.queryExecution.logical
+  private val sparkSession = dataset.sparkSession
+  private val logicalPlan = dataset.queryExecution.logical

   /**
    * Executes the update operation.
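The tail of UpdateWithCondition, including the body of its execute() method, is collapsed in this view. Judging from the Assignment and UpdateTable imports at the top of the hunk and the sparkSession and logicalPlan fields above, a plausible reconstruction follows; treat it as an inference, not the committed code.

  // Inferred sketch of the collapsed method; the actual body is not shown in
  // this diff. It builds an UpdateTable logical plan and runs it eagerly.
  def execute(): Unit = {
    val assignmentExprs = assignments.map { case (name, value) =>
      Assignment(expr(name).expr, value.expr) // column reference -> new value
    }.toSeq
    val plan = UpdateTable(
      logicalPlan,           // analyzed plan of the target table
      assignmentExprs,
      condition.map(_.expr)) // optional WHERE predicate
    val qe = sparkSession.sessionState.executePlan(plan)
    qe.assertCommandExecuted() // force eager execution of the update command
  }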
