Commit 635b51e

Author: 史大洋 (committed)
[spark] Support data evolution for bucket table
1 parent b7a42cd commit 635b51e

File tree

8 files changed, +132 -23 lines changed


docs/content/append-table/row-tracking.md

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ CREATE TABLE part_t (
 WITH ('row-tracking.enabled' = 'true');
 ```
 Notice that:
-- Row tracking is only supported for unaware append tables, not for primary key tables. Which means you can't define `bucket` and `bucket-key` for the table.
+- Row tracking is only supported for unaware or hash_fixed bucket append tables, not for primary key tables.
 - Only spark support update, merge into and delete operations on row-tracking tables, Flink SQL does not support these operations yet.
 - This function is experimental, this line will be removed after being stable.
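With this change, row tracking is no longer limited to unaware append tables; a fixed-bucket (hash_fixed) append table can enable it as well. A minimal DDL sketch, modeled on the test added later in this commit (the table and column names are illustrative):

```scala
// Sketch only, mirroring the new test in this commit; names are illustrative.
sql(
  "CREATE TABLE t (id INT, b INT, c INT) TBLPROPERTIES (" +
    "'row-tracking.enabled' = 'true', " +
    "'data-evolution.enabled' = 'true', " +
    "'bucket' = '2', " +
    "'bucket-key' = 'id')")
```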

paimon-api/src/main/java/org/apache/paimon/schema/TableSchema.java

Lines changed: 28 additions & 1 deletion
@@ -127,6 +127,31 @@ public TableSchema(
         numBucket = CoreOptions.fromMap(options).bucket();
     }

+    private TableSchema(
+            int version,
+            long id,
+            List<DataField> fields,
+            int highestFieldId,
+            List<String> partitionKeys,
+            List<String> primaryKeys,
+            Map<String, String> options,
+            @Nullable String comment,
+            long timeMillis,
+            List<String> bucketKeys,
+            int numBucket) {
+        this.version = version;
+        this.id = id;
+        this.fields = Collections.unmodifiableList(new ArrayList<>(fields));
+        this.highestFieldId = highestFieldId;
+        this.partitionKeys = partitionKeys;
+        this.primaryKeys = primaryKeys;
+        this.options = options;
+        this.comment = comment;
+        this.timeMillis = timeMillis;
+        this.bucketKeys = bucketKeys;
+        this.numBucket = numBucket;
+    }
+
     public int version() {
         return version;
     }
@@ -294,7 +319,9 @@ public TableSchema project(@Nullable List<String> writeCols) {
                 primaryKeys,
                 options,
                 comment,
-                timeMillis);
+                timeMillis,
+                bucketKeys,
+                numBucket);
     }

     private List<DataField> projectedDataFields(List<String> projectedFieldNames) {

paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java

Lines changed: 1 addition & 1 deletion
@@ -632,7 +632,7 @@ private static void validateRowTracking(TableSchema schema, CoreOptions options)
         boolean rowTrackingEnabled = options.rowTrackingEnabled();
         if (rowTrackingEnabled) {
             checkArgument(
-                    options.bucket() == -1,
+                    options.bucket() == -1 || options.bucket() > 0,
                     "Cannot define %s for row tracking table, it only support bucket = -1",
                     CoreOptions.BUCKET.key());
             checkArgument(
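Read as a predicate, the relaxed check accepts either the unaware-bucket default (bucket = -1) or any positive fixed bucket count; any other value still fails this checkArgument. A minimal sketch of the condition:

```scala
// Sketch of the relaxed validation condition above.
def passesRowTrackingBucketCheck(bucket: Int): Boolean =
  bucket == -1 || bucket > 0

assert(passesRowTrackingBucketCheck(-1)) // unaware append table (no bucket defined)
assert(passesRowTrackingBucketCheck(2))  // hash_fixed bucket table, e.g. 'bucket' = '2'
assert(!passesRowTrackingBucketCheck(0)) // still rejected
```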

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/DataEvolutionSparkTableWrite.scala

Lines changed: 6 additions & 5 deletions
@@ -40,7 +40,7 @@ import scala.collection.mutable.ListBuffer
 case class DataEvolutionSparkTableWrite(
     writeBuilder: BatchWriteBuilder,
     writeType: RowType,
-    firstRowIdToPartitionMap: mutable.HashMap[Long, Tuple2[BinaryRow, Long]],
+    firstRowIdToPartitionMap: mutable.HashMap[Long, Tuple3[BinaryRow, Int, Long]],
     blobAsDescriptor: Boolean,
     catalogContext: CatalogContext)
   extends SparkTableWriteTrait {
@@ -68,7 +68,7 @@ case class DataEvolutionSparkTableWrite(

   def newCurrentWriter(firstRowId: Long): Unit = {
     finishCurrentWriter()
-    val (partition, numRecords) = firstRowIdToPartitionMap.getOrElse(firstRowId, null)
+    val (partition, bucket, numRecords) = firstRowIdToPartitionMap.getOrElse(firstRowId, null)
     if (partition == null) {
       throw new IllegalArgumentException(
         s"First row ID $firstRowId not found in partition map. " +
@@ -81,8 +81,8 @@ case class DataEvolutionSparkTableWrite(
       .asInstanceOf[TableWriteImpl[InternalRow]]
       .getWrite
       .asInstanceOf[AbstractFileStoreWrite[InternalRow]]
-      .createWriter(partition, 0)
-    currentWriter = PerFileWriter(partition, firstRowId, writer, numRecords)
+      .createWriter(partition, bucket)
+    currentWriter = PerFileWriter(partition, bucket, firstRowId, writer, numRecords)
   }

   def finishCurrentWriter(): Unit = {
@@ -122,6 +122,7 @@ case class DataEvolutionSparkTableWrite(

   private case class PerFileWriter(
       partition: BinaryRow,
+      bucket: Int,
       firstRowId: Long,
       recordWriter: RecordWriter[InternalRow],
       numRecords: Long) {
@@ -149,7 +150,7 @@ case class DataEvolutionSparkTableWrite(
       val dataFileMeta = dataFiles.get(0).assignFirstRowId(firstRowId)
       new CommitMessageImpl(
         partition,
-        0,
+        bucket,
         null,
         new DataIncrement(
           java.util.Arrays.asList(dataFileMeta),

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/PaimonMergeIntoBase.scala

Lines changed: 30 additions & 1 deletion
@@ -18,15 +18,19 @@

 package org.apache.paimon.spark.catalyst.analysis

+import org.apache.paimon.CoreOptions.BUCKET_KEY
 import org.apache.paimon.spark.SparkTable
 import org.apache.paimon.spark.catalyst.analysis.expressions.ExpressionHelper
 import org.apache.paimon.spark.commands.{MergeIntoPaimonDataEvolutionTable, MergeIntoPaimonTable}
+import org.apache.paimon.utils.StringUtils

 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, SubqueryExpression}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule

+import java.util
+
 import scala.collection.JavaConverters._

 trait PaimonMergeIntoBase
@@ -72,6 +76,13 @@ trait PaimonMergeIntoBase
       resolveNotMatchedBySourceActions(merge, targetOutput, dataEvolutionEnabled)

     if (dataEvolutionEnabled) {
+      val bucketKeySt = v2Table.getTable.options().get(BUCKET_KEY.key)
+      if (!StringUtils.isNullOrWhitespaceOnly(bucketKeySt)) {
+        checkUpdateActionValidityForBucketKey(
+          AttributeSet(targetOutput),
+          updateActions,
+          bucketKeySt.split(",").toSeq)
+      }
       MergeIntoPaimonDataEvolutionTable(
         v2Table,
         merge.targetTable,
@@ -142,7 +153,7 @@ trait PaimonMergeIntoBase
     lazy val isMergeConditionValid = {
       val mergeExpressions = splitConjunctivePredicates(mergeCondition)
       primaryKeys.forall {
-        primaryKey => isUpdateExpressionToPrimaryKey(targetOutput, mergeExpressions, primaryKey)
+        primaryKey => isUpdateExpressionForKey(targetOutput, mergeExpressions, primaryKey)
       }
     }

@@ -156,4 +167,22 @@ trait PaimonMergeIntoBase
       throw new RuntimeException("Can't update the primary key column in update clause.")
     }
   }
+
+  /** This check will avoid to update the bucket key columns */
+  private def checkUpdateActionValidityForBucketKey(
+      targetOutput: AttributeSet,
+      actions: Seq[UpdateAction],
+      bucketKeys: Seq[String]): Unit = {
+
+    // Check whether there are an update expression related to any primary key.
+    def isUpdateActionValid(action: UpdateAction): Boolean = {
+      validUpdateAssignment(targetOutput, bucketKeys, action.assignments)
+    }
+
+    val valid = actions.forall(isUpdateActionValid)
+    if (!valid) {
+      throw new RuntimeException(
+        "Can't update the bucket key column in data-evolution update clause.")
+    }
+  }
 }
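The new guard mirrors the existing primary-key rule: in a MERGE on a data-evolution table, UPDATE assignments may change ordinary columns but must not touch any column listed in bucket-key. A compact test-style illustration, assuming a target table t bucketed by id as in the tests added later in this commit:

```scala
// Accepted: the UPDATE assignment leaves the bucket key ('id') untouched.
sql("MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.b = s.b")

// Rejected by checkUpdateActionValidityForBucketKey with
// "Can't update the bucket key column in data-evolution update clause."
sql("MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.id = s.id")
```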

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/RowLevelHelper.scala

Lines changed: 8 additions & 10 deletions
@@ -41,23 +41,21 @@ trait RowLevelHelper extends SQLConfHelper {

   protected def validUpdateAssignment(
       output: AttributeSet,
-      primaryKeys: Seq[String],
+      keys: Seq[String],
       assignments: Seq[Assignment]): Boolean = {
-    !primaryKeys.exists {
-      primaryKey => isUpdateExpressionToPrimaryKey(output, assignments, primaryKey)
-    }
+    !keys.exists(key => isUpdateExpressionForKey(output, assignments, key))
   }

   // Check whether there is an update expression related to primary key.
-  protected def isUpdateExpressionToPrimaryKey(
+  protected def isUpdateExpressionForKey(
       output: AttributeSet,
       expressions: Seq[Expression],
-      primaryKey: String): Boolean = {
+      key: String): Boolean = {
     val resolver = conf.resolver

     // Check whether this attribute is same to primary key and is from target table.
-    def isTargetPrimaryKey(attr: AttributeReference): Boolean = {
-      resolver(primaryKey, attr.name) && output.contains(attr)
+    def isTargetKey(attr: AttributeReference): Boolean = {
+      resolver(key, attr.name) && output.contains(attr)
     }

     expressions
@@ -67,9 +65,9 @@ trait RowLevelHelper extends SQLConfHelper {
       }
       .exists {
         case EqualTo(left: AttributeReference, right: AttributeReference) =>
-          isTargetPrimaryKey(left) || isTargetPrimaryKey(right)
+          isTargetKey(left) || isTargetKey(right)
         case Assignment(key: AttributeReference, _) =>
-          isTargetPrimaryKey(key)
+          isTargetKey(key)
         case _ => false
       }
   }

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/DataEvolutionPaimonWriter.scala

Lines changed: 6 additions & 4 deletions
@@ -38,22 +38,24 @@ import scala.collection.mutable

 case class DataEvolutionPaimonWriter(paimonTable: FileStoreTable) extends WriteHelper {

-  private lazy val firstRowIdToPartitionMap: mutable.HashMap[Long, Tuple2[BinaryRow, Long]] =
+  private lazy val firstRowIdToPartitionMap: mutable.HashMap[Long, Tuple3[BinaryRow, Int, Long]] =
     initPartitionMap()
   override val table: FileStoreTable = paimonTable.copy(dynamicOp)

   @transient private lazy val serializer = new CommitMessageSerializer

-  private def initPartitionMap(): mutable.HashMap[Long, Tuple2[BinaryRow, Long]] = {
-    val firstRowIdToPartitionMap = new mutable.HashMap[Long, Tuple2[BinaryRow, Long]]
+  private def initPartitionMap(): mutable.HashMap[Long, Tuple3[BinaryRow, Int, Long]] = {
+    val firstRowIdToPartitionMap = new mutable.HashMap[Long, Tuple3[BinaryRow, Int, Long]]
     table
       .store()
       .newScan()
       .readFileIterator()
       .forEachRemaining(
         k =>
           firstRowIdToPartitionMap
-            .put(k.file().firstRowId(), Tuple2.apply(k.partition(), k.file().rowCount())))
+            .put(
+              k.file().firstRowId(),
+              Tuple3.apply(k.partition(), k.bucket(), k.file().rowCount())))
     firstRowIdToPartitionMap
   }
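The scan-time index now records the bucket alongside the partition and row count, so the Spark write path can recreate each file's writer, and emit its commit message, against the file's original bucket rather than a hard-coded bucket 0. A self-contained sketch of the lookup shape (a plain String stands in for Paimon's BinaryRow partition):

```scala
import scala.collection.mutable

// firstRowId -> (partition, bucket, rowCount); a String stands in for BinaryRow.
val index = new mutable.HashMap[Long, (String, Int, Long)]
index.put(0L, ("dt=20240101", 0, 100L))
index.put(100L, ("dt=20240101", 1, 50L))

// Rewriting the file whose first row id is 100 must target bucket 1,
// both when creating the writer and in the resulting commit message.
val (partition, bucket, numRecords) = index(100L)
println(s"partition=$partition bucket=$bucket numRecords=$numRecords")
```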

paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTestBase.scala

Lines changed: 52 additions & 0 deletions
@@ -327,6 +327,58 @@ abstract class RowTrackingTestBase extends PaimonSparkTestBase {
     }
   }

+  test("Data Evolution: merge into bucket table with data-evolution") {
+    withTable("s", "t") {
+      sql("CREATE TABLE s (id INT, b INT)")
+      sql("INSERT INTO s VALUES (1, 11), (2, 22)")
+
+      sql(
+        "CREATE TABLE t (id INT, b INT, c INT) TBLPROPERTIES (" +
+          "'row-tracking.enabled' = 'true', " +
+          "'data-evolution.enabled' = 'true', " +
+          "'bucket'='2', " +
+          "'bucket-key'='id')")
+      sql("INSERT INTO t SELECT /*+ REPARTITION(1) */ id, id AS b, id AS c FROM range(2, 4)")
+
+      sql("""
+            |MERGE INTO t
+            |USING s
+            |ON t.id = s.id
+            |WHEN MATCHED THEN UPDATE SET t.b = s.b
+            |WHEN NOT MATCHED THEN INSERT (id, b, c) VALUES (id, b, 11)
+            |""".stripMargin)
+      checkAnswer(sql("SELECT count(*) FROM t"), Seq(Row(3)))
+      checkAnswer(
+        sql("SELECT *, _ROW_ID, _SEQUENCE_NUMBER FROM t ORDER BY id"),
+        Seq(Row(1, 11, 11, 2, 2), Row(2, 22, 2, 0, 2), Row(3, 3, 3, 1, 2))
+      )
+    }
+  }
+
+  test("Data Evolution: merge into bucket table with data-evolution update bucket key") {
+    withTable("s", "t") {
+      sql("CREATE TABLE s (id INT, b INT)")
+      sql("INSERT INTO s VALUES (1, 11), (2, 22)")
+
+      sql(
+        "CREATE TABLE t (id INT, b INT, c INT) TBLPROPERTIES (" +
+          "'row-tracking.enabled' = 'true', " +
+          "'data-evolution.enabled' = 'true', " +
+          "'bucket'='2', " +
+          "'bucket-key'='id')")
+
+      assertThrows[RuntimeException] {
+        sql("""
+              |MERGE INTO t
+              |USING s
+              |ON t.id = s.id
+              |WHEN MATCHED THEN UPDATE SET t.id = s.id
+              |WHEN NOT MATCHED THEN INSERT (id, b, c) VALUES (id, b, 11)
+              |""".stripMargin)
+      }
+    }
+  }
+
   test("Data Evolution: update table throws exception") {
     withTable("t") {
       sql(
