32 changes: 32 additions & 0 deletions .baseline/scala/.scala212fmt.conf
@@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

version = 3.9.7

align = none
Contributor:

I haven't checked, but are these settings resulting in a code style that is very close to the Apache Spark codebase?

Contributor Author:

Yes, the Spark config is here: https://github.com/apache/spark/blob/master/dev/.scalafmt.conf.

The differences:

# spark
maxColumn = 98
version = 3.8.6

# iceberg
docstrings.wrap = false  # suggestion from comments
maxColumn = 100
rewrite.rules = [Imports]
version = 3.9.7

Member:

For learning, why do we deviate on [Imports]?

Contributor Author:

Removed [Imports] to keep it aligned with Spark. We could add the following if needed:

// https://scalameta.org/scalafmt/docs/configuration.html#imports
rewrite.rules = [Imports]
rewrite.imports.sort = original
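
For illustration only (not part of the PR): roughly what the Imports rewrite would do with selector sorting enabled. The exact grouping and ordering depend on the scalafmt version and the other rewrite.imports settings.

// Before the rewrite: selectors inside braces stay in whatever order they were written
import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan, Filter}

// After the rewrite (illustrative): selectors sorted within the braces
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}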

Contributor:

rewrite.rules has been removed from https://github.com/apache/spark/blob/f5b9ea8103dd1f37c6f0cea0692d7bc5b50b778c/dev/.scalafmt.conf

The only differences now are version (3.8.6 vs 3.9.7), maxColumn (98 vs 100), and docstrings.wrap.

align.openParenDefnSite = false
align.openParenCallSite = false
align.tokens = []
importSelectors = "singleLine"
optIn = {
configStyleArguments = false
}
danglingParentheses.preset = false
docstrings.style = Asterisk
docstrings.wrap = false
maxColumn = 100
Contributor:

Given that we can fully control this (and since we're deviating from Spark anyway), I think it's fine to set this to 120 or even 140. Let's see what others think.

Contributor:

I would have voted for 120.

Contributor:

I merged the PR early, sorry! I'm happy to help make the change.

Here's the dev list discussion thread Eduard has started: https://lists.apache.org/thread/y645tppy9q21xm91qmd582jw0c3jh6hm

runner.dialect = scala212
32 changes: 32 additions & 0 deletions .baseline/scala/.scala213fmt.conf
@@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

version = 3.9.7

align = none
align.openParenDefnSite = false
align.openParenCallSite = false
align.tokens = []
importSelectors = "singleLine"
optIn = {
configStyleArguments = false
}
danglingParentheses.preset = false
docstrings.style = Asterisk
docstrings.wrap = false
maxColumn = 100
runner.dialect = scala213
15 changes: 15 additions & 0 deletions baseline.gradle
@@ -62,6 +62,21 @@ subprojects {
removeUnusedImports()
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt"
}

// Configure different scalafmt rules for specific Scala version
if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.13")) {
Contributor:

Do we need to check iceberg-spark? It only affects the Scala code, right? Also, should we get the scalaVersion from the System.getProperty rather than from the project name?

Contributor:

> should we get the scalaVersion from the System.getProperty rather than from the project name?

I think that's a good idea; we do it here already.

cc @ConeyLiu

Contributor Author:

I'll follow up on this in the next few days.

Contributor Author:

Submitted #14798
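
As an aside, a rough sketch of the approach being discussed: reading the Scala version from a system property instead of parsing it out of the project name. This is illustrative only and not the actual content of #14798; the property names scalaVersion and defaultScalaVersion are assumptions.

// Illustrative sketch only (assumed property names, not the actual follow-up change):
// derive the Scala binary version from a system property rather than the project-name suffix.
def scalaVersion = System.getProperty("scalaVersion", System.getProperty("defaultScalaVersion"))

if (project.name.startsWith("iceberg-spark")) {
  scala {
    target 'src/**/*.scala'
    // "2.12" -> ".scala212fmt.conf", "2.13" -> ".scala213fmt.conf"
    scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala${scalaVersion.replace('.', '')}fmt.conf")
    licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
  }
}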

scala {
target 'src/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala213fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
} else if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.12")) {
scala {
target 'src/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala212fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
}
Comment on lines 66 to 79
Contributor:

Suggested change, from:

// Configure different scalafmt rules for specific Scala version
if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.13")) {
scala {
target 'src/main/scala/**/*.scala', 'src/test/scala/**/*.scala', 'src/testFixtures/scala/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala213fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
} else if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.12")) {
scala {
target 'src/main/scala/**/*.scala', 'src/test/scala/**/*.scala', 'src/testFixtures/scala/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala212fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
}

to:

// Configure different scalafmt rules for specific Scala version
if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.13")) {
scala {
target 'src/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala213fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
} else if (project.name.startsWith("iceberg-spark") && project.name.endsWith("2.12")) {
scala {
target 'src/**/*.scala'
scalafmt("3.9.7").configFile("$rootDir/.baseline/scala/.scala212fmt.conf")
licenseHeaderFile "$rootDir/.baseline/copyright/copyright-header-java.txt", "package"
}
}

Nit: capture all .scala files in case more folders are added in the future.

Contributor Author:

Thanks @kevinjqliu, updated

}
}

2 changes: 2 additions & 0 deletions site/docs/contribute.md
@@ -125,6 +125,8 @@ This project Iceberg also has modules for adding Iceberg support to processing e

Follow the instructions for [Eclipse](https://github.com/google/google-java-format#eclipse) or
[IntelliJ](https://github.com/google/google-java-format#intellij-android-studio-and-other-jetbrains-ides) to install the **google-java-format** plugin (note the required manual actions for IntelliJ).
Follow the [instructions](https://scalameta.org/scalafmt/docs/installation.html) to install the **scalafmt** plugin
and configure it to point to the configuration files located under the directory `.baseline/scala/`.

## Semantic Versioning

@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.spark.extensions

import org.apache.spark.sql.SparkSessionExtensions
@@ -57,8 +56,8 @@ class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {
extensions.injectResolutionRule { _ => CheckMergeIntoTableConditions }
extensions.injectResolutionRule { _ => ProcedureArgumentCoercion }
extensions.injectResolutionRule { _ => AlignRowLevelCommandAssignments }
extensions.injectResolutionRule { _ => RewriteUpdateTableForRowLineage}
extensions.injectResolutionRule { _ => RewriteMergeIntoTableForRowLineage}
extensions.injectResolutionRule { _ => RewriteUpdateTableForRowLineage }
extensions.injectResolutionRule { _ => RewriteMergeIntoTableForRowLineage }
extensions.injectResolutionRule { _ => RewriteUpdateTable }
extensions.injectResolutionRule { _ => RewriteMergeIntoTable }
extensions.injectCheckRule { _ => CheckViews }
@@ -69,7 +68,7 @@ class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {
extensions.injectOptimizerRule { _ => ExtendedSimplifyConditionalsInPredicate }
extensions.injectOptimizerRule { _ => ExtendedReplaceNullWithFalseInPredicate }
extensions.injectOptimizerRule { _ => ReplaceStaticInvoke }
extensions.injectOptimizerRule { _ => RemoveRowLineageOutputFromOriginalTable}
extensions.injectOptimizerRule { _ => RemoveRowLineageOutputFromOriginalTable }
// pre-CBO rules run only once and the order of the rules is important
// - dynamic filters should be added before replacing commands with rewrite plans
// - scans must be planned before building writes
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
@@ -35,8 +34,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
*
* Note that this rule must be run before rewriting row-level commands.
*/
object AlignRowLevelCommandAssignments
extends Rule[LogicalPlan] with AssignmentAlignmentSupport {
object AlignRowLevelCommandAssignments extends Rule[LogicalPlan] with AssignmentAlignmentSupport {

override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case u: UpdateIcebergTable if u.resolved && !u.aligned =>
@@ -60,7 +58,7 @@ object AlignRowLevelCommandAssignments
if (ref.size > 1) {
throw new AnalysisException(
"Nested fields are not supported inside INSERT clauses of MERGE operations: " +
s"${ref.mkString("`", "`.`", "`")}")
s"${ref.mkString("`", "`.`", "`")}")
}
}

@@ -101,8 +99,8 @@ object AlignRowLevelCommandAssignments
if (assignment.isEmpty) {
throw new AnalysisException(
s"Cannot find column '${targetAttr.name}' of the target table among " +
s"the INSERT columns: ${assignmentMap.keys.mkString(", ")}. " +
"INSERT clauses must provide values for all columns of the target table.")
s"the INSERT columns: ${assignmentMap.keys.mkString(", ")}. " +
"INSERT clauses must provide values for all columns of the target table.")
}

val key = assignment.get.key
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
@@ -69,8 +68,8 @@ trait AssignmentAlignmentSupport extends CastSupport {

val columnUpdates = assignments.map(a => ColumnUpdate(toAssignmentRef(a.key), a.value))
val outputExprs = applyUpdates(table.output, columnUpdates)
outputExprs.zip(table.output).map {
case (expr, attr) => handleCharVarcharLimits(Assignment(attr, expr))
outputExprs.zip(table.output).map { case (expr, attr) =>
handleCharVarcharLimits(Assignment(attr, expr))
}
}

@@ -117,8 +116,7 @@
val colName = (namePrefix :+ col.name).mkString(".")
throw new AnalysisException(
"Updating nested fields is only supported for StructType " +
s"but $colName is of type $otherType"
)
s"but $colName is of type $otherType")
}

// if there are conflicting updates, throw an exception
@@ -129,7 +127,7 @@
val conflictingCols = updates.map(u => (namePrefix ++ u.ref).mkString("."))
throw new AnalysisException(
"Updates are in conflict for these columns: " +
conflictingCols.distinct.mkString(", "))
conflictingCols.distinct.mkString(", "))
}
}
}
@@ -180,8 +178,13 @@
// e.g. a struct with fields (a, b) is assigned as a struct with fields (a, c) or (b, a)
val errors = new mutable.ArrayBuffer[String]()
val canWrite = DataType.canWrite(
expr.dataType, tableAttr.dataType, byName = true, resolver, tableAttr.name,
storeAssignmentPolicy, err => errors += err)
expr.dataType,
tableAttr.dataType,
byName = true,
resolver,
tableAttr.name,
storeAssignmentPolicy,
err => errors += err)

if (!canWrite) {
throw new AnalysisException(
@@ -195,7 +198,8 @@
case _ if tableAttr.dataType.sameType(expr.dataType) =>
expr
case StoreAssignmentPolicy.ANSI =>
val cast = Cast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone), ansiEnabled = true)
val cast =
Cast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone), ansiEnabled = true)
cast.setTagValue(Cast.BY_TABLE_INSERTION, ())
TableOutputResolver.checkCastOverflowInTableInsert(cast, colPath.quoted)
case _ =>
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
@@ -57,18 +56,20 @@ object CheckMergeIntoTableConditions extends Rule[LogicalPlan] {
if (!cond.deterministic) {
throw new AnalysisException(
s"Non-deterministic functions are not supported in $condName conditions of " +
s"MERGE operations: ${cond.sql}")
s"MERGE operations: ${cond.sql}")
}

if (SubqueryExpression.hasSubquery(cond)) {
throw new AnalysisException(
s"Subqueries are not supported in conditions of MERGE operations. " +
s"Found a subquery in the $condName condition: ${cond.sql}")
s"Found a subquery in the $condName condition: ${cond.sql}")
}

if (cond.find(_.isInstanceOf[AggregateExpression]).isDefined) {
throw new AnalysisException(
s"Agg functions are not supported in $condName conditions of MERGE operations: " + {cond.sql})
s"Agg functions are not supported in $condName conditions of MERGE operations: " + {
cond.sql
})
}
}
}
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
@@ -38,44 +37,71 @@ object CheckViews extends (LogicalPlan => Unit) {

override def apply(plan: LogicalPlan): Unit = {
plan foreach {
case CreateIcebergView(resolvedIdent@ResolvedIdentifier(_: ViewCatalog, _), _, query, columnAliases, _,
_, _, _, _, replace, _) =>
case CreateIcebergView(
resolvedIdent @ ResolvedIdentifier(_: ViewCatalog, _),
_,
query,
columnAliases,
_,
_,
_,
_,
_,
replace,
_) =>
verifyColumnCount(resolvedIdent, columnAliases, query)
SchemaUtils.checkColumnNameDuplication(query.schema.fieldNames.toIndexedSeq, SQLConf.get.resolver)
SchemaUtils.checkColumnNameDuplication(
query.schema.fieldNames.toIndexedSeq,
SQLConf.get.resolver)
if (replace) {
val viewIdent: Seq[String] = resolvedIdent.catalog.name() +: resolvedIdent.identifier.asMultipartIdentifier
val viewIdent: Seq[String] =
resolvedIdent.catalog.name() +: resolvedIdent.identifier.asMultipartIdentifier
checkCyclicViewReference(viewIdent, query, Seq(viewIdent))
}

case AlterViewAs(ResolvedV2View(_, _), _, _) =>
throw new AnalysisException("ALTER VIEW <viewName> AS is not supported. Use CREATE OR REPLACE VIEW instead")
throw new AnalysisException(
"ALTER VIEW <viewName> AS is not supported. Use CREATE OR REPLACE VIEW instead")

case _ => // OK
}
}

private def verifyColumnCount(ident: ResolvedIdentifier, columns: Seq[String], query: LogicalPlan): Unit = {
private def verifyColumnCount(
ident: ResolvedIdentifier,
columns: Seq[String],
query: LogicalPlan): Unit = {
if (columns.nonEmpty) {
if (columns.length > query.output.length) {
throw new AnalysisException(String.format("Cannot create view %s.%s, the reason is not enough data columns:\n" +
"View columns: %s\n" +
"Data columns: %s", ident.catalog.name(), ident.identifier, columns.mkString(", "),
query.output.map(c => c.name).mkString(", ")))
throw new AnalysisException(
String.format(
"Cannot create view %s.%s, the reason is not enough data columns:\n" +
"View columns: %s\n" +
"Data columns: %s",
ident.catalog.name(),
ident.identifier,
columns.mkString(", "),
query.output.map(c => c.name).mkString(", ")))
} else if (columns.length < query.output.length) {
throw new AnalysisException(String.format("Cannot create view %s.%s, the reason is too many data columns:\n" +
"View columns: %s\n" +
"Data columns: %s", ident.catalog.name(), ident.identifier, columns.mkString(", "),
query.output.map(c => c.name).mkString(", ")))
throw new AnalysisException(
String.format(
"Cannot create view %s.%s, the reason is too many data columns:\n" +
"View columns: %s\n" +
"Data columns: %s",
ident.catalog.name(),
ident.identifier,
columns.mkString(", "),
query.output.map(c => c.name).mkString(", ")))
}
}
}

private def checkCyclicViewReference(
viewIdent: Seq[String],
plan: LogicalPlan,
cyclePath: Seq[Seq[String]]): Unit = {
viewIdent: Seq[String],
plan: LogicalPlan,
cyclePath: Seq[Seq[String]]): Unit = {
plan match {
case sub@SubqueryAlias(_, Project(_, _)) =>
case sub @ SubqueryAlias(_, Project(_, _)) =>
val currentViewIdent: Seq[String] = sub.identifier.qualifier :+ sub.identifier.name
checkIfRecursiveView(viewIdent, currentViewIdent, cyclePath, sub.children)
case v1View: View =>
@@ -94,15 +120,17 @@
}

private def checkIfRecursiveView(
viewIdent: Seq[String],
currentViewIdent: Seq[String],
cyclePath: Seq[Seq[String]],
children: Seq[LogicalPlan]
): Unit = {
viewIdent: Seq[String],
currentViewIdent: Seq[String],
cyclePath: Seq[Seq[String]],
children: Seq[LogicalPlan]): Unit = {
val newCyclePath = cyclePath :+ currentViewIdent
if (currentViewIdent == viewIdent) {
throw new AnalysisException(String.format("Recursive cycle in view detected: %s (cycle: %s)",
viewIdent.asIdentifier, newCyclePath.map(p => p.mkString(".")).mkString(" -> ")))
throw new AnalysisException(
String.format(
"Recursive cycle in view detected: %s (cycle: %s)",
viewIdent.asIdentifier,
newCyclePath.map(p => p.mkString(".")).mkString(" -> ")))
} else {
children.foreach { c =>
checkCyclicViewReference(viewIdent, c, newCyclePath)
@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException