apache · ShreyeshArangath · Nov 29, 2025 · Nov 30, 2025 · Nov 30, 2025 · Nov 30, 2025
diff --git a/.github/workflows/iceberg.yml b/.github/workflows/iceberg.yml
@@ -0,0 +1,68 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# CI workflow that runs the tests of the Iceberg integration module.
+name: Iceberg
+
+# Triggers: manual dispatch, plus pushes to and pull requests targeting
+# master or any branch-* branch.
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+      - branch-*
+  pull_request:
+    branches:
+      - master
+      - branch-*
+
+# Runs are grouped per pull request number (falling back to the git ref when there is no
+# pull request); starting a new run in a group cancels the one still in progress.
+concurrency:
+  group: iceberg-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # One job per (Iceberg, Java, Scala, Spark profile) combination of the matrix below.
+  # NOTE(review): the job id says "flink" although this job tests the Iceberg module;
+  # kept unchanged here, consider renaming it to test-iceberg.
+  test-flink:
+    # Every matrix dimension that varies is part of the name so the jobs can be told apart.
+    name: Test Iceberg ${{ matrix.iceberg }} ${{ matrix.sparkver }} javaVersion ${{ matrix.javaver }} scalaVersion ${{ matrix.scalaver }}
+    runs-on: ubuntu-24.04
+    strategy:
+      fail-fast: false
+      matrix:
+        iceberg: [ "1.9" ]
+        javaver: [ "11", "17"]
+        scalaver: [ "2.12" ]
+        module: [ "thirdparty/auron-iceberg" ]
+        sparkver: [ "spark-3.4", "spark-3.5" ]
+
+    steps:
+      - name: Checkout Auron
+        uses: actions/checkout@v4
+
+      - name: Setup Java and Maven cache
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'adopt-hotspot'
+          java-version: ${{ matrix.javaver }}
+          cache: 'maven'
+
+      # -pl/-am builds the selected module together with the modules it depends on.
+      - name: Test Iceberg Module
+        run: ./build/mvn -B test -X -pl ${{ matrix.module }} -am -Pscala-${{ matrix.scalaver }} -Piceberg-${{ matrix.iceberg }} -P${{ matrix.sparkver }} -Prelease
+
+      - name: Upload reports
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          # upload-artifact@v4 rejects artifact names containing '/' and requires a unique
+          # name per job within a run, so the name is built from the matrix values rather
+          # than from the module path.
+          name: iceberg-${{ matrix.iceberg }}-${{ matrix.sparkver }}-java${{ matrix.javaver }}-scala${{ matrix.scalaver }}-test-report
+          path: ${{ matrix.module }}/target/surefire-reports
diff --git a/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronFunctionSuite.scala
@@ -137,16 +137,16 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite {
   }
 
   test("round function with varying scales for intPi") {
-    withTable("t2") {
-      sql("CREATE TABLE t2 (c1 INT) USING parquet")
+    withTable("t3") {
+      sql("CREATE TABLE t3 (c1 INT) USING parquet")
 
       val intPi: Int = 314159265
-      sql(s"INSERT INTO t2 VALUES($intPi)")
+      sql(s"INSERT INTO t3 VALUES($intPi)")
 
       val scales = -6 to 6
 
       scales.foreach { scale =>
-        checkSparkAnswerAndOperator(s"SELECT round(c1, $scale) AS xx FROM t2")
+        checkSparkAnswerAndOperator(s"SELECT round(c1, $scale) AS xx FROM t3")
       }
     }
   }

diff --git a/spark-extension-shims-spark/src/test/scala/org.apache.auron/BaseAuronSQLSuite.scala b/spark-extension-shims-spark/src/test/scala/org.apache.auron/BaseAuronSQLSuite.scala
@@ -16,11 +16,18 @@
  */
 package org.apache.auron
 
+import java.io.IOException
+import java.nio.file.{Files, FileVisitResult, Path, SimpleFileVisitor}
+import java.nio.file.attribute.BasicFileAttributes
+
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.test.SharedSparkSession
 
 trait BaseAuronSQLSuite extends SharedSparkSession {
 
+  // Temporary directory used as this suite's SQL warehouse. It is created on first access
+  // (lazy), passed to Spark through `spark.sql.warehouse.dir` in `sparkConf`, and removed
+  // again in `afterAll`.
+  private lazy val suiteWarehouseDir: Path =
+    Files.createTempDirectory("auron-spark-warehouse-")
+
   override protected def sparkConf: SparkConf = {
     super.sparkConf
       .set("spark.sql.extensions", "org.apache.spark.sql.auron.AuronSparkSessionExtension")
@@ -29,6 +36,38 @@ trait BaseAuronSQLSuite extends SharedSparkSession {
         "org.apache.spark.sql.execution.auron.shuffle.AuronShuffleManager")
       .set("spark.memory.offHeap.enabled", "false")
       .set("spark.auron.enable", "true")
+      .set("spark.sql.warehouse.dir", suiteWarehouseDir.toFile.getCanonicalPath)
+  }
+
+  /**
+   * Runs the parent teardown and then removes this suite's temporary warehouse directory.
+   * The cleanup sits in a `finally` block, so it is attempted even when the parent
+   * teardown throws.
+   */
+  override def afterAll(): Unit = {
+    try {
+      super.afterAll()
+    } finally {
+      // Best-effort cleanup of the per-suite warehouse dir: a failed delete must not fail
+      // the suite, but fatal errors (e.g. OutOfMemoryError, InterruptedException) are not
+      // swallowed.
+      try deleteRecursively(suiteWarehouseDir)
+      catch {
+        case scala.util.control.NonFatal(_) => // ignore
+      }
+    }
+  }
+
+  /**
+   * Deletes `root` together with everything beneath it.
+   *
+   * Does nothing when `root` is null or does not exist. Files are deleted as they are
+   * visited; each directory is deleted after its entries have been walked.
+   *
+   * @param root directory (or file) to remove
+   */
+  private def deleteRecursively(root: Path): Unit = {
+    val purger: SimpleFileVisitor[Path] = new SimpleFileVisitor[Path]() {
+      @throws[IOException]
+      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
+        Files.deleteIfExists(file)
+        FileVisitResult.CONTINUE
+      }
+
+      // `exc` is not inspected: the directory delete is attempted regardless of whether
+      // walking its entries reported an error.
+      @throws[IOException]
+      override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
+        Files.deleteIfExists(dir)
+        FileVisitResult.CONTINUE
+      }
+    }
+
+    if (root != null && Files.exists(root)) {
+      Files.walkFileTree(root, purger)
+    }
   }
 
 }
diff --git a/...n-shims-spark/src/test/scala/org/apache/spark/sql/execution/BuildInfoInSparkUISuite.scala b/...n-shims-spark/src/test/scala/org/apache/spark/sql/execution/BuildInfoInSparkUISuite.scala
@@ -33,13 +33,17 @@ class BuildInfoInSparkUISuite extends AuronQueryTest with BaseAuronSQLSuite {
     super.sparkConf.set("spark.eventLog.dir", testDir.toString)
   }
 
-  override protected def beforeAll(): Unit = {
+  // `testDir` must be assigned before the parent setup runs, because `sparkConf` above
+  // reads it to set `spark.eventLog.dir`.
+  override def beforeAll(): Unit = {
     testDir = Utils.createTempDir(namePrefix = "spark-events")
     super.beforeAll()
   }
 
-  override protected def afterAll(): Unit = {
-    Utils.deleteRecursively(testDir)
+  // Runs the parent teardown first, then removes the event-log directory; the `finally`
+  // block ensures the directory is deleted even when the parent teardown throws.
+  override def afterAll(): Unit = {
+    try {
+      super.afterAll()
+    } finally {
+      Utils.deleteRecursively(testDir)
+    }
   }
 
   test("test build info in spark UI ") {

diff --git a/thirdparty/auron-iceberg/pom.xml b/thirdparty/auron-iceberg/pom.xml
@@ -31,6 +31,11 @@
   <description>Apache Auron Iceberg ${icebergVersion} ${scalaVersion}</description>
 
   <dependencies>
+    <!-- Auron Spark shims. The Scala suffix follows the scalaVersion property, like the
+         Iceberg runtime dependency below, instead of being pinned to 2.12. -->
+    <dependency>
+      <groupId>org.apache.auron</groupId>
+      <artifactId>spark-extension-shims-spark_${scalaVersion}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.iceberg</groupId>
       <artifactId>iceberg-spark-runtime-${shortSparkVersion}_${scalaVersion}</artifactId>

diff --git a/...berg/src/main/resources/META-INF.services/org.apache.spark.sql.auron.AuronConvertProvider b/...berg/src/main/resources/META-INF.services/org.apache.spark.sql.auron.AuronConvertProvider
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+org.apache.spark.sql.auron.iceberg.IcebergConvertProvider
diff --git a/...on-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergPartitionConverter.scala b/...on-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergPartitionConverter.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.spark.source
+
+import java.nio.ByteBuffer
+
+import org.apache.iceberg.{FileScanTask, Table}
+import org.apache.iceberg.spark.SparkSchemaUtil
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.Decimal
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Converts the partition data of an Iceberg [[FileScanTask]] into a Spark [[InternalRow]].
+ *
+ * Field accessors are derived from the partition spec carried by each task and memoized
+ * per spec, whereas [[schema]] is derived once from the spec of the `table` given to the
+ * constructor.
+ *
+ * @param table Iceberg table whose spec defines [[schema]]
+ */
+class IcebergPartitionConverter(table: Table) {
+
+  // Pairs the Java class requested from the partition struct with the function that turns
+  // the raw value into its Spark internal representation.
+  private case class FieldAccessor(javaClass: Class[_], convert: Any => Any)
+
+  // Spark schema of the partition type of the table's spec, computed at construction.
+  private val tableSparkPartitionSchema: StructType =
+    SparkSchemaUtil.convert(table.spec().partitionType().asSchema())
+
+  require(
+    table.spec().partitionType().fields().size() == tableSparkPartitionSchema.fields.length,
+    s"Mismatch between Iceberg partition fields (${table.spec().partitionType().fields().size()}) " +
+      s"and Spark partition schema (${tableSparkPartitionSchema.fields.length})")
+
+  // Memoized (Spark partition schema, accessors) per partition spec.
+  // NOTE(review): plain mutable map without synchronization; presumably only used from a
+  // single thread. Confirm against callers.
+  private val specCache = scala.collection.mutable
+    .AnyRefMap[org.apache.iceberg.PartitionSpec, (StructType, Array[FieldAccessor])]()
+
+  /** Spark partition schema of the table's spec (not of an individual task's spec). */
+  def schema: StructType = tableSparkPartitionSchema
+
+  /**
+   * Builds the Spark partition row for `task`, using the accessors of the task's own spec.
+   *
+   * @param task scan task whose file partition is converted
+   * @return `InternalRow.empty` when the file has no partition data or the spec has no
+   *         partition fields; otherwise one converted value per partition field
+   */
+  def convert(task: FileScanTask): InternalRow = {
+    val (_, accessors) = accessorsFor(task.spec())
+    val partition = task.file().partition()
+    if (partition != null && accessors.nonEmpty) {
+      val sparkValues = accessors.indices.map { pos =>
+        val entry = accessors(pos)
+        val requested = entry.javaClass.asInstanceOf[Class[Any]]
+        entry.convert(partition.get(pos, requested))
+      }
+      InternalRow.fromSeq(sparkValues)
+    } else {
+      InternalRow.empty
+    }
+  }
+
+  // Looks up the cached schema/accessors for `spec`, computing them on first use.
+  private def accessorsFor(
+      spec: org.apache.iceberg.PartitionSpec): (StructType, Array[FieldAccessor]) =
+    specCache.getOrElseUpdate(spec, describeSpec(spec))
+
+  // Derives the Spark partition schema and the per-field accessors for one spec.
+  private def describeSpec(
+      spec: org.apache.iceberg.PartitionSpec): (StructType, Array[FieldAccessor]) = {
+    val pt = spec.partitionType()
+    val sps = SparkSchemaUtil.convert(pt.asSchema())
+    require(
+      pt.fields().size() == sps.fields.length,
+      s"Mismatch between Iceberg partition fields (${pt.fields().size()}) and Spark partition schema (${sps.fields.length})")
+    (sps, buildFieldAccessors(sps))
+  }
+
+  // One accessor per Spark partition field, in schema order.
+  private def buildFieldAccessors(sparkSchema: StructType): Array[FieldAccessor] =
+    sparkSchema.fields.map(_.dataType).map { dt =>
+      FieldAccessor(javaClass = javaClassFor(dt), convert = converterFor(dt))
+    }
+
+  // Java class requested from the partition struct for a given Spark type.
+  // Throws UnsupportedOperationException for types without a mapping.
+  private def javaClassFor(dt: DataType): Class[_] = dt match {
+    case BooleanType => classOf[java.lang.Boolean]
+    case IntegerType | DateType => classOf[java.lang.Integer]
+    case LongType | TimestampType => classOf[java.lang.Long]
+    // Matched by name rather than by type object.
+    case t if t.typeName == "timestamp_ntz" || t.typeName == "time" => classOf[java.lang.Long]
+    case FloatType => classOf[java.lang.Float]
+    case DoubleType => classOf[java.lang.Double]
+    case StringType => classOf[CharSequence]
+    case BinaryType => classOf[java.nio.ByteBuffer]
+    case _: DecimalType => classOf[java.math.BigDecimal]
+    case other =>
+      throw new UnsupportedOperationException(
+        s"Unsupported Spark partition type from partitionType.asSchema(): $other")
+  }
+
+  // Wraps a conversion so that a null partition value is passed through as null.
+  private def nullSafe(f: Any => Any): Any => Any =
+    (raw: Any) => if (raw == null) null else f(raw)
+
+  // Conversion from the raw partition value to Spark's internal representation.
+  private def converterFor(dt: DataType): Any => Any = dt match {
+    case StringType =>
+      nullSafe(value => UTF8String.fromString(value.toString))
+
+    case IntegerType | BooleanType | LongType | FloatType | DoubleType =>
+      (value: Any) => value
+
+    case DateType =>
+      nullSafe(value => value.asInstanceOf[Integer].intValue())
+
+    case TimestampType =>
+      nullSafe(_.asInstanceOf[Long])
+
+    case t if t.typeName == "timestamp_ntz" || t.typeName == "time" =>
+      nullSafe(_.asInstanceOf[Long])
+
+    case BinaryType =>
+      nullSafe {
+        case buffer: ByteBuffer =>
+          // Read through a duplicate so the source buffer's position is left untouched.
+          val view = buffer.duplicate()
+          val bytes = new Array[Byte](view.remaining())
+          view.get(bytes)
+          bytes
+        case bytes: Array[Byte] => bytes
+        case other =>
+          throw new IllegalArgumentException(
+            s"Unexpected binary partition value type: ${other.getClass}")
+      }
+
+    case decimalType: DecimalType =>
+      nullSafe { value =>
+        val parsed: java.math.BigDecimal = value match {
+          case exact: java.math.BigDecimal => exact
+          case other => new java.math.BigDecimal(other.toString)
+        }
+        // UNNECESSARY makes setScale throw if the value does not fit the target scale exactly.
+        val normalized = parsed.setScale(decimalType.scale, java.math.RoundingMode.UNNECESSARY)
+        Decimal(normalized, decimalType.precision, decimalType.scale)
+      }
+
+    case other =>
+      // Fails when the converter is invoked, not when it is created.
+      (_: Any) =>
+        throw new UnsupportedOperationException(
+          s"Unsupported Spark partition type in converter from partitionType.asSchema(): $other")
+  }
+}