
Commit d0bd28e

[Spark] Fix O(n^2) issue in find last complete checkpoint before (#3060)
#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

This PR fixes an O(n^2) issue in the `findLastCompleteCheckpointBefore` method, which finds the last checkpoint before a given version. Today it does the following:

findLastCompleteCheckpointBefore(10000):
1. List from 9000
2. List from 8000
3. List from 7000
...

Each of these listings today scans all the way to the end of the log, because the code completely ignores delta files and bounds the `takeWhile` only by a version clause:

```
listFrom(..)
  .filter { file => isCheckpointFile(file) && file.getLen != 0 }
  .map{ file => CheckpointInstance(file.getPath) }
  .takeWhile(tv => (cur == 0 || tv.version <= cur) && tv < upperBoundCv)
```

This PR fixes the issue by terminating each listing early: we stop as soon as we cross a delta file for the version we are listing up to. In addition, we bound how much each iteration lists (see the sketch after this description). E.g. after this PR, findLastCompleteCheckpointBefore(10000) will need:

1. Iteration-1 lists from 9000 to 10000.
2. Iteration-2 lists from 8000 to 9000.
3. Iteration-3 lists from 7000 to 8000.
4. and so on...

## How was this patch tested?

UT
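For intuition, here is a minimal, self-contained Scala sketch of the fixed strategy (the `LogFile` and `listFrom` stand-ins are hypothetical, not the Delta code): each iteration scans at most one ~1000-version window, and a `takeWhile` over all files, deltas included, stops the listing at the window's end instead of draining it to the end of the log.

```scala
// Hypothetical stand-in for a delta log listing: one file per version,
// flagged when that version also has a checkpoint.
case class LogFile(version: Long, isCheckpoint: Boolean)

object BackwardCheckpointSearchDemo extends App {
  // Emulates LogStore.listFrom: all files at version >= v, in ascending order.
  def listFrom(log: Seq[LogFile], v: Long): Iterator[LogFile] =
    log.iterator.filter(_.version >= v)

  // Windowed backward search: each iteration scans at most one 1000-version chunk.
  def findLastCheckpointBefore(log: Seq[LogFile], version: Long): Option[Long] = {
    var listingEndVersion = version
    while (listingEndVersion >= 0) {
      val listingStartVersion = math.max(0, listingEndVersion - 1000)
      val checkpoints = listFrom(log, listingStartVersion)
        // takeWhile over *all* files (deltas included) terminates the listing
        // early, instead of draining it to the end of the log as the old code did.
        .takeWhile(_.version <= listingEndVersion)
        .filter(f => f.isCheckpoint && f.version < version)
        .toArray
      if (checkpoints.nonEmpty) return Some(checkpoints.last.version)
      listingEndVersion -= 1000
    }
    None
  }

  // Checkpoints every 10 commits up to version 8990, plain deltas elsewhere.
  val log = (0L until 10000L).map(v => LogFile(v, v < 9000 && v % 10 == 0))
  // Window [9000, 10000] has no checkpoint; window [8000, 9000] yields 8990.
  assert(findLastCheckpointBefore(log, 10000) == Some(8990L))
}
```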
1 parent 3baeb99 commit d0bd28e

File tree

4 files changed: +518 -24 lines


spark/src/main/scala/org/apache/spark/sql/delta/Checkpoints.scala

Lines changed: 61 additions & 24 deletions
@@ -28,8 +28,7 @@ import org.apache.spark.sql.delta.actions.{Action, CheckpointMetadata, Metadata,
 import org.apache.spark.sql.delta.metering.DeltaLogging
 import org.apache.spark.sql.delta.sources.DeltaSQLConf
 import org.apache.spark.sql.delta.storage.LogStore
-import org.apache.spark.sql.delta.util.DeltaFileOperations
-import org.apache.spark.sql.delta.util.FileNames
+import org.apache.spark.sql.delta.util.{DeltaFileOperations, DeltaLogGroupingIterator, FileNames}
 import org.apache.spark.sql.delta.util.FileNames._
 import org.apache.spark.sql.delta.util.JsonUtils
 import org.apache.hadoop.conf.Configuration
@@ -417,7 +416,7 @@ trait Checkpoints extends DeltaLogging {
    * Note that the returned checkpoint will always be < `version`.
    * @param version The checkpoint version to compare against
    */
-  protected def findLastCompleteCheckpointBefore(version: Long): Option[CheckpointInstance] = {
+  private[delta] def findLastCompleteCheckpointBefore(version: Long): Option[CheckpointInstance] = {
     val upperBound = CheckpointInstance(version, CheckpointInstance.Format.SINGLE, numParts = None)
     findLastCompleteCheckpointBefore(Some(upperBound))
   }
@@ -428,38 +427,76 @@ trait Checkpoints extends DeltaLogging {
    * deltalog directory.
    * @param checkpointInstance The checkpoint instance to compare against
    */
-  protected def findLastCompleteCheckpointBefore(
+  private[delta] def findLastCompleteCheckpointBefore(
       checkpointInstance: Option[CheckpointInstance] = None): Option[CheckpointInstance] = {
-    val (upperBoundCv, startVersion) = checkpointInstance
-      .collect { case cv if cv.version >= 0 => (cv, cv.version) }
-      .getOrElse((CheckpointInstance.sentinelValue(versionOpt = None), 0L))
-    var cur = startVersion
-    val hadoopConf = newDeltaHadoopConf()
-
-    logInfo(s"Try to find Delta last complete checkpoint before version $startVersion")
-    while (cur >= 0) {
-      val checkpoints = store.listFrom(
-          listingPrefix(logPath, math.max(0, cur - 1000)),
-          hadoopConf)
-        // Checkpoint files of 0 size are invalid but Spark will ignore them silently when reading
-        // such files, hence we drop them so that we never pick up such checkpoints.
-        .filter { file => isCheckpointFile(file) && file.getLen != 0 }
-        .map{ file => CheckpointInstance(file.getPath) }
-        .takeWhile(tv => (cur == 0 || tv.version <= cur) && tv < upperBoundCv)
-        .toArray
+    val upperBoundCv = checkpointInstance.filterNot(_.version < 0).getOrElse {
+      logInfo(s"Try to find Delta last complete checkpoint")
+      return findLastCompleteCheckpoint()
+    }
+    logInfo(s"Try to find Delta last complete checkpoint before version ${upperBoundCv.version}")
+    var listingEndVersion = upperBoundCv.version
+
+    // Do a backward listing from the upperBoundCv version. We list in chunks of 1000 versions.
+    // ...........................................................................................
+    //                                                                     |
+    //                                                         upper bound cv's version
+    //                                          [ iter-1 looks in this window ]
+    //                       [ iter-2 window ]
+    //    [ iter-3 window ]
+    //      |
+    // latest checkpoint
+    while (listingEndVersion >= 0) {
+      val listingStartVersion = math.max(0, listingEndVersion - 1000)
+      val checkpoints = store
+        .listFrom(listingPrefix(logPath, listingStartVersion), newDeltaHadoopConf())
+        .collect {
+          // Also collect delta files from the listing result so that the next takeWhile helps us
+          // terminate the iterator early if no checkpoint exists up to the `listingEndVersion`
+          // version.
+          case DeltaFile(file, version) => (file, FileType.DELTA, version)
+          case CheckpointFile(file, version) => (file, FileType.CHECKPOINT, version)
+        }
+        .takeWhile { case (_, _, currentFileVersion) => currentFileVersion <= listingEndVersion }
+        // Checkpoint files of 0 size are invalid but Spark will ignore them silently when
+        // reading such files, hence we drop them so that we never pick up such checkpoints.
+        .collect { case (file, FileType.CHECKPOINT, _) if file.getLen > 0 =>
+          CheckpointInstance(file.getPath)
+        }
+        // We still need to filter on `upperBoundCv` to eliminate checkpoint files which have the
+        // same version as `upperBoundCv` but a higher [[CheckpointInstance.Format]]. E.g. if
+        // upperBoundCv is a V2 checkpoint and we have a single-part checkpoint and a V2
+        // checkpoint at the same version, we should not consider the V2 checkpoint as it is
+        // not lower than the upperBoundCv.
+        .filter(_ < upperBoundCv)
+        .toArray
       val lastCheckpoint =
         getLatestCompleteCheckpointFromList(checkpoints, Some(upperBoundCv.version))
       if (lastCheckpoint.isDefined) {
         logInfo(s"Delta checkpoint is found at version ${lastCheckpoint.get.version}")
         return lastCheckpoint
-      } else {
-        cur -= 1000
       }
+      listingEndVersion = listingEndVersion - 1000
     }
-    logInfo(s"No checkpoint found for Delta table before version $startVersion")
+    logInfo(s"No checkpoint found for Delta table before version ${upperBoundCv.version}")
     None
   }
 
+  /** Returns the last complete checkpoint in the delta log directory (if any) */
+  private def findLastCompleteCheckpoint(): Option[CheckpointInstance] = {
+    val hadoopConf = newDeltaHadoopConf()
+    val listingResult = store
+      .listFrom(listingPrefix(logPath, 0L), hadoopConf)
+      // Checkpoint files of 0 size are invalid but Spark will ignore them silently when
+      // reading such files, hence we drop them so that we never pick up such checkpoints.
+      .collect { case CheckpointFile(file, _) if file.getLen != 0 => file }
+    new DeltaLogGroupingIterator(listingResult)
+      .flatMap { case (_, files) =>
+        getLatestCompleteCheckpointFromList(files.map(f => CheckpointInstance(f.getPath)).toArray)
+      }.foldLeft(Option.empty[CheckpointInstance])((_, right) => Some(right))
+    // ^The foldLeft here emulates the non-existing Iterator.lastOption method.
+  }
+
   /**
    * Given a list of checkpoint files, pick the latest complete checkpoint instance which is not
    * later than `notLaterThan`.
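A note on the `foldLeft` at the end of the added `findLastCompleteCheckpoint()`: Scala's `Iterator` has no `lastOption`, so folding with `(_, right) => Some(right)` keeps only the final element while consuming the grouped listing in one streaming pass. A tiny standalone illustration (demo code, not part of the commit):

```scala
object LastOptionDemo extends App {
  // The accumulator is discarded at each step; after the fold, the Option holds
  // the iterator's final element, if any: a streaming "lastOption".
  def lastOption[A](it: Iterator[A]): Option[A] =
    it.foldLeft(Option.empty[A])((_, right) => Some(right))

  assert(lastOption(Iterator(1, 2, 3)).contains(3))
  assert(lastOption(Iterator.empty[Int]).isEmpty)
}
```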
spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaLogGroupingIterator.scala

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.util
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.delta.util.FileNames.{CheckpointFile, DeltaFile}
+import org.apache.hadoop.fs.FileStatus
+
+/**
+ * An iterator that groups files by version.
+ * Note that this class can handle only checkpoint and delta files.
+ * For example, for an input iterator:
+ * - 11.checkpoint.0.1.parquet
+ * - 11.checkpoint.1.1.parquet
+ * - 11.json
+ * - 12.checkpoint.parquet
+ * - 12.json
+ * - 13.json
+ * - 14.json
+ * - 15.checkpoint.0.1.parquet
+ * - 15.checkpoint.1.1.parquet
+ * - 15.checkpoint.<uuid>.parquet
+ * - 15.json
+ * This will return:
+ * - (11, Seq(11.checkpoint.0.1.parquet, 11.checkpoint.1.1.parquet, 11.json))
+ * - (12, Seq(12.checkpoint.parquet, 12.json))
+ * - (13, Seq(13.json))
+ * - (14, Seq(14.json))
+ * - (15, Seq(15.checkpoint.0.1.parquet, 15.checkpoint.1.1.parquet, 15.checkpoint.<uuid>.parquet,
+ *   15.json))
+ */
+class DeltaLogGroupingIterator(
+    checkpointAndDeltas: Iterator[FileStatus]) extends Iterator[(Long, ArrayBuffer[FileStatus])] {
+
+  private val bufferedIterator = checkpointAndDeltas.buffered
+
+  /**
+   * Validates that the underlying file is a checkpoint/delta file and returns the corresponding
+   * version.
+   */
+  private def getFileVersion(file: FileStatus): Long = {
+    file match {
+      case DeltaFile(_, version) => version
+      case CheckpointFile(_, version) => version
+      case _ =>
+        throw new IllegalStateException(
+          s"${file.getPath} is not a valid commit file / checkpoint file")
+    }
+  }
+
+  override def hasNext: Boolean = bufferedIterator.hasNext
+
+  override def next(): (Long, ArrayBuffer[FileStatus]) = {
+    val first = bufferedIterator.next()
+    val buffer = scala.collection.mutable.ArrayBuffer(first)
+    val firstFileVersion = getFileVersion(first)
+    while (bufferedIterator.headOption.exists(getFileVersion(_) == firstFileVersion)) {
+      buffer += bufferedIterator.next()
+    }
+    firstFileVersion -> buffer
+  }
+}
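To make the grouping contract concrete, here is a simplified, dependency-free sketch of the same buffered-iterator technique (the `GroupByVersionIterator` name and demo data are hypothetical; the real class consumes Hadoop `FileStatus` entries and extracts versions from checkpoint/delta file names):

```scala
import scala.collection.mutable.ArrayBuffer

// Groups consecutive entries that share a version; the input must be sorted by
// version, as a delta log listing is.
class GroupByVersionIterator[A](entries: Iterator[A], versionOf: A => Long)
    extends Iterator[(Long, ArrayBuffer[A])] {

  private val buffered = entries.buffered

  override def hasNext: Boolean = buffered.hasNext

  override def next(): (Long, ArrayBuffer[A]) = {
    val first = buffered.next()
    val version = versionOf(first)
    val group = ArrayBuffer(first)
    // Peek at the head (without consuming it) while it still has the same version.
    while (buffered.hasNext && versionOf(buffered.head) == version) {
      group += buffered.next()
    }
    version -> group
  }
}

object GroupingDemo extends App {
  val files = Iterator(
    "11.checkpoint.parquet" -> 11L, "11.json" -> 11L, "12.json" -> 12L)
  val grouped = new GroupByVersionIterator[(String, Long)](files, _._2).toList
  assert(grouped.map(_._1) == List(11L, 12L))
  assert(grouped.head._2.toList.map(_._1) == List("11.checkpoint.parquet", "11.json"))
}
```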
