Revert "[SPARK-40039][SS] Introducing a streaming checkpoint file man…
Browse files Browse the repository at this point in the history
…ager based on Hadoop's Abortable interface"

This reverts commit 7e4064c.
HyukjinKwon committed Aug 27, 2022
1 parent 8fb8532 commit fb4dba1
Showing 11 changed files with 59 additions and 539 deletions.
12 changes: 3 additions & 9 deletions docs/cloud-integration.md
@@ -231,15 +231,9 @@ The size of the window needs to be set to handle this.
is no need for a workflow of write-then-rename to ensure that files aren't picked up
while they are still being written. Applications can write straight to the monitored directory.

-1. In case of the default checkpoint file manager called `FileContextBasedCheckpointFileManager`
-streams should only be checkpointed to a store implementing a fast and
-atomic `rename()` operation. Otherwise the checkpointing may be slow and potentially unreliable.
-On AWS S3 with Hadoop 3.3.1 or later using the S3A connector the abortable stream based checkpoint
-file manager can be used (by setting the `spark.sql.streaming.checkpointFileManagerClass`
-configuration to `org.apache.spark.internal.io.cloud.AbortableStreamBasedCheckpointFileManager`)
-which eliminates the slow rename. In this case users must be extra careful to avoid the reuse of
-the checkpoint location among multiple queries running parallelly as that could lead to corruption
-of the checkpointing data.
+1. Streams should only be checkpointed to a store implementing a fast and
+atomic `rename()` operation.
+Otherwise the checkpointing may be slow and potentially unreliable.

## Committing work into cloud storage safely and fast.
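For context, a minimal sketch of the configuration the deleted docs paragraph described, assuming a Spark build that ships the hadoop-cloud module. The config key and class name come from the diff above; the session builder boilerplate and names are illustrative, not part of the commit:

```scala
// Sketch: enabling the abortable-stream checkpoint file manager the reverted
// docs described for S3A on Hadoop 3.3.1+. Config key and class name are taken
// from the docs text above; the app name is a placeholder.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("s3a-checkpoint-example") // placeholder name
  .config("spark.sql.streaming.checkpointFileManagerClass",
    "org.apache.spark.internal.io.cloud.AbortableStreamBasedCheckpointFileManager")
  .getOrCreate()

// Per the removed docs: never reuse one checkpoint location across queries
// running in parallel with this manager, as that could corrupt checkpoint data.
```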

20 changes: 0 additions & 20 deletions hadoop-cloud/README.md

This file was deleted.

47 changes: 0 additions & 47 deletions hadoop-cloud/pom.xml
@@ -49,13 +49,6 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
-<dependency>
-<groupId>org.apache.spark</groupId>
-<artifactId>spark-sql_${scala.binary.version}</artifactId>
-<version>${project.version}</version>
-<type>test-jar</type>
-<scope>test</scope>
-</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -219,22 +212,6 @@

<build>
<plugins>
-<plugin>
-<groupId>org.scalatest</groupId>
-<artifactId>scalatest-maven-plugin</artifactId>
-<executions>
-<execution>
-<id>test</id>
-<phase>test</phase>
-<goals>
-<goal>test</goal>
-</goals>
-<configuration>
-<tagsToExclude>org.apache.spark.internal.io.cloud.IntegrationTestSuite</tagsToExclude>
-</configuration>
-</execution>
-</executions>
-</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
@@ -320,30 +297,6 @@
</dependencies>
</profile>

-<profile>
-<id>integration-test</id>
-<build>
-<plugins>
-<plugin>
-<groupId>org.scalatest</groupId>
-<artifactId>scalatest-maven-plugin</artifactId>
-<executions>
-<execution>
-<id>test</id>
-<phase>test</phase>
-<goals>
-<goal>test</goal>
-</goals>
-<configuration>
-<tagsToExclude>None</tagsToExclude>
-<tagsToInclude>org.apache.spark.internal.io.cloud.IntegrationTestSuite</tagsToInclude>
-</configuration>
-</execution>
-</executions>
-</plugin>
-</plugins>
-</build>
-</profile>
</profiles>

</project>
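For context on what the removed `tagsToExclude`/`tagsToInclude` filtering did, here is a minimal ScalaTest sketch. The tag string mirrors the value in the removed plugin configuration; the tag object and suite below are hypothetical, and Spark's actual `IntegrationTestSuite` tag may be defined differently:

```scala
// Sketch: ScalaTest string-named tags are what the scalatest-maven-plugin's
// tagsToInclude/tagsToExclude settings matched against. The tag name mirrors
// the removed pom.xml; the suite and test body are placeholders.
import org.scalatest.Tag
import org.scalatest.funsuite.AnyFunSuite

object IntegrationTest
    extends Tag("org.apache.spark.internal.io.cloud.IntegrationTestSuite")

class ExampleCloudSuite extends AnyFunSuite {
  // Excluded by the default plugin config; included under the removed profile.
  test("runs only when the integration tag is included", IntegrationTest) {
    assert(1 + 1 == 2) // placeholder assertion
  }
}
```

Under the removed `integration-test` profile, `mvn test -Pintegration-test` would invert the default filter so that only tests carrying this tag execute.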

