Skip to content

Commit a41e7e4

Browse files
fenzhuGitHub Enterprise
authored andcommitted
[HADP-55114] Fast skip in case of no table cache (apache#568)
1 parent 060d8a4 commit a41e7e4

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,17 @@ trait FileSourceScanLike extends DataSourceScanExec {
333333
if (sourcePaths.forall(p => !pattern.matcher(p).find())) return partitions
334334

335335
val localPaths = sourcePaths.map(_.replaceFirst(remoteHdfsRegex, localHdfsPrefix))
336+
val hadoopConf = relation.sparkSession.sparkContext.hadoopConfiguration
337+
if (localPaths.isEmpty) {
338+
logInfo("Local paths is empty.")
339+
return partitions
340+
}
341+
val parentPath = new Path(localPaths.head).getParent
342+
val fileSystem = parentPath.getFileSystem(hadoopConf)
343+
if (!fileSystem.exists(parentPath)) {
344+
logInfo("Local cache table directory does not exist.")
345+
return partitions
346+
}
336347
def listFilesIfExists(paths: Seq[String], conf: Configuration): Seq[FileStatus] = {
337348
paths.flatMap { path =>
338349
val p = new Path(path)
@@ -345,7 +356,6 @@ trait FileSourceScanLike extends DataSourceScanExec {
345356
}
346357
}
347358

348-
val hadoopConf = relation.sparkSession.sparkContext.hadoopConfiguration
349359
val parallelPartitionDiscoveryParallelism =
350360
relation.sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism
351361
val startTime = System.currentTimeMillis()

0 commit comments

Comments
 (0)