Skip to content

Commit 02eb06e

Browse files
rsotn-mapr, ekrivokonmapr
authored and committed
[MAPR-30583] InMemoryFileIndex changed to getFileBlockLocations in parallel way (apache#221)
1 parent 304d3dd commit 02eb06e

File tree

1 file changed

+3
-12
lines changed

1 file changed

+3
-12
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -294,10 +294,7 @@ object InMemoryFileIndex extends Logging {
294294
if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles
295295
}
296296

297-
val missingFiles = mutable.ArrayBuffer.empty[String]
298-
val filteredLeafStatuses = allLeafStatuses.filterNot(
299-
status => shouldFilterOut(status.getPath.getName))
300-
val resolvedLeafStatuses = filteredLeafStatuses.flatMap {
297+
allLeafStatuses.par.filterNot(status => shouldFilterOut(status.getPath.getName)).map {
301298
case f: LocatedFileStatus =>
302299
Some(f)
303300

@@ -327,14 +324,8 @@ object InMemoryFileIndex extends Logging {
327324
missingFiles += f.getPath.toString
328325
None
329326
}
330-
}
331-
332-
if (missingFiles.nonEmpty) {
333-
logWarning(
334-
s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}")
335-
}
336-
337-
resolvedLeafStatuses
327+
lfs
328+
}.seq
338329
}
339330

340331
/** Checks if we should filter out this path name. */

0 commit comments

Comments
 (0)