@@ -167,19 +167,14 @@ case class FileSourceScanExec(
     partitionSchema = relation.partitionSchema,
     relation.sparkSession.sessionState.conf)
 
+  private var fileListingTime = 0L
+
   @transient private lazy val selectedPartitions: Seq[PartitionDirectory] = {
     val optimizerMetadataTimeNs = relation.location.metadataOpsTimeNs.getOrElse(0L)
     val startTime = System.nanoTime()
     val ret = relation.location.listFiles(partitionFilters, dataFilters)
     val timeTakenMs = ((System.nanoTime() - startTime) + optimizerMetadataTimeNs) / 1000 / 1000
-
-    metrics("numFiles").add(ret.map(_.files.size.toLong).sum)
-    metrics("metadataTime").add(timeTakenMs)
-
-    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId,
-      metrics("numFiles") :: metrics("metadataTime") :: Nil)
-
+    fileListingTime = timeTakenMs
     ret
   }
 
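With this hunk, `selectedPartitions` no longer posts metrics itself; it only records the elapsed listing time in the new `fileListingTime` field, and the actual reporting happens later. Below is a minimal, self-contained sketch of that pattern, with illustrative names only (not Spark's classes):

```scala
// Sketch: a lazy val does the expensive listing and only records its elapsed
// time in a field; a separate step reports that time once metrics can be posted.
object ListingPatternSketch {
  final class Scan(listFiles: () => Seq[String]) {
    // Filled in as a side effect of evaluating `selectedFiles`.
    private var fileListingTimeMs = 0L

    lazy val selectedFiles: Seq[String] = {
      val start = System.nanoTime()
      val files = listFiles()
      fileListingTimeMs = (System.nanoTime() - start) / 1000 / 1000
      files
    }

    // Called later (when the scan actually executes) to report the metrics.
    // Reading `selectedFiles` first forces the listing, so the time is set.
    def reportMetrics(report: (String, Long) => Unit): Unit = {
      report("numFiles", selectedFiles.size.toLong)
      report("fileListingTime", fileListingTimeMs)
    }
  }

  def main(args: Array[String]): Unit = {
    val scan = new Scan(() => Seq("part-00000", "part-00001"))
    scan.reportMetrics((name, value) => println(s"$name = $value"))
  }
}
```
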
@@ -291,6 +286,8 @@ case class FileSourceScanExec(
   }
 
   private lazy val inputRDD: RDD[InternalRow] = {
+    // Update metrics here so they take effect in both the codegen and non-codegen paths.
+    updateDriverMetrics()
     val readFile: (PartitionedFile) => Iterator[InternalRow] =
       relation.fileFormat.buildReaderWithPartitionValues(
         sparkSession = relation.sparkSession,
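This hunk hooks the metric update into `inputRDD`, which both the whole-stage-codegen path and the normal `doExecute` path build, so the update runs once regardless of how the scan is executed. A tiny sketch of why a shared lazy val covers both callers (illustrative names, not Spark code):

```scala
// Sketch: both execution entry points force the same lazy val, so the
// side effect that stands in for updateDriverMetrics() runs exactly once.
object SharedEntryPointSketch {
  private var metricsPosted = 0

  private lazy val inputData: Seq[Int] = {
    metricsPosted += 1 // stands in for updateDriverMetrics()
    Seq(1, 2, 3)
  }

  def doExecute(): Int = inputData.sum            // "normal" execution path
  def doCodegenExecute(): Int = inputData.product // whole-stage-codegen path

  def main(args: Array[String]): Unit = {
    doExecute()
    doCodegenExecute()
    println(s"metrics posted $metricsPosted time(s)") // prints 1
  }
}
```
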
@@ -316,7 +313,7 @@ case class FileSourceScanExec(
   override lazy val metrics =
     Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
       "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of files"),
-      "metadataTime" -> SQLMetrics.createMetric(sparkContext, "metadata time (ms)"),
+      "fileListingTime" -> SQLMetrics.createMetric(sparkContext, "file listing time (ms)"),
       "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time"))
 
   protected override def doExecute(): RDD[InternalRow] = {
@@ -507,6 +504,19 @@ case class FileSourceScanExec(
     }
   }
 
+  /**
+   * Send the updated metrics to the driver. By the time this function is called,
+   * `selectedPartitions` has already been initialized. See SPARK-26327 for more detail.
+   */
+  private def updateDriverMetrics() = {
+    metrics("numFiles").add(selectedPartitions.map(_.files.size.toLong).sum)
+    metrics("fileListingTime").add(fileListingTime)
+
+    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId,
+      metrics("numFiles") :: metrics("fileListingTime") :: Nil)
+  }
+
   override def doCanonicalize(): FileSourceScanExec = {
     FileSourceScanExec(
       relation,
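The new `updateDriverMetrics()` reads `selectedPartitions` (forcing the listing if it has not run yet), adds the accumulated values, and posts them under the current SQL execution ID. Below is an illustrative, simplified sketch of that posting pattern; the names and signatures are stand-ins, not Spark's actual API:

```scala
// Sketch: look up the current execution ID from per-query local properties and
// post driver-side metric values against it (simplified stand-ins).
object DriverMetricUpdateSketch {
  // Stand-in for SparkContext local properties, keyed per query execution.
  private val localProperties = scala.collection.mutable.Map[String, String]()

  // Stand-in for posting driver metric updates for a given execution.
  private def postDriverMetricUpdates(executionId: String, metrics: Seq[(String, Long)]): Unit =
    metrics.foreach { case (name, value) => println(s"[execution $executionId] $name = $value") }

  def main(args: Array[String]): Unit = {
    localProperties("spark.sql.execution.id") = "42"

    // Values that updateDriverMetrics() would have accumulated on the driver side.
    val numFiles = 12L
    val fileListingTimeMs = 87L

    val executionId = localProperties("spark.sql.execution.id")
    postDriverMetricUpdates(executionId,
      Seq("numFiles" -> numFiles, "fileListingTime" -> fileListingTimeMs))
  }
}
```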