Commit dd9d932

Avoid creating new Iterators
1 parent 589ea26 commit dd9d932

22 files changed: +224 -189 lines changed
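
Summary of the change: `SparkPlan.execute()` previously wrapped the output of `doExecute()` in an extra `map` whenever `trackNumOfRowsEnabled` was set, allocating one more Iterator per partition purely to count rows. After this commit each operator declares its own metrics and increments them inside iterators it already creates (or, for leaf scans, inside its own `rdd.map`). A minimal standalone sketch of the two patterns in plain Scala, with `Counter` as an illustrative stand-in for `LongSQLMetric` (not part of the patch):

// Standalone sketch; Counter stands in for LongSQLMetric.
object RowCountingSketch {
  final class Counter {
    var value: Long = 0L
    def +=(n: Long): Unit = value += n
  }

  // Before: a generic wrapper applied in SparkPlan.execute() around whatever
  // iterator the operator produced, allocating one extra Iterator per
  // partition just to count rows.
  def countByWrapping[A](rows: Iterator[A], numRows: Counter): Iterator[A] =
    rows.map { row => numRows += 1; row }

  // After: an iterator the operator has to build anyway (a trivial transform
  // here, standing in for e.g. an aggregation iterator) receives the counter
  // as a constructor argument and bumps it inside its own next().
  final class TransformIterator(input: Iterator[String], numOutputRows: Counter)
    extends Iterator[String] {
    override def hasNext: Boolean = input.hasNext
    override def next(): String = {
      numOutputRows += 1
      input.next().toUpperCase
    }
  }
}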

sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala

Lines changed: 10 additions & 2 deletions
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.{InternalRow, CatalystTypeConverters}
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
 import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericMutableRow}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics}
+import org.apache.spark.sql.metric.SQLMetrics
 import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.DataType
 import org.apache.spark.sql.{Row, SQLContext}
@@ -99,9 +100,16 @@ private[sql] case class PhysicalRDD(
     rdd: RDD[InternalRow],
     extraInformation: String) extends LeafNode {

-  override protected[sql] val trackNumOfRowsEnabled = true
+  override private[sql] lazy val metrics = Map(
+    "numRows" -> SQLMetrics.createLongMetric(sparkContext, "number of rows"))

-  protected override def doExecute(): RDD[InternalRow] = rdd
+  protected override def doExecute(): RDD[InternalRow] = {
+    val numRows = longMetric("numRows")
+    rdd.map { row =>
+      numRows += 1
+      row
+    }
+  }

   override def simpleString: String = "Scan " + extraInformation + output.mkString("[", ",", "]")
 }

sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala

Lines changed: 10 additions & 2 deletions
@@ -21,6 +21,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.{InternalRow, CatalystTypeConverters}
 import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.metric.SQLMetrics


 /**
@@ -30,11 +31,18 @@ private[sql] case class LocalTableScan(
     output: Seq[Attribute],
     rows: Seq[InternalRow]) extends LeafNode {

-  override protected[sql] val trackNumOfRowsEnabled = true
+  override private[sql] lazy val metrics = Map(
+    "numRows" -> SQLMetrics.createLongMetric(sparkContext, "number of rows"))

   private lazy val rdd = sqlContext.sparkContext.parallelize(rows)

-  protected override def doExecute(): RDD[InternalRow] = rdd
+  protected override def doExecute(): RDD[InternalRow] = {
+    val numRows = longMetric("numRows")
+    rdd.map { row =>
+      numRows += 1
+      row
+    }
+  }

   override def executeCollect(): Array[Row] = {
     val converter = CatalystTypeConverters.createToScalaConverter(schema)

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala

Lines changed: 2 additions & 23 deletions
@@ -80,23 +80,10 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
     super.makeCopy(newArgs)
   }

-  /**
-   * Whether track the number of rows output by this SparkPlan
-   */
-  protected[sql] def trackNumOfRowsEnabled: Boolean = false
-
-  private lazy val defaultMetrics: Map[String, SQLMetric[_, _]] =
-    if (trackNumOfRowsEnabled) {
-      Map("numRows" -> SQLMetrics.createLongMetric(sparkContext, "number of rows"))
-    }
-    else {
-      Map.empty
-    }
-
   /**
    * Return all metrics containing metrics of this SparkPlan.
    */
-  private[sql] def metrics: Map[String, SQLMetric[_, _]] = defaultMetrics
+  private[sql] def metrics: Map[String, SQLMetric[_, _]] = Map.empty

   /**
    * Return a IntSQLMetric according to the name.
@@ -156,15 +143,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
     }
     RDDOperationScope.withScope(sparkContext, nodeName, false, true) {
       prepare()
-      if (trackNumOfRowsEnabled) {
-        val numRows = longMetric("numRows")
-        doExecute().map { row =>
-          numRows += 1
-          row
-        }
-      } else {
-        doExecute()
-      }
+      doExecute()
     }
   }


sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala

Lines changed: 5 additions & 10 deletions
@@ -70,11 +70,7 @@ case class SortBasedAggregate(
   protected override def doExecute(): RDD[InternalRow] = attachTree(this, "execute") {
     val numInputRows = longMetric("numInputRows")
     val numOutputRows = longMetric("numOutputRows")
-    child.execute().mapPartitions { _iter =>
-      val iter = _iter.map { row =>
-        numInputRows += 1
-        row
-      }
+    child.execute().mapPartitions { iter =>
       // Because the constructor of an aggregation iterator will read at least the first row,
       // we need to get the value of iter.hasNext first.
       val hasInput = iter.hasNext
@@ -95,17 +91,16 @@ case class SortBasedAggregate(
         newProjection _,
         child.output,
         iter,
-        outputsUnsafeRows)
+        outputsUnsafeRows,
+        numInputRows,
+        numOutputRows)
       if (!hasInput && groupingExpressions.isEmpty) {
         // There is no input and there is no grouping expressions.
         // We need to output a single row as the output.
         numOutputRows += 1
         Iterator[InternalRow](outputIter.outputForEmptyGroupingKeyWithoutInput())
       } else {
-        outputIter.map { row =>
-          numOutputRows += 1
-          row
-        }
+        outputIter
       }
     }
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala

Lines changed: 13 additions & 5 deletions
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.aggregate
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression2, AggregateFunction2}
+import org.apache.spark.sql.metric.LongSQLMetric
 import org.apache.spark.unsafe.KVIterator

 /**
@@ -37,7 +38,9 @@ class SortBasedAggregationIterator(
     initialInputBufferOffset: Int,
     resultExpressions: Seq[NamedExpression],
     newMutableProjection: (Seq[Expression], Seq[Attribute]) => (() => MutableProjection),
-    outputsUnsafeRows: Boolean)
+    outputsUnsafeRows: Boolean,
+    numInputRows: LongSQLMetric,
+    numOutputRows: LongSQLMetric)
   extends AggregationIterator(
     groupingKeyAttributes,
     valueAttributes,
@@ -103,6 +106,7 @@ class SortBasedAggregationIterator(
     // Get the grouping key.
     val groupingKey = inputKVIterator.getKey
     val currentRow = inputKVIterator.getValue
+    numInputRows += 1

     // Check if the current row belongs the current input row.
     if (currentGroupingKey == groupingKey) {
@@ -137,7 +141,7 @@ class SortBasedAggregationIterator(
       val outputRow = generateOutput(currentGroupingKey, sortBasedAggregationBuffer)
       // Initialize buffer values for the next group.
       initializeBuffer(sortBasedAggregationBuffer)
-
+      numOutputRows += 1
       outputRow
     } else {
       // no more result
@@ -151,7 +155,7 @@ class SortBasedAggregationIterator(

       nextGroupingKey = inputKVIterator.getKey().copy()
       firstRowInNextGroup = inputKVIterator.getValue().copy()
-
+      numInputRows += 1
       sortedInputHasNewGroup = true
     } else {
       // This inputIter is empty.
@@ -181,7 +185,9 @@ object SortBasedAggregationIterator {
       newProjection: (Seq[Expression], Seq[Attribute]) => Projection,
       inputAttributes: Seq[Attribute],
       inputIter: Iterator[InternalRow],
-      outputsUnsafeRows: Boolean): SortBasedAggregationIterator = {
+      outputsUnsafeRows: Boolean,
+      numInputRows: LongSQLMetric,
+      numOutputRows: LongSQLMetric): SortBasedAggregationIterator = {
     val kvIterator = if (UnsafeProjection.canSupport(groupingExprs)) {
       AggregationIterator.unsafeKVIterator(
         groupingExprs,
@@ -202,7 +208,9 @@ object SortBasedAggregationIterator {
       initialInputBufferOffset,
       resultExpressions,
       newMutableProjection,
-      outputsUnsafeRows)
+      outputsUnsafeRows,
+      numInputRows,
+      numOutputRows)
   }
   // scalastyle:on
 }
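
The pattern used here (simplified; the real SortBasedAggregationIterator also manages aggregation buffers and projections) can be illustrated with a small self-contained iterator: the two counters arrive as constructor parameters and are bumped at the points where the iterator already reads its input and emits a group, so no wrapping `map` iterator is needed. `Counter` is an illustrative stand-in for `LongSQLMetric`:

// Illustrative stand-in for LongSQLMetric (not the real class).
final class Counter {
  var value: Long = 0L
  def +=(n: Long): Unit = value += n
}

// Sums consecutive equal keys from a key-sorted input. Input rows are counted
// as they are read and output rows as each group is emitted, inside the
// iterator itself rather than through a wrapping `map`.
final class SumByKeyIterator(
    input: Iterator[(String, Long)],
    numInputRows: Counter,
    numOutputRows: Counter)
  extends Iterator[(String, Long)] {

  // First row of the group that the next call to next() will emit.
  private var pending: Option[(String, Long)] =
    if (input.hasNext) { numInputRows += 1; Some(input.next()) } else None

  override def hasNext: Boolean = pending.isDefined

  override def next(): (String, Long) = {
    val (key, firstValue) = pending.get
    var sum = firstValue
    pending = None
    // Consume rows until the key changes or the input is exhausted.
    while (pending.isEmpty && input.hasNext) {
      numInputRows += 1
      val kv = input.next()
      if (kv._1 == key) sum += kv._2 else pending = Some(kv)
    }
    numOutputRows += 1
    (key, sum)
  }
}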

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala

Lines changed: 5 additions & 10 deletions
@@ -68,11 +68,7 @@ case class TungstenAggregate(
   protected override def doExecute(): RDD[InternalRow] = attachTree(this, "execute") {
     val numInputRows = longMetric("numInputRows")
     val numOutputRows = longMetric("numOutputRows")
-    child.execute().mapPartitions { _iter =>
-      val iter = _iter.map { row =>
-        numInputRows += 1
-        row
-      }
+    child.execute().mapPartitions { iter =>
       val hasInput = iter.hasNext
       if (!hasInput && groupingExpressions.nonEmpty) {
         // This is a grouped aggregate and the input iterator is empty,
@@ -89,16 +85,15 @@ case class TungstenAggregate(
         newMutableProjection,
         child.output,
         iter,
-        testFallbackStartsAt)
+        testFallbackStartsAt,
+        numInputRows,
+        numOutputRows)

       if (!hasInput && groupingExpressions.isEmpty) {
         numOutputRows += 1
         Iterator.single[UnsafeRow](aggregationIterator.outputForEmptyGroupingKeyWithoutInput())
       } else {
-        aggregationIterator.map { row =>
-          numOutputRows += 1
-          row
-        }
+        aggregationIterator
       }
     }
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala

Lines changed: 9 additions & 2 deletions
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeRowJoiner
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.{UnsafeKVExternalSorter, UnsafeFixedWidthAggregationMap}
+import org.apache.spark.sql.metric.LongSQLMetric
 import org.apache.spark.sql.types.StructType

 /**
@@ -83,7 +84,9 @@ class TungstenAggregationIterator(
     newMutableProjection: (Seq[Expression], Seq[Attribute]) => (() => MutableProjection),
     originalInputAttributes: Seq[Attribute],
     inputIter: Iterator[InternalRow],
-    testFallbackStartsAt: Option[Int])
+    testFallbackStartsAt: Option[Int],
+    numInputRows: LongSQLMetric,
+    numOutputRows: LongSQLMetric)
   extends Iterator[UnsafeRow] with Logging {

   ///////////////////////////////////////////////////////////////////////////
@@ -352,6 +355,7 @@ class TungstenAggregationIterator(
   private def processInputs(): Unit = {
     while (!sortBased && inputIter.hasNext) {
       val newInput = inputIter.next()
+      numInputRows += 1
       val groupingKey = groupProjection.apply(newInput)
       val buffer: UnsafeRow = hashMap.getAggregationBufferFromUnsafeRow(groupingKey)
       if (buffer == null) {
@@ -371,6 +375,7 @@ class TungstenAggregationIterator(
     var i = 0
     while (!sortBased && inputIter.hasNext) {
       val newInput = inputIter.next()
+      numInputRows += 1
       val groupingKey = groupProjection.apply(newInput)
       val buffer: UnsafeRow = if (i < fallbackStartsAt) {
         hashMap.getAggregationBufferFromUnsafeRow(groupingKey)
@@ -439,6 +444,7 @@ class TungstenAggregationIterator(
     // Process the rest of input rows.
     while (inputIter.hasNext) {
       val newInput = inputIter.next()
+      numInputRows += 1
       val groupingKey = groupProjection.apply(newInput)
       buffer.copyFrom(initialAggregationBuffer)
       processRow(buffer, newInput)
@@ -462,6 +468,7 @@ class TungstenAggregationIterator(
     // Insert the rest of input rows.
     while (inputIter.hasNext) {
       val newInput = inputIter.next()
+      numInputRows += 1
       val groupingKey = groupProjection.apply(newInput)
       bufferExtractor(newInput)
       externalSorter.insertKV(groupingKey, buffer)
@@ -657,7 +664,7 @@ class TungstenAggregationIterator(
         TaskContext.get().internalMetricsToAccumulators(
           InternalAccumulator.PEAK_EXECUTION_MEMORY).add(peakMemory)
       }
-
+      numOutputRows += 1
       res
     } else {
       // no more result

sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala

Lines changed: 6 additions & 8 deletions
@@ -86,7 +86,11 @@ case class BroadcastHashJoin(
         numBuildRows += 1
         row.copy()
       }.collect()
-      val hashed = HashedRelation(input.iterator, buildSideKeyGenerator, input.size)
+      // The following line doesn't run in a job so we cannot track the metric value. However, we
+      // have already tracked it in the above lines. So here we can use
+      // `SQLMetrics.nullLongMetric` to ignore it.
+      val hashed = HashedRelation(
+        input.iterator, SQLMetrics.nullLongMetric, buildSideKeyGenerator, input.size)
       sparkContext.broadcast(hashed)
     }
   }(BroadcastHashJoin.broadcastHashJoinExecutionContext)
@@ -113,13 +117,7 @@ case class BroadcastHashJoin(
           InternalAccumulator.PEAK_EXECUTION_MEMORY).add(unsafe.getUnsafeSize)
         case _ =>
       }
-      hashJoin(streamedIter.map { row =>
-        numStreamedRows += 1
-        row
-      }, hashedRelation).map { row =>
-        numOutputRows += 1
-        row
-      }
+      hashJoin(streamedIter, numStreamedRows, hashedRelation, numOutputRows)
     }
   }
 }
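
`SQLMetrics.nullLongMetric`, referenced in the comment above, is not shown in this diff; the idea is a metric whose updates are discarded, so a call site that requires a metric argument (here the driver-side `HashedRelation` construction, whose rows were already counted when they were collected) can be handed one that records nothing. A rough standalone sketch of that idea with illustrative names, not the actual SQLMetrics classes:

// Illustrative stand-ins; not the actual SQLMetrics classes.
class Counter {
  var value: Long = 0L
  def +=(n: Long): Unit = value += n
}

// A counter whose updates are no-ops, for call sites where the rows have
// already been counted elsewhere and must not be counted twice.
object NullCounter extends Counter {
  override def +=(n: Long): Unit = ()
}

object HashedRelationSketch {
  // Builds a lookup structure from already-collected rows, incrementing the
  // supplied counter once per row it consumes.
  def buildLookup(rows: Iterator[String], numRows: Counter): Set[String] = {
    val builder = Set.newBuilder[String]
    while (rows.hasNext) {
      numRows += 1
      builder += rows.next()
    }
    builder.result()
  }

  // The rows were already counted when they were collected on the driver, so
  // the lookup builder gets the no-op counter.
  def example(): Set[String] = buildLookup(Iterator("a", "b", "c"), NullCounter)
}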

sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashOuterJoin.scala

Lines changed: 10 additions & 11 deletions
@@ -89,7 +89,11 @@ case class BroadcastHashOuterJoin(
         numBuildRows += 1
         row.copy()
       }.collect()
-      val hashed = HashedRelation(input.iterator, buildKeyGenerator, input.size)
+      // The following line doesn't run in a job so we cannot track the metric value. However, we
+      // have already tracked it in the above lines. So here we can use
+      // `SQLMetrics.nullLongMetric` to ignore it.
+      val hashed = HashedRelation(
+        input.iterator, SQLMetrics.nullLongMetric, buildKeyGenerator, input.size)
       sparkContext.broadcast(hashed)
     }
   }(BroadcastHashJoin.broadcastHashJoinExecutionContext)
@@ -111,11 +115,7 @@ case class BroadcastHashOuterJoin(

     val broadcastRelation = Await.result(broadcastFuture, timeout)

-    streamedPlan.execute().mapPartitions { _streamedIter =>
-      val streamedIter = _streamedIter.map { row =>
-        numStreamedRows += 1
-        row
-      }
+    streamedPlan.execute().mapPartitions { streamedIter =>
       val joinedRow = new JoinedRow()
       val hashTable = broadcastRelation.value
       val keyGenerator = streamedKeyGenerator
@@ -131,25 +131,24 @@ case class BroadcastHashOuterJoin(
       joinType match {
         case LeftOuter =>
           streamedIter.flatMap(currentRow => {
+            numStreamedRows += 1
             val rowKey = keyGenerator(currentRow)
             joinedRow.withLeft(currentRow)
-            leftOuterIterator(rowKey, joinedRow, hashTable.get(rowKey), resultProj)
+            leftOuterIterator(rowKey, joinedRow, hashTable.get(rowKey), resultProj, numOutputRows)
           })

         case RightOuter =>
           streamedIter.flatMap(currentRow => {
+            numStreamedRows += 1
             val rowKey = keyGenerator(currentRow)
             joinedRow.withRight(currentRow)
-            rightOuterIterator(rowKey, hashTable.get(rowKey), joinedRow, resultProj)
+            rightOuterIterator(rowKey, hashTable.get(rowKey), joinedRow, resultProj, numOutputRows)
           })

         case x =>
           throw new IllegalArgumentException(
             s"BroadcastHashOuterJoin should not take $x as the JoinType")
       }
-    }.map { row =>
-      numOutputRows += 1
-      row
     }
   }
 }
