
Commit f16bc68

zsxwing authored and rxin committed
[SPARK-9824] [CORE] Fix the issue that InternalAccumulator leaks WeakReference
`InternalAccumulator.create` doesn't call `registerAccumulatorForCleanup` to register the created accumulators with the ContextCleaner, so the `WeakReference`s for these accumulators in `Accumulators.originals` are never removed. This PR adds a `registerAccumulatorForCleanup` call for internal accumulators to avoid the memory leak. Author: zsxwing <zsxwing@gmail.com> Closes #8108 from zsxwing/internal-accumulators-leak.
1 parent 00c0272 commit f16bc68
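For background, `Accumulators.originals` maps accumulator IDs to `WeakReference`s. The referents can be garbage collected, but the map entries themselves are only dropped when the accumulator was registered with the `ContextCleaner`. The sketch below is a minimal, self-contained illustration of that weak-reference-plus-cleanup pattern; `Acc`, `CleanupRef`, and `MiniCleaner` are toy stand-ins, not Spark's real classes:

import java.lang.ref.{ReferenceQueue, WeakReference}
import scala.collection.mutable

// Toy stand-in for an accumulator; not Spark's Accumulator class.
final class Acc(val id: Long)

// Weak reference that remembers the accumulator's ID so its map entry
// can still be removed after the referent itself has been collected.
final class CleanupRef(acc: Acc, queue: ReferenceQueue[Acc])
    extends WeakReference[Acc](acc, queue) {
  val accId: Long = acc.id
}

object MiniCleaner {
  private val queue = new ReferenceQueue[Acc]
  private val originals = mutable.Map[Long, CleanupRef]()

  // Analogous in spirit to registerAccumulatorForCleanup: without this
  // step the id -> WeakReference entry stays in the map forever, which
  // is the leak SPARK-9824 fixes for internal accumulators.
  def registerForCleanup(acc: Acc): Unit =
    originals.synchronized { originals(acc.id) = new CleanupRef(acc, queue) }

  // Drain references whose referents were garbage collected and drop
  // their map entries, so the registry cannot grow without bound.
  def cleanUp(): Unit = {
    var ref = queue.poll()
    while (ref != null) {
      originals.synchronized { originals -= ref.asInstanceOf[CleanupRef].accId }
      ref = queue.poll()
    }
  }
}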

3 files changed: 16 additions & 11 deletions

core/src/main/scala/org/apache/spark/Accumulators.scala

Lines changed: 13 additions & 9 deletions
@@ -382,14 +382,18 @@ private[spark] object InternalAccumulator {
    * add to the same set of accumulators. We do this to report the distribution of accumulator
    * values across all tasks within each stage.
    */
-  def create(): Seq[Accumulator[Long]] = {
-    Seq(
-      // Execution memory refers to the memory used by internal data structures created
-      // during shuffles, aggregations and joins. The value of this accumulator should be
-      // approximately the sum of the peak sizes across all such data structures created
-      // in this task. For SQL jobs, this only tracks all unsafe operators and ExternalSort.
-      new Accumulator(
-        0L, AccumulatorParam.LongAccumulatorParam, Some(PEAK_EXECUTION_MEMORY), internal = true)
-    ) ++ maybeTestAccumulator.toSeq
+  def create(sc: SparkContext): Seq[Accumulator[Long]] = {
+    val internalAccumulators = Seq(
+      // Execution memory refers to the memory used by internal data structures created
+      // during shuffles, aggregations and joins. The value of this accumulator should be
+      // approximately the sum of the peak sizes across all such data structures created
+      // in this task. For SQL jobs, this only tracks all unsafe operators and ExternalSort.
+      new Accumulator(
+        0L, AccumulatorParam.LongAccumulatorParam, Some(PEAK_EXECUTION_MEMORY), internal = true)
+    ) ++ maybeTestAccumulator.toSeq
+    internalAccumulators.foreach { accumulator =>
+      sc.cleaner.foreach(_.registerAccumulatorForCleanup(accumulator))
+    }
+    internalAccumulators
   }
 }
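Note the `sc.cleaner.foreach(...)` shape: `SparkContext.cleaner` is an `Option[ContextCleaner]`, since the cleaner may be absent (for example when reference tracking is disabled via `spark.cleaner.referenceTracking`), so registration silently becomes a no-op when there is no cleaner. A minimal sketch of that Option guard, using toy types rather than Spark's:

object OptionGuardDemo extends App {
  // Toy types (not Spark's) illustrating the Option-guarded registration.
  final class Cleaner {
    def registerAccumulatorForCleanup(id: Long): Unit =
      println(s"registered accumulator $id for cleanup")
  }
  final class Ctx(val cleaner: Option[Cleaner])

  val tracking   = new Ctx(Some(new Cleaner))
  val noTracking = new Ctx(None) // e.g. reference tracking disabled

  Seq(1L, 2L).foreach { id =>
    tracking.cleaner.foreach(_.registerAccumulatorForCleanup(id))   // registers
    noTracking.cleaner.foreach(_.registerAccumulatorForCleanup(id)) // no-op: no cleaner
  }
}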

core/src/main/scala/org/apache/spark/scheduler/Stage.scala

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ private[spark] abstract class Stage(
    * accumulators here again will override partial values from the finished tasks.
    */
   def resetInternalAccumulators(): Unit = {
-    _internalAccumulators = InternalAccumulator.create()
+    _internalAccumulators = InternalAccumulator.create(rdd.sparkContext)
   }
 
   /**
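`Stage` does not hold a `SparkContext` field of its own; it reaches the context through its `rdd`, because `RDD.sparkContext` returns the context the RDD was created with. A small standalone example of that accessor (the app name is made up for the demo):

import org.apache.spark.SparkContext

object RddContextDemo extends App {
  val sc  = new SparkContext("local", "rdd-context-demo")
  val rdd = sc.parallelize(1 to 10)
  // RDD.sparkContext hands back the context the RDD was created with,
  // which is how Stage reaches the ContextCleaner in this commit.
  assert(rdd.sparkContext eq sc)
  sc.stop()
}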

core/src/test/scala/org/apache/spark/AccumulatorSuite.scala

Lines changed: 2 additions & 1 deletion
@@ -160,7 +160,8 @@ class AccumulatorSuite extends SparkFunSuite with Matchers with LocalSparkContext
   }
 
   test("internal accumulators in TaskContext") {
-    val accums = InternalAccumulator.create()
+    sc = new SparkContext("local", "test")
+    val accums = InternalAccumulator.create(sc)
     val taskContext = new TaskContextImpl(0, 0, 0, 0, null, null, accums)
     val internalMetricsToAccums = taskContext.internalMetricsToAccumulators
     val collectedInternalAccums = taskContext.collectInternalAccumulators()
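Because `create` now touches the `ContextCleaner`, the test needs a live context, so it assigns one to the `sc` field supplied by the `LocalSparkContext` mixin, which stops the context after each test. A simplified sketch of what such a mixin does (the real trait lives in Spark's test sources and differs in detail):

import org.apache.spark.SparkContext
import org.scalatest.{BeforeAndAfterEach, Suite}

// Simplified LocalSparkContext-style mixin: tests assign sc, and the
// trait guarantees the context is stopped after each test runs.
trait MiniLocalSparkContext extends BeforeAndAfterEach { self: Suite =>
  @transient var sc: SparkContext = _

  override def afterEach(): Unit = {
    try {
      if (sc != null) {
        sc.stop()
        sc = null
      }
    } finally {
      super.afterEach()
    }
  }
}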
