Skip to content

Commit f36e576

Browse files
committed
Log warning on partition recompute
1 parent c289e91 commit f36e576

File tree

1 file changed

+14
-1
lines changed
  • core/src/main/scala/org/apache/spark/rdd

1 file changed

+14
-1
lines changed

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import java.util.Random
2222
import scala.collection.Map
2323
import scala.collection.JavaConversions.mapAsScalaMap
2424
import scala.collection.mutable.ArrayBuffer
25+
import scala.collection.mutable.HashSet
2526
import scala.reflect.{classTag, ClassTag}
2627

2728
import com.clearspring.analytics.stream.cardinality.HyperLogLog
@@ -221,12 +222,22 @@ abstract class RDD[T: ClassTag](
221222
}
222223
}
223224

225+
private val previouslyComputed = new HashSet[Partition]
224226
/**
225227
* Compute an RDD partition or read it from a checkpoint if the RDD is checkpointing.
226228
*/
227229
private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] =
228230
{
229-
if (isCheckpointed) firstParent[T].iterator(split, context) else compute(split, context)
231+
if (isCheckpointed) {
232+
firstParent[T].iterator(split, context)
233+
} else {
234+
if (previouslyComputed.contains(split)) {
235+
logWarning("Recomputing RDD %d, partition %d".format(id, split.index))
236+
} else {
237+
previouslyComputed.add(split)
238+
}
239+
compute(split, context)
240+
}
230241
}
231242

232243
// Transformations (return a new RDD)
@@ -1045,6 +1056,8 @@ abstract class RDD[T: ClassTag](
10451056

10461057
private[spark] def elementClassTag: ClassTag[T] = classTag[T]
10471058

1059+
private[spark] val computeSites = new ArrayBuffer[String]
1060+
10481061
private[spark] var checkpointData: Option[RDDCheckpointData[T]] = None
10491062

10501063
/** Returns the first parent RDD */

0 commit comments

Comments
 (0)