@@ -22,6 +22,7 @@ import java.util.Random
 import scala.collection.Map
 import scala.collection.JavaConversions.mapAsScalaMap
 import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashSet
 import scala.reflect.{classTag, ClassTag}
 
 import com.clearspring.analytics.stream.cardinality.HyperLogLog
@@ -221,12 +222,22 @@ abstract class RDD[T: ClassTag](
     }
   }
 
+  private val previouslyComputed = new HashSet[Partition]
   /**
    * Compute an RDD partition or read it from a checkpoint if the RDD is checkpointing.
    */
   private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] =
   {
-    if (isCheckpointed) firstParent[T].iterator(split, context) else compute(split, context)
+    if (isCheckpointed) {
+      firstParent[T].iterator(split, context)
+    } else {
+      if (previouslyComputed.contains(split)) {
+        logWarning("Recomputing RDD %d, partition %d".format(id, split.index))
+      } else {
+        previouslyComputed.add(split)
+      }
+      compute(split, context)
+    }
   }
 
   // Transformations (return a new RDD)
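
The hunk above tracks, per RDD, which partitions have already been computed, and logs a warning whenever a partition is computed a second time, which helps spot unexpected recomputation. A minimal standalone sketch of the same pattern follows; `Partition` is stubbed as a case class and `logWarning` is replaced by `println` (both are stand-ins, not Spark's real types), and `RecomputeTracker` is a hypothetical name:

  import scala.collection.mutable.HashSet

  // Stand-in for Spark's Partition trait; only the index matters here.
  case class Partition(index: Int)

  class RecomputeTracker(rddId: Int) {
    // Partitions of this RDD that have been computed at least once.
    private val previouslyComputed = new HashSet[Partition]

    // Returns true if this call is a recomputation of an already-computed partition.
    def recordCompute(split: Partition): Boolean = {
      if (previouslyComputed.contains(split)) {
        println("Recomputing RDD %d, partition %d".format(rddId, split.index))
        true
      } else {
        previouslyComputed.add(split)
        false
      }
    }
  }

  val tracker = new RecomputeTracker(rddId = 0)
  tracker.recordCompute(Partition(0))  // false: first computation of this partition
  tracker.recordCompute(Partition(0))  // true: prints the recomputation warning

Note that, as in the patch itself, the set is an unsynchronized mutable HashSet; if partitions of the same RDD can be computed concurrently in one JVM, access to it would need synchronization.
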
@@ -1045,6 +1056,8 @@ abstract class RDD[T: ClassTag](
 
   private[spark] def elementClassTag: ClassTag[T] = classTag[T]
 
+  private[spark] val computeSites = new ArrayBuffer[String]
+
   private[spark] var checkpointData: Option[RDDCheckpointData[T]] = None
 
   /** Returns the first parent RDD */
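
The `computeSites` buffer is only declared in this hunk; nothing in the diff appends to it yet. A plausible but purely hypothetical use would be to record a call-site string each time a partition is computed, along these lines (`recordComputeSite` and the site string are illustrative, not a Spark API):

  import scala.collection.mutable.ArrayBuffer

  val computeSites = new ArrayBuffer[String]

  // Hypothetical helper: remember where each computation was triggered from.
  def recordComputeSite(site: String): Unit = {
    computeSites += site
  }

  recordComputeSite("MyApp.scala:42")
  println(computeSites.mkString(", "))
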