@@ -343,15 +343,18 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
343
343
/**
344
344
* Called from executors to get the server URIs and output sizes for each shuffle block that
345
345
* needs to be read from a given range of map output partitions (startPartition is included but
346
- * endPartition is excluded from the range) and is produced by a specific mapper.
346
+ * endPartition is excluded from the range) and is produced by
347
+ * a range of mappers (startMapIndex is included but endMapIndex is excluded from the
348
+ * range).
347
349
*
348
350
* @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId,
349
351
* and the second item is a sequence of (shuffle block id, shuffle block size, map index)
350
352
* tuples describing the shuffle blocks that are stored at that block manager.
351
353
*/
352
- def getMapSizesByMapIndex (
354
+ def getMapSizesByRange (
353
355
shuffleId : Int ,
354
- mapIndex : Int ,
356
+ startMapIndex : Int ,
357
+ endMapIndex : Int ,
355
358
startPartition : Int ,
356
359
endPartition : Int ): Iterator [(BlockManagerId , Seq [(BlockId , Long , Int )])]
357
360
@@ -688,20 +691,25 @@ private[spark] class MapOutputTrackerMaster(
688
691
}
689
692
690
693
/**
691
- * Return the location where the Mapper ran. The locations each includes both a host and an
694
+ * Return the locations where the mappers ran. Each location includes both a host and an
692
695
* executor id on that host.
693
696
*
694
697
* @param dep shuffle dependency object
695
- * @param mapId the map id
698
+ * @param startMapIndex the start map index (inclusive)
699
+ * @param endMapIndex the end map index (exclusive)
696
700
* @return a sequence of locations where task runs.
697
701
*/
698
- def getMapLocation (dep : ShuffleDependency [_, _, _], mapId : Int ): Seq [String ] =
702
+ def getMapLocation (
703
+ dep : ShuffleDependency [_, _, _],
704
+ startMapIndex : Int ,
705
+ endMapIndex : Int ): Seq [String ] =
699
706
{
700
707
val shuffleStatus = shuffleStatuses.get(dep.shuffleId).orNull
701
708
if (shuffleStatus != null ) {
702
709
shuffleStatus.withMapStatuses { statuses =>
703
- if (mapId >= 0 && mapId < statuses.length) {
704
- Seq (statuses(mapId).location.host)
710
+ if (startMapIndex < endMapIndex && (startMapIndex >= 0 && endMapIndex < statuses.length)) {
711
+ val statusesPicked = statuses.slice(startMapIndex, endMapIndex).filter(_ != null )
712
+ statusesPicked.map(_.location.host).toSeq
705
713
} else {
706
714
Nil
707
715
}
@@ -737,29 +745,26 @@ private[spark] class MapOutputTrackerMaster(
737
745
case Some (shuffleStatus) =>
738
746
shuffleStatus.withMapStatuses { statuses =>
739
747
MapOutputTracker .convertMapStatuses(
740
- shuffleId, startPartition, endPartition, statuses)
748
+ shuffleId, startPartition, endPartition, statuses, 0 , shuffleStatus.mapStatuses.length )
741
749
}
742
750
case None =>
743
751
Iterator .empty
744
752
}
745
753
}
746
754
747
- override def getMapSizesByMapIndex (
755
+ override def getMapSizesByRange (
748
756
shuffleId : Int ,
749
- mapIndex : Int ,
757
+ startMapIndex : Int ,
758
+ endMapIndex : Int ,
750
759
startPartition : Int ,
751
760
endPartition : Int ): Iterator [(BlockManagerId , Seq [(BlockId , Long , Int )])] = {
752
- logDebug(s " Fetching outputs for shuffle $shuffleId, mapIndex $mapIndex " +
761
+ logDebug(s " Fetching outputs for shuffle $shuffleId, mappers $startMapIndex - $endMapIndex " +
753
762
s " partitions $startPartition- $endPartition" )
754
763
shuffleStatuses.get(shuffleId) match {
755
- case Some (shuffleStatus) =>
764
+ case Some (shuffleStatus) =>
756
765
shuffleStatus.withMapStatuses { statuses =>
757
766
MapOutputTracker .convertMapStatuses(
758
- shuffleId,
759
- startPartition,
760
- endPartition,
761
- statuses,
762
- Some (mapIndex))
767
+ shuffleId, startPartition, endPartition, statuses, startMapIndex, endMapIndex)
763
768
}
764
769
case None =>
765
770
Iterator .empty
@@ -802,7 +807,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
802
807
val statuses = getStatuses(shuffleId, conf)
803
808
try {
804
809
MapOutputTracker .convertMapStatuses(
805
- shuffleId, startPartition, endPartition, statuses)
810
+ shuffleId, startPartition, endPartition, statuses, 0 , statuses.length )
806
811
} catch {
807
812
case e : MetadataFetchFailedException =>
808
813
// We experienced a fetch failure so our mapStatuses cache is outdated; clear it:
@@ -811,17 +816,18 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
811
816
}
812
817
}
813
818
814
- override def getMapSizesByMapIndex (
819
+ override def getMapSizesByRange (
815
820
shuffleId : Int ,
816
- mapIndex : Int ,
821
+ startMapIndex : Int ,
822
+ endMapIndex : Int ,
817
823
startPartition : Int ,
818
824
endPartition : Int ): Iterator [(BlockManagerId , Seq [(BlockId , Long , Int )])] = {
819
- logDebug(s " Fetching outputs for shuffle $shuffleId, mapIndex $mapIndex " +
825
+ logDebug(s " Fetching outputs for shuffle $shuffleId, mappers $startMapIndex - $endMapIndex " +
820
826
s " partitions $startPartition- $endPartition" )
821
827
val statuses = getStatuses(shuffleId, conf)
822
828
try {
823
- MapOutputTracker .convertMapStatuses(shuffleId, startPartition, endPartition,
824
- statuses, Some (mapIndex) )
829
+ MapOutputTracker .convertMapStatuses(
830
+ shuffleId, startPartition, endPartition, statuses, startMapIndex, endMapIndex )
825
831
} catch {
826
832
case e : MetadataFetchFailedException =>
827
833
// We experienced a fetch failure so our mapStatuses cache is outdated; clear it:
@@ -980,7 +986,8 @@ private[spark] object MapOutputTracker extends Logging {
980
986
* @param startPartition Start of map output partition ID range (included in range)
981
987
* @param endPartition End of map output partition ID range (excluded from range)
982
988
* @param statuses List of map statuses, indexed by map partition index.
983
- * @param mapIndex When specified, only shuffle blocks from this mapper will be processed.
989
+ * @param startMapIndex Start of map index range (included in range)
990
+ * @param endMapIndex End of map index range (excluded from range)
984
991
* @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId,
985
992
* and the second item is a sequence of (shuffle block id, shuffle block size, map index)
986
993
* tuples describing the shuffle blocks that are stored at that block manager.
@@ -990,11 +997,12 @@ private[spark] object MapOutputTracker extends Logging {
990
997
startPartition : Int ,
991
998
endPartition : Int ,
992
999
statuses : Array [MapStatus ],
993
- mapIndex : Option [Int ] = None ): Iterator [(BlockManagerId , Seq [(BlockId , Long , Int )])] = {
1000
+ startMapIndex : Int ,
1001
+ endMapIndex : Int ): Iterator [(BlockManagerId , Seq [(BlockId , Long , Int )])] = {
994
1002
assert (statuses != null )
995
1003
val splitsByAddress = new HashMap [BlockManagerId , ListBuffer [(BlockId , Long , Int )]]
996
1004
val iter = statuses.iterator.zipWithIndex
997
- for ((status, mapIndex) <- mapIndex.map(index => iter.filter(_._2 == index)).getOrElse(iter )) {
1005
+ for ((status, mapIndex) <- iter.slice(startMapIndex, endMapIndex )) {
998
1006
if (status == null ) {
999
1007
val errorMessage = s " Missing an output location for shuffle $shuffleId"
1000
1008
logError(errorMessage)
0 commit comments