
Commit 2924b93

fix infinite loop in ExternalSorter's mergeWithAggregation
1 parent 4d9e560 commit 2924b93

File tree

2 files changed: +6 -2 lines changed


core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala

Lines changed: 2 additions & 1 deletion
@@ -525,7 +525,8 @@ private[spark] class ExternalSorter[K, V, C](
         val k = elem._1
         var c = elem._2
         while (sorted.hasNext && sorted.head._1 == k) {
-          c = mergeCombiners(c, sorted.head._2)
+          val pair = sorted.next()
+          c = mergeCombiners(c, pair._2)
         }
         (k, c)
       }
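
In mergeWithAggregation, `sorted` is a buffered iterator of (key, combiner) pairs grouped by key. The old loop body only peeked at `sorted.head` and never advanced the iterator, so any key that occurred more than once kept the `while` condition true forever; calling `sorted.next()` inside the loop is what lets it terminate. A minimal standalone sketch of the corrected pattern (the name `mergeDuplicates` and the example data are illustrative, not part of ExternalSorter):

    // Collapse consecutive pairs that share a key from a sorted, buffered stream.
    // `mergeCombiners` stands in for the user-supplied merge function.
    def mergeDuplicates[K, C](it: Iterator[(K, C)])(mergeCombiners: (C, C) => C): Iterator[(K, C)] = {
      val sorted = it.buffered
      new Iterator[(K, C)] {
        def hasNext: Boolean = sorted.hasNext
        def next(): (K, C) = {
          val elem = sorted.next()
          val k = elem._1
          var c = elem._2
          // Advancing with next() (instead of only reading head) is what prevents the infinite loop.
          while (sorted.hasNext && sorted.head._1 == k) {
            val pair = sorted.next()
            c = mergeCombiners(c, pair._2)
          }
          (k, c)
        }
      }
    }

    // Example: duplicate keys are merged into one pair per key.
    // mergeDuplicates(Iterator((1, 2), (1, 3), (2, 5)))(_ + _).toList == List((1, 5), (2, 5))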

core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala

Lines changed: 4 additions & 1 deletion
@@ -506,7 +506,10 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
       val agg = new Aggregator[Int, Int, Int](i => i, (i, j) => i + j, (i, j) => i + j)
       val ord = implicitly[Ordering[Int]]
       val sorter = new ExternalSorter(Some(agg), Some(new HashPartitioner(3)), Some(ord), None)
-      sorter.insertAll((0 until 100000).iterator.map(i => (i / 2, i)))
+
+      // avoid combine before spill
+      sorter.insertAll((0 until 50000).iterator.map(i => (i, 2 * i)))
+      sorter.insertAll((0 until 50000).iterator.map(i => (i, 2 * i + 1)))
       val results = sorter.partitionedIterator.map{case (p, vs) => (p, vs.toSet)}.toSet
       val expected = (0 until 3).map(p => {
         (p, (0 until 50000).map(i => (i, i * 4 + 1)).filter(_._1 % 3 == p).toSet)
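
With the new insertion pattern every key i is inserted twice, once with value 2 * i and once with 2 * i + 1, so the sum aggregator should produce 2 * i + (2 * i + 1) = 4 * i + 1 per key, matching the i * 4 + 1 values in the expected set above. Splitting the input across two insertAll calls (per the "avoid combine before spill" comment) makes it less likely that duplicate keys are combined in memory before spilling, so the merge path being fixed is actually exercised. A quick hypothetical check of the arithmetic, not part of the suite:

    // Rebuild the test's two insert passes and combine per key the way the sum Aggregator would.
    val inserted = (0 until 50000).map(i => (i, 2 * i)) ++ (0 until 50000).map(i => (i, 2 * i + 1))
    val combinedByKey = inserted.groupBy(_._1).map { case (k, vs) => (k, vs.map(_._2).sum) }
    assert((0 until 50000).forall(i => combinedByKey(i) == 4 * i + 1))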
