Skip to content

Commit e1f95b6

Browse files
Merge pull request apache#65 from concretevitamin/parallelize-fix
Fix a bug where collect(parallelize(sc, 1:72, 15)) drops elements.
2 parents b8204c5 + fc1a71a commit e1f95b6

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

pkg/R/context.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ parallelize <- function(sc, coll, numSlices = 1) {
6262
if (numSlices > length(coll))
6363
numSlices <- length(coll)
6464

65-
sliceLen <- length(coll) %/% numSlices
65+
sliceLen <- ceiling(length(coll) / numSlices)
6666
slices <- split(coll, rep(1:(numSlices + 1), each = sliceLen)[1:length(coll)])
6767

6868
# Serialize each slice: obtain a list of raws, or a list of lists (slices) of

pkg/inst/tests/test_parallelize_collect.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,17 @@ test_that("collect(), following a parallelize(), gives back the original collect
6767
expect_equal(collect(strListRDD2), as.list(strList))
6868
})
6969

70+
test_that("regression: collect() following a parallelize() does not drop elements", {
71+
lapply(1:72,
72+
function(collLen) {
73+
lapply(1:15, function(numPart) {
74+
expected <- runif(collLen)
75+
actual <- collect(parallelize(jsc, expected, numPart))
76+
expect_equal(actual, as.list(expected))
77+
})
78+
})
79+
})
80+
7081
test_that("parallelize() and collect() work for lists of pairs (pairwise data)", {
7182
# use the pairwise logical to indicate pairwise data
7283
numPairsRDDD1 <- parallelize(jsc, numPairs, 1)

0 commit comments

Comments
 (0)