
Commit 76f6b9e

Merge pull request apache#149 from hqzizania/master
[SPARKR-170] Update reduceByKey, groupByKey, combineByKey, partitionBy documentation in RDD.R
2 parents: b66534d + 5b380d3

File tree

5 files changed: +32 -28 lines

pkg/R/RDD.R
pkg/man/combineByKey.Rd
pkg/man/groupByKey.Rd
pkg/man/partitionBy.Rd
pkg/man/reduceByKey.Rd

pkg/R/RDD.R

Lines changed: 12 additions & 12 deletions
@@ -1244,12 +1244,12 @@ setMethod("flatMapValues",
 
 #' Partition an RDD by key
 #'
-#' This function operates on RDDs where every element is of the form list(K, V).
+#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 #' For each element of this RDD, the partitioner is used to compute a hash
 #' function and the RDD is partitioned using this hash value.
 #'
 #' @param rdd The RDD to partition. Should be an RDD where each element is
-#' list(K, V).
+#' list(K, V) or c(K, V).
 #' @param numPartitions Number of partitions to create.
 #' @param ... Other optional arguments to partitionBy.
 #'
@@ -1261,10 +1261,10 @@ setMethod("flatMapValues",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 #' rdd <- parallelize(sc, pairs)
 #' parts <- partitionBy(rdd, 2L)
-#' collectPartition(parts, 0L) # First partition should contain c(1,2) and c(1,3)
+#' collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4)
 #'}
 setGeneric("partitionBy",
            function(rdd, numPartitions, ...) {
@@ -1325,11 +1325,11 @@ setMethod("partitionBy",
 
 #' Group values by key
 #'
-#' This function operates on RDDs where every element is of the form list(K, V).
+#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 #' and group values for each key in the RDD into a single sequence.
 #'
 #' @param rdd The RDD to group. Should be an RDD where each element is
-#' list(K, V).
+#' list(K, V) or c(K, V).
 #' @param numPartitions Number of partitions to create.
 #' @return An RDD where each element is list(K, list(V))
 #' @seealso reduceByKey
@@ -1338,7 +1338,7 @@ setMethod("partitionBy",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 #' rdd <- parallelize(sc, pairs)
 #' parts <- groupByKey(rdd, 2L)
 #' grouped <- collect(parts)
@@ -1384,11 +1384,11 @@ setMethod("groupByKey",
 
 #' Merge values by key
 #'
-#' This function operates on RDDs where every element is of the form list(K, V).
+#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 #' and merges the values for each key using an associative reduce function.
 #'
 #' @param rdd The RDD to reduce by key. Should be an RDD where each element is
-#' list(K, V).
+#' list(K, V) or c(K, V).
 #' @param combineFunc The associative reduce function to use.
 #' @param numPartitions Number of partitions to create.
 #' @return An RDD where each element is list(K, V') where V' is the merged
@@ -1399,7 +1399,7 @@ setMethod("groupByKey",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 #' rdd <- parallelize(sc, pairs)
 #' parts <- reduceByKey(rdd, "+", 2L)
 #' reduced <- collect(parts)
@@ -1456,7 +1456,7 @@ setMethod("reduceByKey",
 #' }
 #'
 #' @param rdd The RDD to combine. Should be an RDD where each element is
-#' list(K, V).
+#' list(K, V) or c(K, V).
 #' @param createCombiner Create a combiner (C) given a value (V)
 #' @param mergeValue Merge the given value (V) with an existing combiner (C)
 #' @param mergeCombiners Merge two combiners and return a new combiner
@@ -1469,7 +1469,7 @@ setMethod("reduceByKey",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 #' rdd <- parallelize(sc, pairs)
 #' parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L)
 #' combined <- collect(parts)
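For reference, the behavior the updated examples describe can be sketched end to end. This is a minimal sketch, not part of the commit: it assumes a local SparkR session created with sparkR.init() and uses only the calls shown in the roxygen examples above (parallelize, reduceByKey, groupByKey, collect); the expected results are inferred from those examples.

sc <- sparkR.init()
# Pair RDDs are plain R lists of two-element pairs; the updated docs note that
# either list(K, V) or c(K, V) is accepted (list() keeps mixed key/value types intact).
pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
rdd <- parallelize(sc, pairs)

reduced <- collect(reduceByKey(rdd, "+", 2L))
# Values sharing key 1 are summed: expected, up to ordering,
# list(list(1, 6), list(1.1, 3))

grouped <- collect(groupByKey(rdd, 2L))
# Values are gathered per key: expected, up to ordering,
# list(list(1, list(2, 4)), list(1.1, list(3)))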

pkg/man/combineByKey.Rd

Lines changed: 4 additions & 3 deletions
@@ -1,4 +1,5 @@
-% Generated by roxygen2 (4.0.2): do not edit by hand
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/RDD.R
 \docType{methods}
 \name{combineByKey}
 \alias{combineByKey}
@@ -12,7 +13,7 @@ combineByKey(rdd, createCombiner, mergeValue, mergeCombiners, numPartitions)
 }
 \arguments{
 \item{rdd}{The RDD to combine. Should be an RDD where each element is
-list(K, V).}
+list(K, V) or c(K, V).}
 
 \item{createCombiner}{Create a combiner (C) given a value (V)}
 
@@ -41,7 +42,7 @@ Users provide three functions:
 \examples{
 \dontrun{
 sc <- sparkR.init()
-pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 rdd <- parallelize(sc, pairs)
 parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L)
 combined <- collect(parts)
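The combineByKey example is worth annotating, since its three function arguments play distinct roles. A minimal sketch reusing the docs' own example; the per-argument comments are editorial, and the expected output is inferred (with an identity createCombiner and "+" for both merge steps, the call reduces to a per-key sum), not taken from the commit.

pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
rdd <- parallelize(sc, pairs)
parts <- combineByKey(rdd,
                      function(x) { x },  # createCombiner: seed a combiner C from the first value V seen for a key
                      "+",                # mergeValue: fold each further V for that key into the partition-local C
                      "+",                # mergeCombiners: merge per-partition Cs for the same key after the shuffle
                      2L)
combined <- collect(parts)
# Expected, up to ordering: list(list(1, 6), list(1.1, 3))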

pkg/man/groupByKey.Rd

Lines changed: 5 additions & 4 deletions
@@ -1,4 +1,5 @@
-% Generated by roxygen2 (4.0.2): do not edit by hand
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/RDD.R
 \docType{methods}
 \name{groupByKey}
 \alias{groupByKey}
@@ -11,21 +12,21 @@ groupByKey(rdd, numPartitions)
 }
 \arguments{
 \item{rdd}{The RDD to group. Should be an RDD where each element is
-list(K, V).}
+list(K, V) or c(K, V).}
 
 \item{numPartitions}{Number of partitions to create.}
 }
 \value{
 An RDD where each element is list(K, list(V))
 }
 \description{
-This function operates on RDDs where every element is of the form list(K, V).
+This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 and group values for each key in the RDD into a single sequence.
 }
 \examples{
 \dontrun{
 sc <- sparkR.init()
-pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 rdd <- parallelize(sc, pairs)
 parts <- groupByKey(rdd, 2L)
 grouped <- collect(parts)

pkg/man/partitionBy.Rd

Lines changed: 6 additions & 5 deletions
@@ -1,4 +1,5 @@
-% Generated by roxygen2 (4.0.2): do not edit by hand
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/RDD.R
 \docType{methods}
 \name{partitionBy}
 \alias{partitionBy}
@@ -12,7 +13,7 @@ partitionBy(rdd, numPartitions, ...)
 }
 \arguments{
 \item{rdd}{The RDD to partition. Should be an RDD where each element is
-list(K, V).}
+list(K, V) or c(K, V).}
 
 \item{numPartitions}{Number of partitions to create.}
 
@@ -25,17 +26,17 @@ function if not provided}
 An RDD partitioned using the specified partitioner.
 }
 \description{
-This function operates on RDDs where every element is of the form list(K, V).
+This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 For each element of this RDD, the partitioner is used to compute a hash
 function and the RDD is partitioned using this hash value.
 }
 \examples{
 \dontrun{
 sc <- sparkR.init()
-pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 rdd <- parallelize(sc, pairs)
 parts <- partitionBy(rdd, 2L)
-collectPartition(parts, 0L) # First partition should contain c(1,2) and c(1,3)
+collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4)
 }
 }
 
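A quick sketch of the hash-partitioning behavior described above: pairs whose keys hash to the same value land in the same partition, which is why both pairs keyed by 1 are expected together. It assumes the same SparkR session as the earlier sketch; the expected placement is taken from the example comment corrected in this commit, not independently verified.

pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
rdd <- parallelize(sc, pairs)
parts <- partitionBy(rdd, 2L)
collectPartition(parts, 0L)
# Per the example above: list(1, 2) and list(1, 4) share key 1, so they should
# appear together in the first partition; list(1.1, 3) lands in the other one.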

pkg/man/reduceByKey.Rd

Lines changed: 5 additions & 4 deletions
@@ -1,4 +1,5 @@
-% Generated by roxygen2 (4.0.2): do not edit by hand
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/RDD.R
 \docType{methods}
 \name{reduceByKey}
 \alias{reduceByKey}
@@ -12,7 +13,7 @@ reduceByKey(rdd, combineFunc, numPartitions)
 }
 \arguments{
 \item{rdd}{The RDD to reduce by key. Should be an RDD where each element is
-list(K, V).}
+list(K, V) or c(K, V).}
 
 \item{combineFunc}{The associative reduce function to use.}
 
@@ -23,13 +24,13 @@ An RDD where each element is list(K, V') where V' is the merged
 value
 }
 \description{
-This function operates on RDDs where every element is of the form list(K, V).
+This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
 and merges the values for each key using an associative reduce function.
 }
 \examples{
 \dontrun{
 sc <- sparkR.init()
-pairs <- list(c(1, 2), c(1.1, 3), c(1, 4))
+pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
 rdd <- parallelize(sc, pairs)
 parts <- reduceByKey(rdd, "+", 2L)
 reduced <- collect(parts)
