Skip to content

Commit 0fa48d1

Browse files
committed
Merge pull request apache#117 from sun-rui/keyBy
Add keyBy() to the RDD class.
2 parents d0347ce + 09083d9 commit 0fa48d1

File tree

4 files changed

+61
-0
lines changed

4 files changed

+61
-0
lines changed

pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ exportMethods(
2020
"foreachPartition",
2121
"groupByKey",
2222
"join",
23+
"keyBy",
2324
"keys",
2425
"length",
2526
"lapply",

pkg/R/RDD.R

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,31 @@ setMethod("takeSample", signature(rdd = "RDD", withReplacement = "logical",
10361036
sample(samples)[1:total]
10371037
})
10381038

1039+
#' Creates (key, element) tuples from the elements in this RDD, using a function to compute each key.
1040+
#'
1041+
#' @param rdd The RDD.
1042+
#' @param func The function applied to each element to compute its key.
1043+
#' @rdname keyBy
1044+
#' @export
1045+
#' @examples
1046+
#'\dontrun{
1047+
#' sc <- sparkR.init()
1048+
#' rdd <- parallelize(sc, list(1, 2, 3))
1049+
#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
1050+
#'}
1051+
setGeneric(
  name = "keyBy",
  def = function(rdd, func) {
    # Hand off to whichever keyBy method matches the argument classes.
    standardGeneric("keyBy")
  }
)
1052+
1053+
#' @rdname keyBy
1054+
#' @aliases keyBy,RDD
1055+
setMethod("keyBy",
          signature(rdd = "RDD", func = "function"),
          function(rdd, func) {
            # Map each element to a two-item list: func's result first,
            # then the element itself.
            lapply(rdd, function(elem) {
              list(func(elem), elem)
            })
          })
1063+
10391064
#' Return an RDD with the keys of each tuple.
10401065
#'
10411066
#' @param rdd The RDD from which the keys of each tuple are returned.

pkg/inst/tests/test_rdd.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,13 @@ test_that("minimum() on RDDs", {
246246
expect_equal(min, 1)
247247
})
248248

249+
test_that("keyBy on RDDs", {
  square <- function(x) { x*x }
  # Expected pairs: list(square(n), n) for every element of nums.
  expected <- lapply(nums, function(n) { list(square(n), n) })
  keyed <- collect(keyBy(rdd, square))
  expect_equal(keyed, expected)
})
255+
249256
test_that("keys() on RDDs", {
250257
keys <- keys(intRdd)
251258
actual <- collect(keys)

pkg/man/keyBy.Rd

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
% Generated by roxygen2 (4.0.2): do not edit by hand
2+
\docType{methods}
3+
\name{keyBy}
4+
\alias{keyBy}
5+
\alias{keyBy,RDD}
6+
\alias{keyBy,RDD,function-method}
7+
\title{Creates tuples of the elements in this RDD by applying a function.}
8+
\usage{
9+
keyBy(rdd, func)
10+
11+
\S4method{keyBy}{RDD,`function`}(rdd, func)
12+
}
13+
\arguments{
14+
\item{rdd}{The RDD.}
15+
16+
\item{func}{The function to be applied.}
17+
}
18+
\description{
19+
Creates tuples of the elements in this RDD by applying a function.
20+
}
21+
\examples{
22+
\dontrun{
23+
sc <- sparkR.init()
24+
rdd <- parallelize(sc, list(1, 2, 3))
25+
collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
26+
}
27+
}
28+

0 commit comments

Comments
 (0)