Skip to content

Commit

Permalink
Merge pull request apache#104 from sun-rui/add_keys_values
Browse files Browse the repository at this point in the history
Add keys() and values() for the RDD class.
  • Loading branch information
shivaram committed Nov 10, 2014
2 parents 4a193ef + d8692e9 commit 32eb619
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ exportMethods(
"filter",
"flatMap",
"groupByKey",
"keys",
"length",
"lapply",
"lapplyPartition",
Expand All @@ -33,7 +34,8 @@ exportMethods(
"takeSample",
"unionRDD",
"unpersist",
"value"
"value",
"values"
)

# S3 methods exported
Expand Down
48 changes: 48 additions & 0 deletions pkg/R/RDD.R
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,54 @@ setMethod("takeSample", signature(rdd = "RDD", withReplacement = "logical",
sample(samples)[1:total]
})

#' Extract the key of every tuple in an RDD.
#'
#' The key of a tuple is taken to be its first element, so each element of
#' \code{rdd} is expected to support \code{[[1]]}.
#'
#' @param rdd The RDD whose tuple keys are returned.
#' @return An RDD holding the first element of each tuple in \code{rdd}.
#' @rdname keys
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
#' collect(keys(rdd)) # list(1, 3)
#'}
setGeneric("keys", function(rdd) {
  standardGeneric("keys")
})

#' @rdname keys
#' @aliases keys,RDD
setMethod(
  "keys",
  signature(rdd = "RDD"),
  function(rdd) {
    # The key occupies the first slot of each key-value tuple.
    lapply(rdd, function(pair) {
      pair[[1]]
    })
  }
)

#' Extract the value of every tuple in an RDD.
#'
#' The value of a tuple is taken to be its second element, so each element of
#' \code{rdd} is expected to support \code{[[2]]}.
#'
#' @param rdd The RDD whose tuple values are returned.
#' @return An RDD holding the second element of each tuple in \code{rdd}.
#' @rdname values
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
#' collect(values(rdd)) # list(2, 4)
#'}
setGeneric("values", function(rdd) {
  standardGeneric("values")
})

#' @rdname values
#' @aliases values,RDD
setMethod(
  "values",
  signature(rdd = "RDD"),
  function(rdd) {
    # The value occupies the second slot of each key-value tuple.
    lapply(rdd, function(pair) {
      pair[[2]]
    })
  }
)

#' Applies a function to all values of the elements, without modifying the keys.
#'
#' The same as `mapValues()' in Spark.
Expand Down
13 changes: 13 additions & 0 deletions pkg/inst/tests/test_rdd.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,16 @@ test_that("minimum() on RDDs", {
min <- minimum(rdd)
expect_equal(min, 1)
})

test_that("keys() on RDDs", {
  # Expected result: the first element of every pair in the local fixture.
  expected <- lapply(intPairs, function(pair) {
    pair[[1]]
  })
  # Bring the distributed result back to the driver for comparison.
  actual <- collect(keys(intRdd))
  expect_equal(actual, expected)
})

test_that("values() on RDDs", {
  # Expected result: the second element of every pair in the local fixture.
  expected <- lapply(intPairs, function(pair) {
    pair[[2]]
  })
  # Bring the distributed result back to the driver for comparison.
  actual <- collect(values(intRdd))
  expect_equal(actual, expected)
})

26 changes: 26 additions & 0 deletions pkg/man/keys.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\docType{methods}
\name{keys}
\alias{keys}
\alias{keys,RDD}
\alias{keys,RDD-method}
\title{Return an RDD with the keys of each tuple.}
\usage{
keys(rdd)

\S4method{keys}{RDD}(rdd)
}
\arguments{
\item{rdd}{The RDD from which the keys of each tuple are returned.}
}
\description{
Return an RDD with the keys of each tuple.
}
\examples{
\dontrun{
sc <- sparkR.init()
rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
collect(keys(rdd)) # list(1, 3)
}
}

26 changes: 26 additions & 0 deletions pkg/man/values.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\docType{methods}
\name{values}
\alias{values}
\alias{values,RDD}
\alias{values,RDD-method}
\title{Return an RDD with the values of each tuple.}
\usage{
values(rdd)

\S4method{values}{RDD}(rdd)
}
\arguments{
\item{rdd}{The RDD from which the values of each tuple are returned.}
}
\description{
Return an RDD with the values of each tuple.
}
\examples{
\dontrun{
sc <- sparkR.init()
rdd <- parallelize(sc, list(list(1, 2), list(3, 4)))
collect(values(rdd)) # list(2, 4)
}
}

0 comments on commit 32eb619

Please sign in to comment.