Commit 6813860
Fill the docs for DataFrame API in SparkR
1 parent 857220f commit 6813860

2 files changed: +81 −77 lines changed

R/pkg/R/DataFrame.R

+49-45
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
 
 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
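The two new @param tags document what this low-level constructor expects: a Java-side DataFrame reference and a cache flag. A minimal sketch of how it is meant to be called (sdf here stands for any JVM DataFrame reference obtained through the SparkR backend; the variable is illustrative, not from this diff):

df <- dataFrame(sdf)              # wrap the Java reference; not cached
dfCached <- dataFrame(sdf, TRUE)  # wrap a reference that is already cached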
@@ -244,7 +247,7 @@ setMethod("columns",
 })
 
 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
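This hunk trades @export for an @aliases tag, folding names into the columns help page: the two calls are interchangeable on a DataFrame. A minimal usage sketch, assuming a SQLContext and a hypothetical people.json:

sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
df <- jsonFile(sqlCtx, "path/to/people.json")
columns(df)  # e.g. c("age", "name")
names(df)    # same result; names is an alias for columns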
@@ -399,23 +402,23 @@ setMethod("repartition",
     dataFrame(sdf)
   })
 
-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
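A note on the pattern here, repeated throughout this commit: switching the comment prefix from #' to # hides the block from roxygen2, so toJSON (and the other methods given the same treatment below) disappears from the generated package documentation while the code and its commentary stay in place.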
@@ -578,8 +581,8 @@ setMethod("limit",
     dataFrame(res)
   })
 
-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
-
+#' Take the first NUM rows of a DataFrame and return the results as a data.frame
+#'
 #' @rdname take
 #' @export
 #' @examples
@@ -644,22 +647,22 @@ setMethod("first",
     take(x, 1)
   })
 
-#' toRDD()
-#'
-#' Converts a Spark DataFrame to an RDD while preserving column names.
-#'
-#' @param x A Spark DataFrame
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' rdd <- toRDD(df)
-#' }
+# toRDD()
+#
+# Converts a Spark DataFrame to an RDD while preserving column names.
+#
+# @param x A Spark DataFrame
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# rdd <- toRDD(df)
+# }
 setMethod("toRDD",
           signature(x = "DataFrame"),
           function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
+#' @param x a DataFrame
 #' @rdname DataFrame
 #' @export
 setMethod("agg",
@@ -721,53 +725,53 @@ setMethod("agg",
 # the requested map function. #
 ###################################################################################
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("lapply",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             lapply(rdd, FUN)
           })
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("map",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })
 
-#' @rdname flatMap
+# @rdname flatMap
 setMethod("flatMap",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             flatMap(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("lapplyPartition",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             lapplyPartition(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("mapPartitions",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreach",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
             rdd <- toRDD(x)
             foreach(rdd, func)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreachPartition",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
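Each method in this block funnels the DataFrame through toRDD() and delegates to the matching RDD operation, so column names survive as list names on every row. A hedged sketch of what that enables (people.json and the age column are assumed for illustration; these methods remain callable even though their docs are now hidden):

df <- jsonFile(sqlCtx, "path/to/people.json")
ages <- map(df, function(row) { row$age })  # df is converted via toRDD() first
take(ages, 2)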
@@ -1009,7 +1013,7 @@ setMethod("sortDF",
   })
 
 #' @rdname sortDF
-#' @export
+#' @aliases orderBy,DataFrame,function-method
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
           function(x, col) {
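As with names above, orderBy is re-tagged as an alias of sortDF. The characterOrColumn signature means either a column name or a Column expression is accepted; a short sketch, reusing the hypothetical df and assuming sortDF shares the signature shown here:

sorted1 <- sortDF(df, df$age)
sorted2 <- orderBy(df, "age")  # same operation under either name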
@@ -1046,7 +1050,7 @@ setMethod("filter",
   })
 
 #' @rdname filter
-#' @export
+#' @aliases where,DataFrame,function-method
 setMethod("where",
           signature(x = "DataFrame", condition = "characterOrColumn"),
           function(x, condition) {
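where gets the same treatment relative to filter. Per the characterOrColumn signature, the condition can be a SQL-style string or a Column expression; a brief sketch with the same hypothetical df:

adults1 <- filter(df, "age > 21")  # string condition
adults2 <- where(df, df$age > 21)  # equivalent Column expression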

R/pkg/R/SQLContext.R

+32-32
@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
   dataFrame(sdf)
 }
 
-#' toDF
-#'
-#' Converts an RDD to a DataFrame by infer the types.
-#'
-#' @param x An RDD
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- toDF(rdd)
-#' }
+# toDF
+#
+# Converts an RDD to a DataFrame by inferring the types.
+#
+# @param x An RDD
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
+# df <- toDF(rdd)
+# }
 
 setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
 
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
 }
 
 
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
-#'
-#' @param sqlCtx SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlCtx, rdd)
-#' }
+# JSON RDD
+#
+# Loads an RDD storing one JSON object per string as a DataFrame.
+#
+# @param sqlCtx SQLContext to use
+# @param rdd An RDD of JSON strings
+# @param schema A StructType object to use as schema
+# @param samplingRatio The ratio of sampling used to infer the schema
+# @return A DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- textFile(sc, "path/to/json")
+# df <- jsonRDD(sqlCtx, rdd)
+# }
 
 # TODO: support schema
 jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {
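With schema support still a TODO, only the sampling path is live: samplingRatio controls how much of the RDD is scanned to infer the schema, 1.0 meaning every row. A hedged sketch, assuming sc and sqlCtx from the earlier examples:

strings <- textFile(sc, "path/to/json")             # one JSON object per line
df <- jsonRDD(sqlCtx, strings, samplingRatio = 0.5) # infer the schema from roughly half the rows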
