
Commit

Merge branch 'master' of https://github.com/apache/spark into sparkr-cran-changes
shivaram committed Jul 14, 2016
2 parents 3299242 + 91575ca commit e4ad2ec
Showing 265 changed files with 4,085 additions and 2,509 deletions.
6 changes: 2 additions & 4 deletions R/pkg/NAMESPACE
@@ -341,9 +341,8 @@ export("partitionBy",
"rowsBetween",
"rangeBetween")

export("window.partitionBy",
"window.orderBy")

export("windowPartitionBy",
"windowOrderBy")

S3method(print, jobj)
S3method(print, structField)
@@ -353,4 +352,3 @@ S3method(structField, character)
S3method(structField, jobj)
S3method(structType, jobj)
S3method(structType, structField)
# window.orderBy window.partitionBy
4 changes: 3 additions & 1 deletion R/pkg/R/SQLContext.R
@@ -48,7 +48,9 @@ getInternalType <- function(x) {
#' @return whatever the target returns
#' @noRd
dispatchFunc <- function(newFuncSig, x, ...) {
funcName <- as.character(sys.call(sys.parent())[[1]])
# When called with SparkR::createDataFrame, sys.call()[[1]] returns c(::, SparkR, createDataFrame)
callsite <- as.character(sys.call(sys.parent())[[1]])
funcName <- callsite[[length(callsite)]]
f <- get(paste0(funcName, ".default"))
# Strip sqlContext from list of parameters and then pass the rest along.
contextNames <- c("org.apache.spark.sql.SQLContext",
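A standalone base-R sketch (not part of this commit) of why taking the last element of the coerced callsite recovers the bare function name for both plain and SparkR::-qualified calls; the variable names below are purely illustrative.

# Plain call: the callsite coerces to a single string.
plain <- as.character(quote(createDataFrame))
plain[[length(plain)]]            # "createDataFrame"

# Namespaced call: the callsite coerces to c("::", "SparkR", "createDataFrame").
namespaced <- as.character(quote(SparkR::createDataFrame))
namespaced[[length(namespaced)]]  # "createDataFrame"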
4 changes: 2 additions & 2 deletions R/pkg/R/WindowSpec.R
@@ -22,10 +22,10 @@ NULL

#' S4 class that represents a WindowSpec
#'
#' WindowSpec can be created by using window.partitionBy() or window.orderBy()
#' WindowSpec can be created by using windowPartitionBy() or windowOrderBy()
#'
#' @rdname WindowSpec
#' @seealso \link{window.partitionBy}, \link{window.orderBy}
#' @seealso \link{windowPartitionBy}, \link{windowOrderBy}
#'
#' @param sws A Java object reference to the backing Scala WindowSpec
#' @export
10 changes: 4 additions & 6 deletions R/pkg/R/generics.R
@@ -779,13 +779,13 @@ setGeneric("rowsBetween", function(x, start, end) { standardGeneric("rowsBetween
#' @export
setGeneric("rangeBetween", function(x, start, end) { standardGeneric("rangeBetween") })

#' @rdname window.partitionBy
#' @rdname windowPartitionBy
#' @export
setGeneric("window.partitionBy", function(x, ...) { standardGeneric("window.partitionBy") })
setGeneric("windowPartitionBy", function(col, ...) { standardGeneric("windowPartitionBy") })

#' @rdname window.orderBy
#' @rdname windowOrderBy
#' @export
setGeneric("window.orderBy", function(x, ...) { standardGeneric("window.orderBy") })
setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy") })

###################### Expression Function Methods ##########################

@@ -1255,7 +1255,6 @@ setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.gl
#' @export
setGeneric("glm")

#' predict
#' @rdname predict
#' @export
setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1280,7 +1279,6 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
#' @export
setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })

#' write.ml
#' @rdname write.ml
#' @export
setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })
44 changes: 35 additions & 9 deletions R/pkg/R/mllib.R
@@ -53,10 +53,34 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
#' @note KMeansModel since 2.0.0
setClass("KMeansModel", representation(jobj = "jobj"))

#' Saves the MLlib model to the input path
#'
#' Saves the MLlib model to the input path. For more information, see the specific
#' MLlib model below.
#' @rdname write.ml
#' @name write.ml
#' @export
#' @seealso \link{spark.glm}, \link{glm}
#' @seealso \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg}
#' @seealso \link{read.ml}
NULL

#' Makes predictions from a MLlib model
#'
#' Makes predictions from a MLlib model. For more information, see the specific
#' MLlib model below.
#' @rdname predict
#' @name predict
#' @export
#' @seealso \link{spark.glm}, \link{glm}
#' @seealso \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg}
NULL

#' Generalized Linear Models
#'
#' Fits generalized linear model against a Spark DataFrame. Users can print, make predictions on the
#' produced model and save the model to the input path.
#' Fits generalized linear model against a Spark DataFrame.
#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
#'
#' @param data SparkDataFrame for training.
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -146,7 +170,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
})

# Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
#'

#' @param object A fitted generalized linear model
#' @return \code{summary} returns a summary object of the fitted model, a list of components
#' including at least the coefficients, null/residual deviance, null/residual degrees
@@ -186,7 +210,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
})

# Prints the summary of GeneralizedLinearRegressionModel
#'

#' @rdname spark.glm
#' @param x Summary object of fitted generalized linear model returned by \code{summary} function
#' @export
@@ -271,7 +295,8 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
#' K-Means Clustering Model
#'
#' Fits a k-means clustering model against a Spark DataFrame, similarly to R's kmeans().
#' Users can print, make predictions on the produced model and save the model to the input path.
#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
#'
#' @param data SparkDataFrame for training
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -345,7 +370,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
})

# Get the summary of a k-means model
#'

#' @param object A fitted k-means model
#' @return \code{summary} returns the model's coefficients, size and cluster
#' @rdname spark.kmeans
@@ -372,7 +397,7 @@ setMethod("summary", signature(object = "KMeansModel"),
})

# Predicted values based on a k-means model
#'

#' @return \code{predict} returns the predicted values based on a k-means model
#' @rdname spark.kmeans
#' @export
@@ -466,7 +491,7 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
})

# Saves the generalized linear model to the input path.
#'

#' @param path The directory where the model is saved
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
@@ -484,7 +509,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
})

# Save fitted MLlib model to the input path
#'

#' @param path The directory where the model is saved
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
@@ -508,6 +533,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
#' @rdname read.ml
#' @name read.ml
#' @export
#' @seealso \link{write.ml}
#' @examples
#' \dontrun{
#' path <- "path/to/model"
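A short usage sketch (not part of this commit) of the workflow the shared write.ml and predict docs above describe; it assumes a local Spark session and a writable temporary directory, and the data and save path are made up for illustration.

library(SparkR)
sparkR.session()

# Toy training data, purely illustrative.
df <- createDataFrame(data.frame(x = c(1, 2, 3, 4), y = c(2.1, 3.9, 6.2, 7.8)))

model <- spark.glm(df, y ~ x, family = "gaussian")    # fit
summary(model)                                        # print a summary of the fitted model
head(collect(predict(model, df)))                     # make predictions on (new) data

modelPath <- file.path(tempdir(), "spark-glm-model")  # hypothetical save location
write.ml(model, modelPath)                            # save the fitted model
sameModel <- read.ml(modelPath)                       # load it back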
84 changes: 47 additions & 37 deletions R/pkg/R/window.R
@@ -17,42 +17,47 @@

# window.R - Utility functions for defining window in DataFrames

#' window.partitionBy
#' windowPartitionBy
#'
#' Creates a WindowSpec with the partitioning defined.
#'
#' @rdname window.partitionBy
#' @name window.partitionBy
#' @aliases window.partitionBy,character-method
#' @param col A column name or Column by which rows are partitioned to
#' windows.
#' @param ... Optional column names or Columns in addition to col, by
#' which rows are partitioned to windows.
#'
#' @rdname windowPartitionBy
#' @name windowPartitionBy
#' @aliases windowPartitionBy,character-method
#' @export
#' @examples
#' \dontrun{
#' ws <- window.partitionBy("key1", "key2")
#' ws <- windowPartitionBy("key1", "key2")
#' df1 <- select(df, over(lead("value", 1), ws))
#'
#' ws <- window.partitionBy(df$key1, df$key2)
#' ws <- windowPartitionBy(df$key1, df$key2)
#' df1 <- select(df, over(lead("value", 1), ws))
#' }
#' @note window.partitionBy(character) since 2.0.0
setMethod("window.partitionBy",
signature(x = "character"),
function(x, ...) {
#' @note windowPartitionBy(character) since 2.0.0
setMethod("windowPartitionBy",
signature(col = "character"),
function(col, ...) {
windowSpec(
callJStatic("org.apache.spark.sql.expressions.Window",
"partitionBy",
x,
list(...)))
})

#' @rdname window.partitionBy
#' @name window.partitionBy
#' @aliases window.partitionBy,Column-method
#' @rdname windowPartitionBy
#' @name windowPartitionBy
#' @aliases windowPartitionBy,Column-method
#' @export
#' @note window.partitionBy(Column) since 2.0.0
setMethod("window.partitionBy",
signature(x = "Column"),
function(x, ...) {
jcols <- lapply(list(x, ...), function(c) {
#' @note windowPartitionBy(Column) since 2.0.0
setMethod("windowPartitionBy",
signature(col = "Column"),
function(col, ...) {
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
windowSpec(
@@ -61,42 +66,47 @@ setMethod("window.partitionBy",
jcols))
})

#' window.orderBy
#' windowOrderBy
#'
#' Creates a WindowSpec with the ordering defined.
#'
#' @rdname window.orderBy
#' @name window.orderBy
#' @aliases window.orderBy,character-method
#' @param col A column name or Column by which rows are ordered within
#' windows.
#' @param ... Optional column names or Columns in addition to col, by
#' which rows are ordered within windows.
#'
#' @rdname windowOrderBy
#' @name windowOrderBy
#' @aliases windowOrderBy,character-method
#' @export
#' @examples
#' \dontrun{
#' ws <- window.orderBy("key1", "key2")
#' ws <- windowOrderBy("key1", "key2")
#' df1 <- select(df, over(lead("value", 1), ws))
#'
#' ws <- window.orderBy(df$key1, df$key2)
#' ws <- windowOrderBy(df$key1, df$key2)
#' df1 <- select(df, over(lead("value", 1), ws))
#' }
#' @note window.orderBy(character) since 2.0.0
setMethod("window.orderBy",
signature(x = "character"),
function(x, ...) {
#' @note windowOrderBy(character) since 2.0.0
setMethod("windowOrderBy",
signature(col = "character"),
function(col, ...) {
windowSpec(
callJStatic("org.apache.spark.sql.expressions.Window",
"orderBy",
x,
col,
list(...)))
})

#' @rdname window.orderBy
#' @name window.orderBy
#' @aliases window.orderBy,Column-method
#' @rdname windowOrderBy
#' @name windowOrderBy
#' @aliases windowOrderBy,Column-method
#' @export
#' @note window.orderBy(Column) since 2.0.0
setMethod("window.orderBy",
signature(x = "Column"),
function(x, ...) {
jcols <- lapply(list(x, ...), function(c) {
#' @note windowOrderBy(Column) since 2.0.0
setMethod("windowOrderBy",
signature(col = "Column"),
function(col, ...) {
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
windowSpec(
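A brief sketch (not part of this commit) combining the renamed builders with an ordering clause and a window function, mirroring the pattern exercised in the test_sparkSQL.R change further down; it assumes an active Spark session.

library(SparkR)
sparkR.session()

df <- createDataFrame(list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
                      schema = c("key", "value"))

# Partition rows by "key", then order within each partition by "value".
ws <- orderBy(windowPartitionBy("key"), "value")

# Apply a window function over the spec: the next "value" within each partition.
result <- collect(select(df, over(lead("value", 1), ws)))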
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
#
library(SparkR)

sparkSession <- sparkR.session()
sparkR.session()

helloTest <- SparkR:::callJStatic("sparkR.test.hello",
"helloWorld",
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
library(SparkR)
library(sparkPackageTest)

sparkSession <- sparkR.session()
sparkR.session()

run1 <- myfunc(5L)

13 changes: 7 additions & 6 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -237,7 +237,7 @@ test_that("read csv as DataFrame", {
"Empty,Dummy,Placeholder")
writeLines(mockLinesCsv, csvPath)

df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.string = "Empty")
df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "Empty")
expect_equal(count(df2), 4)
withoutna2 <- na.omit(df2, how = "any", cols = "year")
expect_equal(count(withoutna2), 3)
@@ -2376,25 +2376,25 @@ test_that("gapply() and gapplyCollect() on a DataFrame", {
test_that("Window functions on a DataFrame", {
df <- createDataFrame(list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
schema = c("key", "value"))
ws <- orderBy(window.partitionBy("key"), "value")
ws <- orderBy(windowPartitionBy("key"), "value")
result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
names(result) <- c("key", "value")
expected <- data.frame(key = c(1L, NA, 2L, NA),
value = c("1", NA, "2", NA),
stringsAsFactors = FALSE)
expect_equal(result, expected)

ws <- orderBy(window.partitionBy(df$key), df$value)
ws <- orderBy(windowPartitionBy(df$key), df$value)
result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
names(result) <- c("key", "value")
expect_equal(result, expected)

ws <- partitionBy(window.orderBy("value"), "key")
ws <- partitionBy(windowOrderBy("value"), "key")
result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
names(result) <- c("key", "value")
expect_equal(result, expected)

ws <- partitionBy(window.orderBy(df$value), df$key)
ws <- partitionBy(windowOrderBy(df$value), df$key)
result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
names(result) <- c("key", "value")
expect_equal(result, expected)
@@ -2405,7 +2405,8 @@ test_that("createDataFrame sqlContext parameter backward compatibility", {
a <- 1:3
b <- c("a", "b", "c")
ldf <- data.frame(a, b)
df <- suppressWarnings(createDataFrame(sqlContext, ldf))
# Call function with namespace :: operator - SPARK-16538
df <- suppressWarnings(SparkR::createDataFrame(sqlContext, ldf))
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
expect_equal(count(df), 3)
5 changes: 5 additions & 0 deletions common/network-shuffle/pom.xml
@@ -54,6 +54,11 @@
<artifactId>jackson-databind</artifactId>
</dependency>

<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>