Commit 31b8724

Merge remote-tracking branch 'upstream/master' into tempViewCases

2 parents 4ab1b8a + 8f0c35a
268 files changed: +5404, -1558 lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ It lists steps that are required before creating a PR. In particular, consider:
 
  - Is the change important and ready enough to ask the community to spend time reviewing?
  - Have you searched for existing, related JIRAs and pull requests?
- - Is this a new feature that can stand alone as a package on http://spark-packages.org ?
+ - Is this a new feature that can stand alone as a [third party project](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) ?
  - Is the change being proposed clearly explained and motivated?
 
 When you contribute code, you affirm that the contribution is your original work and that you

R/create-docs.sh

Lines changed: 9 additions & 2 deletions

@@ -17,11 +17,13 @@
 # limitations under the License.
 #
 
-# Script to create API docs for SparkR
-# This requires `devtools` and `knitr` to be installed on the machine.
+# Script to create API docs and vignettes for SparkR
+# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine.
 
 # After running this script the html docs can be found in
 # $SPARK_HOME/R/pkg/html
+# The vignettes can be found in
+# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html
 
 set -o pipefail
 set -e

@@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit
 
 popd
 
+# render creates SparkR vignettes
+Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)'
+
+find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
+
 popd
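For context, the vignette-rendering step added above is just an rmarkdown::render() call. A minimal R sketch of the same idea, assuming rmarkdown is installed and the working directory is the R/ directory of a Spark checkout (the script additionally prepends the local lib/ directory to .libPaths(), which is omitted here):

    # sketch: render the SparkR vignette by hand (simplified from the script above)
    library(rmarkdown)
    Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))  # point SparkR at the checkout
    render("pkg/vignettes/sparkr-vignettes.Rmd")                  # writes the html next to the .Rmd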

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 0 deletions

@@ -2635,6 +2635,7 @@ setMethod("write.df",
 write <- callJMethod(df@sdf, "write")
 write <- callJMethod(write, "format", source)
 write <- callJMethod(write, "mode", jmode)
+write <- callJMethod(write, "options", options)
 write <- callJMethod(write, "save", path)
 })
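With the added "options" call, extra named arguments passed to write.df are now forwarded to the underlying DataFrameWriter. A minimal sketch of the user-facing effect, mirroring the new test further down (the SparkDataFrame df and the output path are assumed):

    # sketch: header = "true" now reaches the CSV writer as an option
    outPath <- tempfile(pattern = "sparkr-csv-out", fileext = ".csv")  # illustrative path
    write.df(df, path = outPath, source = "csv", header = "true")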

R/pkg/R/mllib.R

Lines changed: 4 additions & 4 deletions

@@ -720,8 +720,9 @@ setMethod("predict", signature(object = "MultilayerPerceptronClassificationModel
 # Returns the summary of a Multilayer Perceptron Classification Model produced by \code{spark.mlp}
 
 #' @param object a Multilayer Perceptron Classification Model fitted by \code{spark.mlp}
-#' @return \code{summary} returns a list containing \code{layers}, the label distribution, and
-#' \code{tables}, conditional probabilities given the target label.
+#' @return \code{summary} returns a list containing \code{labelCount}, \code{layers}, and
+#' \code{weights}. For \code{weights}, it is a numeric vector with length equal to
+#' the expected given the architecture (i.e., for 8-10-2 network, 100 connection weights).
 #' @rdname spark.mlp
 #' @export
 #' @aliases summary,MultilayerPerceptronClassificationModel-method

@@ -732,7 +733,6 @@ setMethod("summary", signature(object = "MultilayerPerceptronClassificationModel
 labelCount <- callJMethod(jobj, "labelCount")
 layers <- unlist(callJMethod(jobj, "layers"))
 weights <- callJMethod(jobj, "weights")
-weights <- matrix(weights, nrow = length(weights))
 list(labelCount = labelCount, layers = layers, weights = weights)
 })
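After this change, summary() on a spark.mlp model returns the connection weights as they come back from the JVM instead of reshaping them into a one-column matrix. A hedged sketch of inspecting the result; the training call is abbreviated (only layers is spelled out) and the input SparkDataFrame df is assumed. The test change further down shows the expected sizes:

    # sketch: abbreviated/assumed training call on an assumed SparkDataFrame df
    model <- spark.mlp(df, layers = c(4, 5, 4, 3))
    s <- summary(model)
    s$labelCount        # number of labels, 3 in the test data
    s$layers            # c(4, 5, 4, 3)
    length(s$weights)   # 64 connection weights for this architecture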

@@ -1241,7 +1241,7 @@ setMethod("predict", signature(object = "GaussianMixtureModel"),
 #' @note spark.als since 2.1.0
 setMethod("spark.als", signature(data = "SparkDataFrame"),
 function(data, ratingCol = "rating", userCol = "user", itemCol = "item",
-rank = 10, reg = 1.0, maxIter = 10, nonnegative = FALSE,
+rank = 10, reg = 0.1, maxIter = 10, nonnegative = FALSE,
 implicitPrefs = FALSE, alpha = 1.0, numUserBlocks = 10, numItemBlocks = 10,
 checkpointInterval = 10, seed = 0) {
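The default ALS regularization parameter drops from 1.0 to 0.1. A minimal sketch of a call that now silently picks up the new default (the ratings SparkDataFrame is assumed):

    # sketch: reg is left at its default, which is now 0.1 rather than 1.0
    model <- spark.als(ratings, ratingCol = "rating", userCol = "user", itemCol = "item")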

R/pkg/R/sparkR.R

Lines changed: 2 additions & 2 deletions

@@ -100,7 +100,7 @@ sparkR.stop <- function() {
 #' @param sparkEnvir Named list of environment variables to set on worker nodes
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
 #' @param sparkJars Character vector of jar files to pass to the worker nodes
-#' @param sparkPackages Character vector of packages from spark-packages.org
+#' @param sparkPackages Character vector of package coordinates
 #' @seealso \link{sparkR.session}
 #' @rdname sparkR.init-deprecated
 #' @export

@@ -327,7 +327,7 @@ sparkRHive.init <- function(jsc = NULL) {
 #' @param sparkHome Spark Home directory.
 #' @param sparkConfig named list of Spark configuration to set on worker nodes.
 #' @param sparkJars character vector of jar files to pass to the worker nodes.
-#' @param sparkPackages character vector of packages from spark-packages.org
+#' @param sparkPackages character vector of package coordinates
 #' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once
 #' set, this cannot be turned off on an existing session
 #' @param ... named Spark properties passed to the method.
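As the reworded docs indicate, sparkPackages takes package coordinates (Maven-style groupId:artifactId:version strings) rather than being limited to spark-packages.org listings. A minimal sketch; the specific coordinate below is illustrative only:

    # sketch: pass Maven coordinates when starting the session (coordinate is illustrative)
    sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")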

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 2 additions & 0 deletions

@@ -369,6 +369,8 @@ test_that("spark.mlp", {
 expect_equal(summary$labelCount, 3)
 expect_equal(summary$layers, c(4, 5, 4, 3))
 expect_equal(length(summary$weights), 64)
+expect_equal(head(summary$weights, 5), list(-0.878743, 0.2154151, -1.16304, -0.6583214, 1.009825),
+             tolerance = 1e-6)
 
 # Test predict method
 mlpTestDF <- df

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 11 additions & 1 deletion

@@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", {
 unsetHiveContext()
 })
 
-test_that("read csv as DataFrame", {
+test_that("read/write csv as DataFrame", {
 csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv")
 mockLinesCsv <- c("year,make,model,comment,blank",
 "\"2012\",\"Tesla\",\"S\",\"No comment\",",

@@ -243,7 +243,17 @@ test_that("read csv as DataFrame", {
 expect_equal(count(withoutna2), 3)
 expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0)
 
+# writing csv file
+csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv")
+write.df(df2, path = csvPath2, "csv", header = "true")
+df3 <- read.df(csvPath2, "csv", header = "true")
+expect_equal(nrow(df3), nrow(df2))
+expect_equal(colnames(df3), colnames(df2))
+csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]])
+expect_equal(colnames(df3), colnames(csv))
+
 unlink(csvPath)
+unlink(csvPath2)
 })
 
 test_that("convert NAs to null type in DataFrames", {
