Commit 31b8724

Merge remote-tracking branch 'upstream/master' into tempViewCases

2 parents 4ab1b8a + 8f0c35a
268 files changed: +5404, -1558 lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ It lists steps that are required before creating a PR. In particular, consider:
 
  - Is the change important and ready enough to ask the community to spend time reviewing?
  - Have you searched for existing, related JIRAs and pull requests?
- - Is this a new feature that can stand alone as a package on http://spark-packages.org ?
+ - Is this a new feature that can stand alone as a [third party project](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) ?
  - Is the change being proposed clearly explained and motivated?
 
 When you contribute code, you affirm that the contribution is your original work and that you

R/create-docs.sh

Lines changed: 9 additions & 2 deletions

@@ -17,11 +17,13 @@
 # limitations under the License.
 #
 
-# Script to create API docs for SparkR
-# This requires `devtools` and `knitr` to be installed on the machine.
+# Script to create API docs and vignettes for SparkR
+# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine.
 
 # After running this script the html docs can be found in
 # $SPARK_HOME/R/pkg/html
+# The vignettes can be found in
+# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html
 
 set -o pipefail
 set -e

@@ -43,4 +45,9 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit
 
 popd
 
+# render creates SparkR vignettes
+Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)'
+
+find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
+
 popd
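For context, the vignette-rendering step added above is just an rmarkdown::render() call. A minimal R sketch of the same idea, assuming rmarkdown is installed and the working directory is the R/ directory of a Spark checkout (the script additionally prepends the local lib/ directory to .libPaths(), which is omitted here):

    # sketch: render the SparkR vignette by hand (simplified from the script above)
    library(rmarkdown)
    Sys.setenv(SPARK_HOME = tools::file_path_as_absolute(".."))  # point SparkR at the checkout
    render("pkg/vignettes/sparkr-vignettes.Rmd")                  # writes the html next to the .Rmd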

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 0 deletions

@@ -2635,6 +2635,7 @@ setMethod("write.df",
 write <- callJMethod(df@sdf, "write")
 write <- callJMethod(write, "format", source)
 write <- callJMethod(write, "mode", jmode)
+write <- callJMethod(write, "options", options)
 write <- callJMethod(write, "save", path)
 })
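With the added "options" call, extra named arguments passed to write.df are now forwarded to the underlying DataFrameWriter. A minimal sketch of the user-facing effect, mirroring the new test further down (the SparkDataFrame df and the output path are assumed):

    # sketch: header = "true" now reaches the CSV writer as an option
    outPath <- tempfile(pattern = "sparkr-csv-out", fileext = ".csv")  # illustrative path
    write.df(df, path = outPath, source = "csv", header = "true")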

R/pkg/R/mllib.R

Lines changed: 4 additions & 4 deletions

@@ -720,8 +720,9 @@ setMethod("predict", signature(object = "MultilayerPerceptronClassificationModel
 # Returns the summary of a Multilayer Perceptron Classification Model produced by \code{spark.mlp}
 
 #' @param object a Multilayer Perceptron Classification Model fitted by \code{spark.mlp}
-#' @return \code{summary} returns a list containing \code{layers}, the label distribution, and
-#' \code{tables}, conditional probabilities given the target label.
+#' @return \code{summary} returns a list containing \code{labelCount}, \code{layers}, and
+#' \code{weights}. For \code{weights}, it is a numeric vector with length equal to
+#' the expected given the architecture (i.e., for 8-10-2 network, 100 connection weights).
 #' @rdname spark.mlp
 #' @export
 #' @aliases summary,MultilayerPerceptronClassificationModel-method

@@ -732,7 +733,6 @@ setMethod("summary", signature(object = "MultilayerPerceptronClassificationModel
 labelCount <- callJMethod(jobj, "labelCount")
 layers <- unlist(callJMethod(jobj, "layers"))
 weights <- callJMethod(jobj, "weights")
-weights <- matrix(weights, nrow = length(weights))
 list(labelCount = labelCount, layers = layers, weights = weights)
 })
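After this change, summary() on a spark.mlp model returns the connection weights as they come back from the JVM instead of reshaping them into a one-column matrix. A hedged sketch of inspecting the result; the training call is abbreviated (only layers is spelled out) and the input SparkDataFrame df is assumed. The test change further down shows the expected sizes:

    # sketch: abbreviated/assumed training call on an assumed SparkDataFrame df
    model <- spark.mlp(df, layers = c(4, 5, 4, 3))
    s <- summary(model)
    s$labelCount        # number of labels, 3 in the test data
    s$layers            # c(4, 5, 4, 3)
    length(s$weights)   # 64 connection weights for this architecture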

@@ -1241,7 +1241,7 @@ setMethod("predict", signature(object = "GaussianMixtureModel"),
 #' @note spark.als since 2.1.0
 setMethod("spark.als", signature(data = "SparkDataFrame"),
 function(data, ratingCol = "rating", userCol = "user", itemCol = "item",
-rank = 10, reg = 1.0, maxIter = 10, nonnegative = FALSE,
+rank = 10, reg = 0.1, maxIter = 10, nonnegative = FALSE,
 implicitPrefs = FALSE, alpha = 1.0, numUserBlocks = 10, numItemBlocks = 10,
 checkpointInterval = 10, seed = 0) {
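The default ALS regularization parameter drops from 1.0 to 0.1. A minimal sketch of a call that now silently picks up the new default (the ratings SparkDataFrame is assumed):

    # sketch: reg is left at its default, which is now 0.1 rather than 1.0
    model <- spark.als(ratings, ratingCol = "rating", userCol = "user", itemCol = "item")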

R/pkg/R/sparkR.R

Lines changed: 2 additions & 2 deletions

@@ -100,7 +100,7 @@ sparkR.stop <- function() {
 #' @param sparkEnvir Named list of environment variables to set on worker nodes
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
 #' @param sparkJars Character vector of jar files to pass to the worker nodes
-#' @param sparkPackages Character vector of packages from spark-packages.org
+#' @param sparkPackages Character vector of package coordinates
 #' @seealso \link{sparkR.session}
 #' @rdname sparkR.init-deprecated
 #' @export

@@ -327,7 +327,7 @@ sparkRHive.init <- function(jsc = NULL) {
 #' @param sparkHome Spark Home directory.
 #' @param sparkConfig named list of Spark configuration to set on worker nodes.
 #' @param sparkJars character vector of jar files to pass to the worker nodes.
-#' @param sparkPackages character vector of packages from spark-packages.org
+#' @param sparkPackages character vector of package coordinates
 #' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once
 #' set, this cannot be turned off on an existing session
 #' @param ... named Spark properties passed to the method.
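As the reworded docs indicate, sparkPackages takes package coordinates (Maven-style groupId:artifactId:version strings) rather than being limited to spark-packages.org listings. A minimal sketch; the specific coordinate below is illustrative only:

    # sketch: pass Maven coordinates when starting the session (coordinate is illustrative)
    sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")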

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 2 additions & 0 deletions

@@ -369,6 +369,8 @@ test_that("spark.mlp", {
 expect_equal(summary$labelCount, 3)
 expect_equal(summary$layers, c(4, 5, 4, 3))
 expect_equal(length(summary$weights), 64)
+expect_equal(head(summary$weights, 5), list(-0.878743, 0.2154151, -1.16304, -0.6583214, 1.009825),
+             tolerance = 1e-6)
 
 # Test predict method
 mlpTestDF <- df

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 11 additions & 1 deletion

@@ -208,7 +208,7 @@ test_that("create DataFrame from RDD", {
 unsetHiveContext()
 })
 
-test_that("read csv as DataFrame", {
+test_that("read/write csv as DataFrame", {
 csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv")
 mockLinesCsv <- c("year,make,model,comment,blank",
 "\"2012\",\"Tesla\",\"S\",\"No comment\",",

@@ -243,7 +243,17 @@ test_that("read csv as DataFrame", {
 expect_equal(count(withoutna2), 3)
 expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0)
 
+# writing csv file
+csvPath2 <- tempfile(pattern = "csvtest2", fileext = ".csv")
+write.df(df2, path = csvPath2, "csv", header = "true")
+df3 <- read.df(csvPath2, "csv", header = "true")
+expect_equal(nrow(df3), nrow(df2))
+expect_equal(colnames(df3), colnames(df2))
+csv <- read.csv(file = list.files(csvPath2, pattern = "^part", full.names = T)[[1]])
+expect_equal(colnames(df3), colnames(csv))
+
 unlink(csvPath)
+unlink(csvPath2)
 })
 
 test_that("convert NAs to null type in DataFrames", {
