
Commit c596c6f

Merge branch 'master' into issue/no-spark-repl-fork

2 parents 2b1a305 + e14b545

697 files changed (+22128, -7957 lines)


.rat-excludes

Lines changed: 6 additions & 0 deletions
@@ -86,4 +86,10 @@ local-1430917381535_2
 DESCRIPTION
 NAMESPACE
 test_support/*
+.*Rd
+help/*
+html/*
+INDEX
 .lintr
+gen-java.*
+.*avpr

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -948,6 +948,6 @@ The following components are provided under the MIT License. See project link fo
 (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org)
 (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
 (MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
-(The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
+(The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org)
 (MIT License) jquery (https://jquery.org/license/)
 (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)

R/README.md

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R
 
 #### Build Spark
 
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-PsparkR` profile to build the R package. For example to use the default Hadoop versions you can run
+Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 ```
 build/mvn -DskipTests -Psparkr package
 ```

R/pkg/R/DataFrame.R

Lines changed: 2 additions & 2 deletions
@@ -169,8 +169,8 @@ setMethod("isLocal",
 #'}
 setMethod("showDF",
           signature(x = "DataFrame"),
-          function(x, numRows = 20) {
-            s <- callJMethod(x@sdf, "showString", numToInt(numRows))
+          function(x, numRows = 20, truncate = TRUE) {
+            s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
             cat(s)
           })
 
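The new `truncate` flag is forwarded straight to the JVM side's showString, so default output is unchanged. A minimal usage sketch, assuming an existing SQLContext named `sqlContext` (not part of this hunk):

```
df <- createDataFrame(sqlContext, faithful)
# Default: same rendering as before the change (long cells shortened)
showDF(df, numRows = 5)
# New: print full cell contents
showDF(df, numRows = 5, truncate = FALSE)
```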

R/pkg/R/SQLContext.R

Lines changed: 3 additions & 1 deletion
@@ -86,7 +86,9 @@ infer_type <- function(x) {
 createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
   if (is.data.frame(data)) {
     # get the names of columns, they will be put into RDD
-    schema <- names(data)
+    if (is.null(schema)) {
+      schema <- names(data)
+    }
     n <- nrow(data)
     m <- ncol(data)
     # get rid of factor type
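Guarding the assignment with is.null means a caller-supplied `schema` now survives for data.frame input. A sketch of the difference (hypothetical toy data; `sqlContext` assumed):

```
localDF <- data.frame(name = c("John", "Smith"), age = c(19L, 23L))

df1 <- createDataFrame(sqlContext, localDF)
columns(df1)  # "name" "age" -- inferred from names(localDF), as before

df2 <- createDataFrame(sqlContext, localDF, schema = c("n", "a"))
columns(df2)  # "n" "a" -- previously this was silently overwritten
```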

R/pkg/R/client.R

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
 }
 
 launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
-  sparkSubmitBin <- determineSparkSubmitBin()
+  sparkSubmitBinName <- determineSparkSubmitBin()
   if (sparkHome != "") {
     sparkSubmitBin <- file.path(sparkHome, "bin", sparkSubmitBinName)
   } else {

R/pkg/R/generics.R

Lines changed: 2 additions & 1 deletion
@@ -20,7 +20,8 @@
 # @rdname aggregateRDD
 # @seealso reduce
 # @export
-setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
+setGeneric("aggregateRDD",
+           function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
 
 # @rdname cache-methods
 # @export
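The wrapped signature is unchanged: a zero value, a within-partition seqOp, and a cross-partition combOp, mirroring Spark's aggregate. A usage sketch (the RDD API is private here, hence SparkR:::; `sc` assumed):

```
rdd <- SparkR:::parallelize(sc, 1:10)
zeroValue <- list(0, 0)  # running (sum, count)
seqOp <- function(acc, x) { list(acc[[1]] + x, acc[[2]] + 1) }
combOp <- function(a, b) { list(a[[1]] + b[[1]], a[[2]] + b[[2]]) }
SparkR:::aggregateRDD(rdd, zeroValue, seqOp, combOp)  # list(55, 10)
```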

R/pkg/R/pairRDD.R

Lines changed: 6 additions & 6 deletions
@@ -560,8 +560,8 @@ setMethod("join",
 # Left outer join two RDDs
 #
 # @description
-# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #          list(K, V).
@@ -597,8 +597,8 @@ setMethod("leftOuterJoin",
 # Right outer join two RDDs
 #
 # @description
-# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #          list(K, V).
@@ -634,8 +634,8 @@ setMethod("rightOuterJoin",
 # Full outer join two RDDs
 #
 # @description
-# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #          list(K, V).
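The rewrapped @description text keeps the same contract: elements are list(K, V) pairs, and unmatched keys pair with NULL on the missing side. A semantics sketch for the left variant (private RDD API via SparkR:::; `sc` assumed):

```
rdd1 <- SparkR:::parallelize(sc, list(list(1, "a"), list(2, "b")))
rdd2 <- SparkR:::parallelize(sc, list(list(1, "x")))
SparkR:::collect(SparkR:::leftOuterJoin(rdd1, rdd2, 2L))
# list(list(1, list("a", "x")), list(2, list("b", NULL)))
```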

R/pkg/R/sparkR.R

Lines changed: 8 additions & 5 deletions
@@ -105,11 +105,12 @@ sparkR.init <- function(
   sparkPackages = "") {
 
   if (exists(".sparkRjsc", envir = .sparkREnv)) {
-    cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
+    cat(paste("Re-using existing Spark Context.",
+              "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n"))
     return(get(".sparkRjsc", envir = .sparkREnv))
   }
 
-  sparkMem <- Sys.getenv("SPARK_MEM", "512m")
+  sparkMem <- Sys.getenv("SPARK_MEM", "1024m")
   jars <- suppressWarnings(normalizePath(as.character(sparkJars)))
 
   # Classpath separator is ";" on Windows
@@ -132,7 +133,7 @@ sparkR.init <- function(
                 sparkHome = sparkHome,
                 jars = jars,
                 sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
-                sparkPackages = sparkPackages)
+                packages = sparkPackages)
   # wait atmost 100 seconds for JVM to launch
   wait <- 0.1
   for (i in 1:25) {
@@ -180,14 +181,16 @@ sparkR.init <- function(
 
   sparkExecutorEnvMap <- new.env()
   if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) {
-    sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
+    sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <-
+      paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
   }
   for (varname in names(sparkExecutorEnv)) {
     sparkExecutorEnvMap[[varname]] <- sparkExecutorEnv[[varname]]
   }
 
   nonEmptyJars <- Filter(function(x) { x != "" }, jars)
-  localJarPaths <- sapply(nonEmptyJars, function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
+  localJarPaths <- sapply(nonEmptyJars,
+                          function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
 
   # Set the start time to identify jobjs
   # Seconds resolution is good enough for this purpose, so use ints
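Two user-visible effects of this hunk: the driver-memory fallback when SPARK_MEM is unset rises from 512m to 1024m, and sparkPackages is now passed under the parameter name launchBackend actually declares (packages), so package coordinates reach spark-submit. A hedged init sketch (the coordinate below is only an example):

```
Sys.setenv(SPARK_MEM = "2g")  # optional; otherwise the 1024m default now applies
sc <- sparkR.init(master = "local[2]",
                  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")
```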

R/pkg/R/utils.R

Lines changed: 18 additions & 13 deletions
@@ -334,18 +334,21 @@ getStorageLevel <- function(newLevel = c("DISK_ONLY",
                                          "MEMORY_ONLY_SER_2",
                                          "OFF_HEAP")) {
   match.arg(newLevel)
+  storageLevelClass <- "org.apache.spark.storage.StorageLevel"
   storageLevel <- switch(newLevel,
-                         "DISK_ONLY" = callJStatic("org.apache.spark.storage.StorageLevel", "DISK_ONLY"),
-                         "DISK_ONLY_2" = callJStatic("org.apache.spark.storage.StorageLevel", "DISK_ONLY_2"),
-                         "MEMORY_AND_DISK" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK"),
-                         "MEMORY_AND_DISK_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_2"),
-                         "MEMORY_AND_DISK_SER" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_SER"),
-                         "MEMORY_AND_DISK_SER_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_SER_2"),
-                         "MEMORY_ONLY" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY"),
-                         "MEMORY_ONLY_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_2"),
-                         "MEMORY_ONLY_SER" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_SER"),
-                         "MEMORY_ONLY_SER_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_SER_2"),
-                         "OFF_HEAP" = callJStatic("org.apache.spark.storage.StorageLevel", "OFF_HEAP"))
+                         "DISK_ONLY" = callJStatic(storageLevelClass, "DISK_ONLY"),
+                         "DISK_ONLY_2" = callJStatic(storageLevelClass, "DISK_ONLY_2"),
+                         "MEMORY_AND_DISK" = callJStatic(storageLevelClass, "MEMORY_AND_DISK"),
+                         "MEMORY_AND_DISK_2" = callJStatic(storageLevelClass, "MEMORY_AND_DISK_2"),
+                         "MEMORY_AND_DISK_SER" = callJStatic(storageLevelClass,
+                                                             "MEMORY_AND_DISK_SER"),
+                         "MEMORY_AND_DISK_SER_2" = callJStatic(storageLevelClass,
+                                                               "MEMORY_AND_DISK_SER_2"),
+                         "MEMORY_ONLY" = callJStatic(storageLevelClass, "MEMORY_ONLY"),
+                         "MEMORY_ONLY_2" = callJStatic(storageLevelClass, "MEMORY_ONLY_2"),
+                         "MEMORY_ONLY_SER" = callJStatic(storageLevelClass, "MEMORY_ONLY_SER"),
+                         "MEMORY_ONLY_SER_2" = callJStatic(storageLevelClass, "MEMORY_ONLY_SER_2"),
+                         "OFF_HEAP" = callJStatic(storageLevelClass, "OFF_HEAP"))
 }
 
 # Utility function for functions where an argument needs to be integer but we want to allow
@@ -545,9 +548,11 @@ mergePartitions <- function(rdd, zip) {
       lengthOfKeys <- part[[len - lengthOfValues]]
       stopifnot(len == lengthOfKeys + lengthOfValues)
 
-      # For zip operation, check if corresponding partitions of both RDDs have the same number of elements.
+      # For zip operation, check if corresponding partitions
+      # of both RDDs have the same number of elements.
       if (zip && lengthOfKeys != lengthOfValues) {
-        stop("Can only zip RDDs with same number of elements in each pair of corresponding partitions.")
+        stop(paste("Can only zip RDDs with same number of elements",
+                   "in each pair of corresponding partitions."))
       }
 
       if (lengthOfKeys > 1) {
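getStorageLevel only resolves the user-facing level strings to JVM StorageLevel objects; hoisting the class name into storageLevelClass shortens the lines without changing behavior. A sketch of where these strings come from (private RDD API via SparkR:::; `sc` assumed):

```
rdd <- SparkR:::parallelize(sc, 1:100)
SparkR:::persist(rdd, "MEMORY_AND_DISK")  # string resolved by getStorageLevel()
SparkR:::unpersist(rdd)
```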
Binary file not shown.

R/pkg/inst/tests/jarTest.R

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+library(SparkR)
+
+sc <- sparkR.init()
+
+helloTest <- SparkR:::callJStatic("sparkR.test.hello",
+                                  "helloWorld",
+                                  "Dave")
+
+basicFunction <- SparkR:::callJStatic("sparkR.test.basicFunction",
+                                      "addStuff",
+                                      2L,
+                                      2L)
+
+sparkR.stop()
+output <- c(helloTest, basicFunction)
+writeLines(output)

R/pkg/inst/tests/test_binaryFile.R

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
   saveAsObjectFile(rdd2, fileName2)
 
   rdd <- objectFile(sc, c(fileName1, fileName2))
-  expect_true(count(rdd) == 2)
+  expect_equal(count(rdd), 2)
 
   unlink(fileName1, recursive = TRUE)
   unlink(fileName2, recursive = TRUE)
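The expect_true(x == y) to expect_equal(x, y) rewrites here and in the other test files are a testthat idiom, not a behavior change: expect_equal reports both values on failure instead of a bare "isn't true". Illustrative failure messages (approximate wording):

```
expect_true(count(rdd) == 2)  # failure says only: count(rdd) == 2 isn't true
expect_equal(count(rdd), 2)   # failure also prints the actual value, e.g. 1 vs 2
```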

R/pkg/inst/tests/test_binary_function.R

Lines changed: 2 additions & 2 deletions
@@ -38,13 +38,13 @@ test_that("union on two RDDs", {
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, c(as.list(nums), mockFile))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")
 
   rdd<- map(text.rdd, function(x) {x})
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, as.list(c(mockFile, mockFile)))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")
 
   unlink(fileName)
 })

R/pkg/inst/tests/test_includeJAR.R

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+context("include an external JAR in SparkContext")
+
+runScript <- function() {
+  sparkHome <- Sys.getenv("SPARK_HOME")
+  sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
+  jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
+  scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+  submitPath <- file.path(sparkHome, "bin/spark-submit")
+  res <- system2(command = submitPath,
+                 args = c(jarPath, scriptPath),
+                 stdout = TRUE)
+  tail(res, 2)
+}
+
+test_that("sparkJars tag in SparkContext", {
+  testOutput <- runScript()
+  helloTest <- testOutput[1]
+  expect_equal(helloTest, "Hello, Dave")
+  basicFunction <- testOutput[2]
+  expect_equal(basicFunction, "4")
+})

R/pkg/inst/tests/test_parallelize_collect.R

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ test_that("parallelize() on simple vectors and lists returns an RDD", {
                strListRDD2)
 
   for (rdd in rdds) {
-    expect_true(inherits(rdd, "RDD"))
+    expect_is(rdd, "RDD")
    expect_true(.hasSlot(rdd, "jrdd")
                && inherits(rdd@jrdd, "jobj")
                && isInstanceOf(rdd@jrdd, "org.apache.spark.api.java.JavaRDD"))

R/pkg/inst/tests/test_rdd.R

Lines changed: 10 additions & 6 deletions
@@ -33,9 +33,9 @@ test_that("get number of partitions in RDD", {
 })
 
 test_that("first on RDD", {
-  expect_true(first(rdd) == 1)
+  expect_equal(first(rdd), 1)
   newrdd <- lapply(rdd, function(x) x + 1)
-  expect_true(first(newrdd) == 2)
+  expect_equal(first(newrdd), 2)
 })
 
 test_that("count and length on RDD", {
@@ -669,27 +669,31 @@ test_that("fullOuterJoin() on pairwise RDDs", {
   rdd1 <- parallelize(sc, list(list(1,2), list(1,3), list(3,3)))
   rdd2 <- parallelize(sc, list(list(1,1), list(2,4)))
   actual <- collect(fullOuterJoin(rdd1, rdd2, 2L))
-  expected <- list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)), list(3, list(3, NULL)))
+  expected <- list(list(1, list(2, 1)), list(1, list(3, 1)),
+                   list(2, list(NULL, 4)), list(3, list(3, NULL)))
   expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
 
   rdd1 <- parallelize(sc, list(list("a",2), list("a",3), list("c", 1)))
   rdd2 <- parallelize(sc, list(list("a",1), list("b",4)))
   actual <- collect(fullOuterJoin(rdd1, rdd2, 2L))
-  expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)), list("a", list(3, 1)), list("c", list(1, NULL)))
+  expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)),
+                   list("a", list(3, 1)), list("c", list(1, NULL)))
   expect_equal(sortKeyValueList(actual),
                sortKeyValueList(expected))
 
   rdd1 <- parallelize(sc, list(list(1,1), list(2,2)))
   rdd2 <- parallelize(sc, list(list(3,3), list(4,4)))
   actual <- collect(fullOuterJoin(rdd1, rdd2, 2L))
   expect_equal(sortKeyValueList(actual),
-               sortKeyValueList(list(list(1, list(1, NULL)), list(2, list(2, NULL)), list(3, list(NULL, 3)), list(4, list(NULL, 4)))))
+               sortKeyValueList(list(list(1, list(1, NULL)), list(2, list(2, NULL)),
+                                     list(3, list(NULL, 3)), list(4, list(NULL, 4)))))
 
   rdd1 <- parallelize(sc, list(list("a",1), list("b",2)))
   rdd2 <- parallelize(sc, list(list("c",3), list("d",4)))
   actual <- collect(fullOuterJoin(rdd1, rdd2, 2L))
   expect_equal(sortKeyValueList(actual),
-               sortKeyValueList(list(list("a", list(1, NULL)), list("b", list(2, NULL)), list("d", list(NULL, 4)), list("c", list(NULL, 3)))))
+               sortKeyValueList(list(list("a", list(1, NULL)), list("b", list(2, NULL)),
+                                     list("d", list(NULL, 4)), list("c", list(NULL, 3)))))
 })
 
 test_that("sortByKey() on pairwise RDDs", {
