Commit a1e5312

Merge remote-tracking branch 'apache-github/master' into SPARK-16406

2 parents d64346b + 767d480

File tree: 1,480 files changed, +47372 / -22833 lines

.gitignore

Lines changed: 6 additions & 0 deletions
@@ -17,11 +17,13 @@
 .idea/
 .idea_modules/
 .project
+.pydevproject
 .scala_dependencies
 .settings
 /lib/
 R-unit-tests.log
 R/unit-tests.out
+R/cran-check.out
 build/*.jar
 build/apache-maven*
 build/scala*
@@ -78,3 +80,7 @@ spark-warehouse/
 .RData
 .RHistory
 .Rhistory
+*.Rproj
+*.Rproj.*
+
+.Rproj.user

.travis.yml

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ notifications:
 # 5. Run maven install before running lint-java.
 install:
   - export MAVEN_SKIP_RC=1
-  - build/mvn -T 4 -q -DskipTests -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
+  - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
 
 # 6. Run lint-java.
 script:
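
The only change here is the new `-Pmesos` profile in the CI build command, presumably because Mesos support is now built as its own module behind a Maven profile. To mirror the CI build locally (a sketch, assuming a stock Spark checkout and the bundled `build/mvn` wrapper):

```bash
# Same flags Travis uses: parallel (-T 4), quiet, tests skipped, all major profiles enabled
./build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
```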

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.3 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -4,3 +4,5 @@
 lib
 pkg/man
 pkg/html
+SparkR.Rcheck/
+SparkR_*.tar.gz

R/WINDOWS.md

Lines changed: 11 additions & 1 deletion
@@ -4,13 +4,23 @@ To build SparkR on Windows, the following steps are required
 
 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
 include Rtools and R in `PATH`.
+
 2. Install
 [JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set
 `JAVA_HOME` in the system environment variables.
+
 3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
 directory in Maven in `PATH`.
+
 4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
-5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package`
+
+5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
+
+    ```bash
+    mvn.cmd -DskipTests -Psparkr package
+    ```
+
+    `.\build\mvn` is a shell script so `mvn.cmd` should be used directly on Windows.
 
 ## Unit tests
 

R/check-cran.sh

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -o pipefail
+set -e
+
+FWDIR="$(cd `dirname $0`; pwd)"
+pushd $FWDIR > /dev/null
+
+if [ ! -z "$R_HOME" ]
+then
+  R_SCRIPT_PATH="$R_HOME/bin"
+else
+  # if system wide R_HOME is not found, then exit
+  if [ ! `command -v R` ]; then
+    echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
+    exit 1
+  fi
+  R_SCRIPT_PATH="$(dirname $(which R))"
+fi
+echo "USING R_HOME = $R_HOME"
+
+# Build the latest docs
+$FWDIR/create-docs.sh
+
+# Build a zip file containing the source package
+"$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
+
+# Run check as-cran.
+VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
+
+CRAN_CHECK_OPTIONS="--as-cran"
+
+if [ -n "$NO_TESTS" ]
+then
+  CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests"
+fi
+
+if [ -n "$NO_MANUAL" ]
+then
+  CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual"
+fi
+
+echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"
+
+"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
+
+popd > /dev/null
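
For context, a minimal usage sketch of the new script. It resolves its own directory, so it can be invoked from anywhere in a Spark checkout as long as R is on `PATH` or `R_HOME` is set:

```bash
# Full CRAN check: regenerates docs, builds the source tarball, runs R CMD check --as-cran
./R/check-cran.sh

# The script reads NO_TESTS and NO_MANUAL from the environment to skip the
# test suite and the PDF manual, respectively
NO_TESTS=1 NO_MANUAL=1 ./R/check-cran.sh
```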

R/pkg/.Rbuildignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^\.lintr$
+^src-native$
+^html$

R/pkg/DESCRIPTION

Lines changed: 15 additions & 8 deletions
@@ -1,20 +1,25 @@
 Package: SparkR
 Type: Package
-Title: R frontend for Spark
+Title: R Frontend for Apache Spark
 Version: 2.0.0
-Date: 2013-09-09
-Author: The Apache Software Foundation
-Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
-Imports:
-    methods
+Date: 2016-08-27
+Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
+                    email = "shivaram@cs.berkeley.edu"),
+             person("Xiangrui", "Meng", role = "aut",
+                    email = "meng@databricks.com"),
+             person("Felix", "Cheung", role = "aut",
+                    email = "felixcheung@apache.org"),
+             person(family = "The Apache Software Foundation", role = c("aut", "cph")))
+URL: http://www.apache.org/ http://spark.apache.org/
+BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports
 Depends:
     R (>= 3.0),
-    methods,
+    methods
 Suggests:
     testthat,
     e1071,
     survival
-Description: R frontend for Spark
+Description: The SparkR package provides an R frontend for Apache Spark.
 License: Apache License (== 2.0)
 Collate:
     'schema.R'
@@ -33,6 +38,8 @@ Collate:
     'context.R'
     'deserialize.R'
    'functions.R'
+    'install.R'
+    'jvm.R'
     'mllib.R'
     'serialize.R'
     'sparkR.R'

R/pkg/NAMESPACE

Lines changed: 35 additions & 5 deletions
@@ -1,5 +1,9 @@
 # Imports from base R
-importFrom(methods, setGeneric, setMethod, setOldClass)
+# Do not include stats:: "rpois", "runif" - causes error at runtime
+importFrom("methods", "setGeneric", "setMethod", "setOldClass")
+importFrom("methods", "is", "new", "signature", "show")
+importFrom("stats", "gaussian", "setNames")
+importFrom("utils", "download.file", "packageVersion", "untar")
 
 # Disable native libraries till we figure out how to package it
 # See SPARKR-7839
@@ -11,8 +15,15 @@ export("sparkR.init")
 export("sparkR.stop")
 export("sparkR.session.stop")
 export("sparkR.conf")
+export("sparkR.version")
 export("print.jobj")
 
+export("sparkR.newJObject")
+export("sparkR.callJMethod")
+export("sparkR.callJStatic")
+
+export("install.spark")
+
 export("sparkRSQL.init",
        "sparkRHive.init")
 
@@ -23,8 +34,16 @@ exportMethods("glm",
               "summary",
               "spark.kmeans",
               "fitted",
+              "spark.mlp",
               "spark.naiveBayes",
-              "spark.survreg")
+              "spark.survreg",
+              "spark.lda",
+              "spark.posterior",
+              "spark.perplexity",
+              "spark.isoreg",
+              "spark.gaussianMixture",
+              "spark.als",
+              "spark.kstest")
 
 # Job group lifecycle management methods
 export("setJobGroup",
@@ -324,7 +343,8 @@ export("as.DataFrame",
        "tables",
        "uncacheTable",
        "print.summary.GeneralizedLinearRegressionModel",
-       "read.ml")
+       "read.ml",
+       "print.summary.KSTest")
 
 export("structField",
        "structField.jobj",
@@ -341,5 +361,15 @@ export("partitionBy",
        "rowsBetween",
        "rangeBetween")
 
-export("window.partitionBy",
-       "window.orderBy")
+export("windowPartitionBy",
+       "windowOrderBy")
+
+S3method(print, jobj)
+S3method(print, structField)
+S3method(print, structType)
+S3method(print, summary.GeneralizedLinearRegressionModel)
+S3method(print, summary.KSTest)
+S3method(structField, character)
+S3method(structField, jobj)
+S3method(structType, jobj)
+S3method(structType, structField)
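
The newly exported JVM-interop helpers and `sparkR.version` can be smoke-tested from a shell. A hedged sketch, assuming SparkR was built into `R/lib` (where `R/install-dev.sh` installs it) and a local master is acceptable:

```bash
R --no-save <<'EOF'
library(SparkR, lib.loc = "R/lib")   # dev-build location; adjust if installed elsewhere
sparkR.session(master = "local[2]")
print(sparkR.version())              # newly exported: the running Spark version string
# Low-level JVM access via the newly exported helpers:
print(sparkR.callJStatic("java.lang.System", "currentTimeMillis"))
sparkR.session.stop()
EOF
```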
