Merge branch 'master' into SPARK-32106-MINOR
AngersZhuuuu committed Dec 9, 2020
2 parents 5b41fbc + a713a7e commit 63bdb53
Showing 1,307 changed files with 31,702 additions and 14,170 deletions.
113 changes: 65 additions & 48 deletions .github/workflows/build_and_test.yml
@@ -23,7 +23,7 @@ jobs:
fail-fast: false
matrix:
java:
- - 1.8
+ - 8
hadoop:
- hadoop3.2
hive:
@@ -49,26 +49,26 @@ jobs:
include:
# Hive tests
- modules: hive
- java: 1.8
+ java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
- java: 1.8
+ java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
- java: 1.8
+ java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
- java: 1.8
+ java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
@@ -101,24 +101,18 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
+ ~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
- - name: Install JDK ${{ matrix.java }}
+ - name: Install Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
@@ -141,9 +135,7 @@ jobs:
run: |
# Hive and SQL tests become flaky when running in parallel as it's too intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
- mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
- rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
@@ -161,7 +153,7 @@ jobs:
name: "Build modules: ${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
- image: dongjoon/apache-spark-github-action-image:20201015
+ image: dongjoon/apache-spark-github-action-image:20201025
strategy:
fail-fast: false
matrix:
@@ -196,16 +188,10 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
+ ~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: pyspark-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- pyspark-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
@@ -228,24 +214,22 @@ jobs:
# Run the tests.
- name: Run tests
run: |
- mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
- rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
+ name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
- name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
+ name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

sparkr:
- name: Build modules - sparkr
+ name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
@@ -272,16 +256,10 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
+ ~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: sparkr-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- sparkr-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
@@ -291,18 +269,16 @@ jobs:
sparkr-coursier-
- name: Run tests
run: |
- mkdir -p ~/.m2
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
export TZ=UTC
export _R_CHECK_SYSTEM_CLOCK_=FALSE
./dev/run-tests --parallelism 2 --modules sparkr
- rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-sparkr--1.8-hadoop3.2-hive2.3
+ name: test-results-sparkr--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"

# Static analysis, and documentation build
@@ -312,17 +288,37 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
+ # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+ - name: Cache Scala, SBT, Maven and Zinc
+ uses: actions/cache@v2
+ with:
+ path: |
+ build/apache-maven-*
+ build/zinc-*
+ build/scala-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Coursier local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.cache/coursier
+ key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
- key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+ key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- - name: Install JDK 1.8
+ - name: Install Java 8
uses: actions/setup-java@v1
with:
- java-version: 1.8
+ java-version: 8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
@@ -373,8 +369,8 @@ jobs:
cd docs
jekyll build
- java11:
- name: Java 11 build
+ java-11:
+ name: Java 11 build with Maven
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
@@ -394,12 +390,12 @@ jobs:
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
mkdir -p ~/.m2
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark
scala-213:
- name: Scala 2.13 build
+ name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
@@ -411,11 +407,32 @@ jobs:
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
- - name: Install Java 11
+ - name: Install Java 8
uses: actions/setup-java@v1
with:
- java-version: 11
+ java-version: 8
- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
- ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile
+ ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pscala-2.13 compile test:compile
+ hadoop-2:
+ name: Hadoop 2 build with SBT
+ runs-on: ubuntu-20.04
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+ - name: Cache Coursier local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.cache/coursier
+ key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ hadoop-2-coursier-
+ - name: Install Java 8
+ uses: actions/setup-java@v1
+ with:
+ java-version: 8
+ - name: Build with SBT
+ run: |
+ ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
39 changes: 39 additions & 0 deletions .github/workflows/publish_snapshot.yml
@@ -0,0 +1,39 @@
name: Publish Snapshot

on:
schedule:
- cron: '0 0 * * *'

jobs:
publish-snapshot:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
branch:
- master
- branch-3.1
steps:
- name: Checkout Spark repository
uses: actions/checkout@master
with:
ref: ${{ matrix.branch }}
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: snapshot-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
snapshot-maven-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Publish snapshot
env:
ASF_USERNAME: ${{ secrets.NEXUS_USER }}
ASF_PASSWORD: ${{ secrets.NEXUS_PW }}
GPG_KEY: "not_used"
GPG_PASSPHRASE: "not_used"
GIT_REF: ${{ matrix.branch }}
run: ./dev/create-release/release-build.sh publish-snapshot
2 changes: 1 addition & 1 deletion R/CRAN_RELEASE.md
@@ -25,7 +25,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple

First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control.

- Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`).
+ Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, e.g. `yum -q -y install qpdf`).

To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible.
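
For illustration, the manual pre-release check described above might look like the following sketch; the checkout path and tarball name are placeholders, and it assumes `qpdf` is installed and the Spark jars have already been built:

```sh
# Hypothetical walk-through of the manual check; paths are placeholders.
export SPARK_HOME=/path/to/spark        # assumed Spark checkout location
cd "$SPARK_HOME/R"
R CMD build pkg                         # build the SparkR source tarball, vignettes included
R CMD check --as-cran SparkR_*.tar.gz   # full check, with the manual and PDF vignettes
```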

2 changes: 1 addition & 1 deletion R/install-dev.bat
@@ -26,7 +26,7 @@ MKDIR %SPARK_HOME%\R\lib

rem When you pass the package path directly as an argument to R CMD INSTALL,
rem it takes the path as 'C:\projects\spark\R\..\R\pkg"' as an example at
- rem R 4.0. To work around this, directly go to the directoy and install it.
+ rem R 4.0. To work around this, directly go to the directory and install it.
rem See also SPARK-32074
pushd %SPARK_HOME%\R\pkg\
R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" .
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
Package: SparkR
Type: Package
- Version: 3.1.0
+ Version: 3.2.0
Title: R Front End for 'Apache Spark'
Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
6 changes: 6 additions & 0 deletions R/pkg/NAMESPACE
@@ -202,6 +202,7 @@ exportMethods("%<=>%",
"%in%",
"abs",
"acos",
"acosh",
"add_months",
"alias",
"approx_count_distinct",
@@ -222,6 +223,7 @@ exportMethods("%<=>%",
"array_remove",
"array_repeat",
"array_sort",
"array_to_vector",
"array_transform",
"arrays_overlap",
"array_union",
@@ -232,8 +234,10 @@ exportMethods("%<=>%",
"asc_nulls_last",
"ascii",
"asin",
"asinh",
"assert_true",
"atan",
"atanh",
"atan2",
"avg",
"base64",
@@ -292,6 +296,7 @@ exportMethods("%<=>%",
"floor",
"format_number",
"format_string",
"from_avro",
"from_csv",
"from_json",
"from_unixtime",
@@ -416,6 +421,7 @@ exportMethods("%<=>%",
"timestamp_seconds",
"toDegrees",
"toRadians",
"to_avro",
"to_csv",
"to_date",
"to_json",