Migrate PR builders from Jenkins to Github Actions
HyukjinKwon committed Jul 9, 2020
1 parent 1cb5bfc commit ba8029e
Showing 20 changed files with 396 additions and 160 deletions.
267 changes: 175 additions & 92 deletions .github/workflows/master.yml
@@ -9,148 +9,231 @@ on:
- master

jobs:
# Build: build Spark and run the tests for specified modules.
build:

name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
java: [ '1.8', '11' ]
hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
hive: [ 'hive-1.2', 'hive-2.3' ]
exclude:
- java: '11'
hive: 'hive-1.2'
- hadoop: 'hadoop-3.2'
hive: 'hive-1.2'
name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}

java:
- 1.8
hadoop:
- hadoop3.2
hive:
- hive2.3
# TODO(SPARK-XXXXX): We don't test 'streaming-kinesis-asl' for now.
# Kinesis tests depend on the external Amazon Kinesis service.
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- |-
core, unsafe, kvstore, avro,
network_common, network_shuffle, repl, launcher,
examples, sketch, graphx
- |-
catalyst, sql
- |-
hive-thriftserver
- |-
streaming, sql-kafka-0-10, streaming-kafka-0-10
- |-
mllib-local, mllib
- |-
pyspark-sql, pyspark-mllib, pyspark-resource
- |-
pyspark-core, pyspark-streaming, pyspark-ml
- |-
sparkr
- |-
yarn, mesos, kubernetes, hadoop-cloud,
spark-ganglia-lgpl
# Here, we split the Hive tests into the heavy ones and the rest.
included-tags: [""]
excluded-tags: [""]
comment: ["- running all tests"]
include:
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.HeavyHiveTest
comment: "- running heavy tests"
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.HeavyHiveTest
comment: "- running non-heavy tests"
env:
TEST_ONLY_MODULES: ${{ matrix.modules }}
HADOOP_PROFILE: ${{ matrix.hadoop }}
HIVE_PROFILE: ${{ matrix.hive }}
# GitHub Actions' default Miniconda
CONDA_PREFIX: /usr/share/miniconda
# Don't run the tests in parallel due to flakiness. See SparkParallelTestGrouping.
SERIAL_SBT_TESTS: 1
TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
steps:
- uses: actions/checkout@master
# We split caches because the GitHub Actions cache has a 400 MB size limit.
- uses: actions/cache@v1
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache local repositories. Note that the GitHub Actions cache has a 2 GB limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v1
with:
path: build
key: build-${{ hashFiles('**/pom.xml') }}
restore-keys: |
build-
- uses: actions/cache@v1
with:
path: ~/.m2/repository/com
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
- uses: actions/cache@v1
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository/org
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
- uses: actions/cache@v1
with:
path: ~/.m2/repository/net
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
path: ~/.m2/repository
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
- uses: actions/cache@v1
${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- name: Cache Ivy local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository/io
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
path: ~/.ivy2/cache
key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
- name: Set up JDK ${{ matrix.java }}
${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
- name: Install JDK ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
- name: Build with Maven
# PySpark
- name: Install PyPy3
# The SQL module also has Python-related tests, for example, IntegratedUDFTestUtils.
# Note that the order here matters because the default python3 is overridden by pypy3.
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
with:
python-version: pypy3
architecture: x64
- name: Install Python 2.7
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
with:
python-version: 2.7
architecture: x64
- name: Install Python 3.6
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
with:
python-version: 3.6
architecture: x64
- name: Install Python packages
if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
# PyArrow is not supported in PyPy yet; see ARROW-2651.
# SciPy installation with PyPy fails for an unknown reason.
run: |
python3 -m pip install numpy pyarrow pandas scipy
python3 -m pip list
python2 -m pip install numpy pyarrow pandas scipy
python2 -m pip list
# Installing NumPy is flaky in PyPy.
pypy3 -m pip install numpy pandas
pypy3 -m pip list
# SparkR
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
if: contains(matrix.modules, 'sparkr')
with:
r-version: 3.6
- name: Install R packages
if: contains(matrix.modules, 'sparkr')
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
# Show installed packages in R.
sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
# Run the tests.
- name: "Run tests: ${{ matrix.modules }}"
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
mkdir -p ~/.m2
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
./dev/run-tests --parallelism 2
rm -rf ~/.m2/repository/org/apache/spark
# Linters: run the linters and other static analysis.
lint:
name: Linters, licenses, dependencies
runs-on: ubuntu-latest
name: Linters (Java/Scala/Python), licenses, dependencies
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
- name: Install JDK 11
uses: actions/setup-java@v1
with:
java-version: '11'
- uses: actions/setup-python@v1
java-version: 11
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: '3.x'
architecture: 'x64'
- name: Scala
run: ./dev/lint-scala
- name: Java
run: ./dev/lint-java
- name: Python
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
pip install flake8 sphinx numpy
./dev/lint-python
- name: License
run: ./dev/check-license
- name: Dependencies
run: ./dev/test-dependencies.sh

lintr:
runs-on: ubuntu-latest
name: Linter (R)
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
pip3 install flake8 sphinx numpy
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
with:
java-version: '11'
- uses: r-lib/actions/setup-r@v1
with:
r-version: '3.6.2'
- name: Install lib
r-version: 3.6
- name: Install R linter dependencies and SparkR
run: |
sudo apt-get install -y libcurl4-openssl-dev
- name: install R packages
run: |
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
- name: package and install SparkR
run: ./R/install-dev.sh
- name: lint-r
./R/install-dev.sh
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh

# Documentation build.
docs:
name: Build documentation
runs-on: ubuntu-latest
name: Generate documents
steps:
- uses: actions/checkout@master
- uses: actions/cache@v1
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-repo-
- uses: actions/setup-java@v1
docs-maven-
- name: Install JDK 1.8
uses: actions/setup-java@v1
with:
java-version: '1.8'
- uses: actions/setup-python@v1
java-version: 1.8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: '3.x'
architecture: 'x64'
- uses: actions/setup-ruby@v1
python-version: 3.6
architecture: x64
- name: Install Ruby 2.7
uses: actions/setup-ruby@v1
with:
ruby-version: '2.7'
- uses: r-lib/actions/setup-r@v1
ruby-version: 2.7
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
with:
r-version: '3.6.2'
- name: Install lib and pandoc
r-version: 3.6
- name: Install dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev pandoc
- name: Install packages
run: |
pip install sphinx mkdocs numpy
gem install jekyll jekyll-redirect-from rouge
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
- name: Run jekyll build
sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Run documentation build
run: |
cd docs
jekyll build
30 changes: 30 additions & 0 deletions common/tags/src/test/java/org/apache/spark/tags/HeavyHiveTest.java
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.tags;

import org.scalatest.TagAnnotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

@TagAnnotation
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.METHOD, ElementType.TYPE})
public @interface HeavyHiveTest { }
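
The new HeavyHiveTest tag is what the workflow's included-tags/excluded-tags matrix entries filter on: suites annotated with it go to the "- running heavy tests" job, while the "- running non-heavy tests" job excludes them. As a rough illustration only (the suite and package names below are hypothetical and not part of this commit), a long-running Hive suite would opt into the heavy group like this:

package org.apache.spark.sql.hive.execution

import org.apache.spark.SparkFunSuite
import org.apache.spark.tags.HeavyHiveTest

// Hypothetical suite, for illustration: the class-level tag annotation is picked up by
// ScalaTest tag filtering, so setting TEST_ONLY_INCLUDED_TAGS=org.apache.spark.tags.HeavyHiveTest
// (as the workflow's env block does) selects it, and the excluded-tags entry skips it.
@HeavyHiveTest
class ExampleHeavyHiveSuite extends SparkFunSuite {
  test("placeholder") {
    assert(1 + 1 === 2)
  }
}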
core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala
@@ -685,7 +685,7 @@ class MasterSuite extends SparkFunSuite
}
}

test("SPARK-27510: Master should avoid dead loop while launching executor failed in Worker") {
ignore("SPARK-27510: Master should avoid dead loop while launching executor failed in Worker") {
val master = makeAliveMaster()
var worker: MockExecutorLaunchFailWorker = null
try {
8 changes: 6 additions & 2 deletions dev/run-pip-tests
@@ -76,8 +76,12 @@ for python in "${PYTHON_EXECS[@]}"; do
VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
rm -rf "$VIRTUALENV_PATH"
if [ -n "$USE_CONDA" ]; then
if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
# See also https://github.com/conda/conda/issues/7980
source $CONDA_PREFIX/etc/profile.d/conda.sh
fi
conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
source activate "$VIRTUALENV_PATH"
conda activate "$VIRTUALENV_PATH"
else
mkdir -p "$VIRTUALENV_PATH"
virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -120,7 +124,7 @@ for python in "${PYTHON_EXECS[@]}"; do

# conda / virtualenv environments need to be deactivated differently
if [ -n "$USE_CONDA" ]; then
source deactivate
conda deactivate
else
deactivate
fi