Skip to content

Gsk

Gsk #187

Workflow file for this run

on:
push:
branches: main
pull_request:
branches: main
name: Spark-Connect
jobs:
Spark-Tests:
runs-on: ubuntu-latest
name: ${{ matrix.config.name }}
strategy:
fail-fast: false
matrix:
config:
- {spark: '3.5.2', pyspark: '3.5', hadoop: '3', name: 'PySpark 3.5'}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
SPARK_VERSION: ${{ matrix.config.pyspark }}
HADOOP_VERSION: ${{ matrix.config.hadoop }}
PYSPARK_VERSION: ${{ matrix.config.pyspark }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
DATABRICKS_HOST: "https://rstudio-partner-posit-default.cloud.databricks.com"
DATABRICKS_CLUSTER_ID: "0916-215603-ofitqny9"
steps:
- uses: actions/checkout@v3
- uses: r-lib/actions/setup-r@v2
with:
r-version: 'release'
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
any::devtools
any::arrow
needs: check
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install Venv
run: |
sudo apt-get install python3-venv
- name: Cache Spark
id: cache-spark
uses: actions/cache@v3
with:
path: /home/runner/spark/spark-${{ matrix.config.spark }}-bin-hadoop${{ matrix.config.hadoop }}
key: sparklyr-spark-${{ matrix.config.spark }}-bin-hadoop${{ matrix.config.hadoop }}
- name: Install Spark (via sparklyr)
if: steps.cache-spark.outputs.cache-hit != 'true'
run: |
devtools::install_github("sparklyr/sparklyr")
sparklyr::spark_install(version = Sys.getenv("SPARK_VERSION"))
print(sparklyr::spark_install_find(Sys.getenv("SPARK_VERSION"))$sparkVersionDir)
shell: Rscript {0}
- name: Cache Scala
id: cache-scala
uses: actions/cache@v3
with:
path: /home/runner/scala/
key: scala-2
- name: Install Scala (via sparklyr)
if: steps.cache-scala.outputs.cache-hit != 'true'
run: |
sparklyr::download_scalac()
shell: Rscript {0}
- name: R Session Info
run: sessionInfo()
shell: Rscript {0}
- name: R Environment Variables
run: Sys.getenv()
shell: Rscript {0}
- name: R Installed Packages
run: |
m_pkgs <- installed.packages()
t_pkgs <- as.data.frame(m_pkgs, row.names = FALSE)
print(t_pkgs[, c("Package", "Version")])
shell: Rscript {0}
- name: R Tests
run: |
devtools::load_all()
devtools::test(reporter = sparklyr_reporter())
shell: Rscript {0}