Python build #3300

Workflow file for this run

name: Python build

on:
  push:
    branches:
      - master
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'python/**'
      - '.github/workflows/python.yml'
  pull_request:
    branches:
      - '*'
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'python/**'
      - '.github/workflows/python.yml'
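
# Global environment: keep Maven's HTTP connections short-lived to avoid
# stale-connection failures, pin the JAI artifact versions downloaded from
# repo.osgeo.org later in the job, and set DO_NOT_TRACK for tools that
# honor it.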
env:
  MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
  JAI_CORE_VERSION: "1.1.3"
  JAI_CODEC_VERSION: "1.1.3"
  JAI_IMAGEIO_VERSION: "1.1"
  DO_NOT_TRACK: true

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        include:
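          # Test matrix: each entry pins a Spark/Scala/Python/Hadoop
          # combination; entries with 'shapely: 1' additionally exercise
          # the Shapely 1.x code path (the Pipfile is patched below).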
          - spark: '3.5.0'
            scala: '2.12.8'
            python: '3.10'
            hadoop: '3'
            shapely: '1'
          - spark: '3.5.0'
            scala: '2.12.8'
            python: '3.10'
            hadoop: '3'
          - spark: '3.5.0'
            scala: '2.12.8'
            python: '3.9'
            hadoop: '3'
          - spark: '3.5.0'
            scala: '2.12.8'
            python: '3.8'
            hadoop: '3'
          - spark: '3.4.0'
            scala: '2.12.8'
            python: '3.10'
            hadoop: '3'
          - spark: '3.4.0'
            scala: '2.12.8'
            python: '3.9'
            hadoop: '3'
          - spark: '3.4.0'
            scala: '2.12.8'
            python: '3.8'
            hadoop: '3'
          - spark: '3.4.0'
            scala: '2.12.8'
            python: '3.7'
            hadoop: '3'
          - spark: '3.4.0'
            scala: '2.12.8'
            python: '3.7'
            hadoop: '3'
            shapely: '1'
          - spark: '3.3.0'
            scala: '2.12.8'
            python: '3.8'
            hadoop: '3'
          - spark: '3.2.0'
            scala: '2.12.8'
            python: '3.7'
            hadoop: '2.7'
          - spark: '3.1.2'
            scala: '2.12.8'
            python: '3.7'
            hadoop: '2.7'
          - spark: '3.0.3'
            scala: '2.12.8'
            python: '3.7'
            hadoop: '2.7'
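    # Pipeline: build the Sedona JVM modules with Maven, create a pipenv
    # environment with the matching PySpark, drop the required jars into
    # PySpark's jars directory, then run the Python test suite.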
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-java@v4
        with:
          distribution: 'zulu'
          java-version: '8'
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - name: Cache Maven packages
        uses: actions/cache@v3
        with:
          path: ~/.m2
          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
          restore-keys: ${{ runner.os }}-m2
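      # Build the JVM artifacts. The Maven profile is selected from the
      # first two components of the Spark version (e.g. '3.5.0' -> '3.5')
      # and the Scala binary version (e.g. '2.12.8' -> '2.12').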
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          SCALA_VERSION: ${{ matrix.scala }}
        run: |
          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
          mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
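      # Baseline Python tooling for the job, followed by an in-place
      # build_ext so any compiled extensions are importable from the
      # source tree.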
      - run: sudo apt-get -y install python3-pip python-dev-is-python3
      - run: sudo pip3 install -U setuptools
      - run: sudo pip3 install -U wheel
      - run: sudo pip3 install -U virtualenvwrapper
      - run: python3 -m pip install pipenv
      - run: cd python; python3 setup.py build_ext --inplace
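      # Create a pipenv environment named after the Python version under
      # ./.venv, install the matching PySpark, then the dev dependencies.
      # When the matrix sets shapely '1', the Pipfile is first patched to
      # constrain Shapely to <2.0.0.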
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          PYTHON_VERSION: ${{ matrix.python }}
          SHAPELY_VERSION: ${{ matrix.shapely }}
        run: |
          cd python
          if [ "${SHAPELY_VERSION}" == "1" ]; then
            echo "Patching Pipfile to use Shapely 1.x"
            sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile
          fi
          export WORKON_HOME=./.venv
          export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION}
          pipenv --python ${PYTHON_VERSION}
          pipenv install pyspark==${SPARK_VERSION}
          pipenv install --dev
          pipenv graph
          pipenv run pip show pyspark
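      # Fetch the JAI (Java Advanced Imaging) jars from repo.osgeo.org,
      # with retries, and move them into PySpark's jars directory so they
      # end up on the Spark classpath.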
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
          PYTHON_VERSION: ${{ matrix.python }}
        run: |
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
          mv -v jai_core-${JAI_CORE_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
          mv -v jai_codec-${JAI_CODEC_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
          mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
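      # Copy the shaded Sedona jar produced by the Maven build into the
      # same jars directory.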
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
          PYTHON_VERSION: ${{ matrix.python }}
        run: find spark-shaded/target -name 'sedona-*.jar' -exec cp {} .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \;
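      # Debugging aid: action-tmate pauses the job here and prints an SSH
      # address for an interactive shell on the runner.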
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3.19
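      # Run the Python test suite against the PySpark installation inside
      # the pipenv environment.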
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
          PYTHON_VERSION: ${{ matrix.python }}
        run: |
          export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark
          cd python
          pipenv run pytest tests
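      # Spark Connect smoke test: only meaningful where the PySpark
      # distribution ships start-connect-server.sh (Spark 3.4+); older
      # versions skip it.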
      - env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
          PYTHON_VERSION: ${{ matrix.python }}
        run: |
          export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark
          if [ ! -f "${SPARK_HOME}/sbin/start-connect-server.sh" ]
          then
            echo "Skipping connect tests for Spark ${SPARK_VERSION}"
            exit 0
          fi
          export SPARK_REMOTE=local
          cd python
          pipenv install "pyspark[connect]==${SPARK_VERSION}"
          pipenv run pytest tests/sql/test_dataframe_api.py