diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 791a8037e77f3..1b44839f98b03 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -102,37 +102,40 @@ jobs:
         java-version: ${{ matrix.java }}
     # PySpark
     - name: Install PyPy3
-      # SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
       # Note that order of Python installations here matters because default python3 is
       # overridden by pypy3.
      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      if: contains(matrix.modules, 'pyspark')
      with:
        python-version: pypy3
        architecture: x64
-    - name: Install Python 3.6
+    - name: Install Python 3.8
      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      if: contains(matrix.modules, 'pyspark')
      with:
-        python-version: 3.6
+        python-version: 3.8
        architecture: x64
     - name: Install Python 2.7
      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      # Yarn has a Python specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 2.7
        architecture: x64
-    - name: Install Python packages
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+    - name: Install Python packages (Python 3.8 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
       # PyArrow is not supported in PyPy yet, see ARROW-2651.
       # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
      run: |
-        python3 -m pip install numpy "pyarrow==0.14.*" pandas scipy
-        python3 -m pip list
-        python2 -m pip install numpy "pyarrow==0.14.*" pandas scipy
-        python2 -m pip list
+        python3.8 -m pip install numpy "pyarrow==0.14.*" pandas scipy
+        python3.8 -m pip list
        pypy3 -m pip install numpy pandas
        pypy3 -m pip list
+    - name: Install Python packages (Python 2.7)
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      run: |
+        python2.7 -m pip install numpy "pyarrow==0.14.*" pandas scipy
+        python2.7 -m pip list
     # SparkR
     - name: Install R 3.6
      uses: r-lib/actions/setup-r@v1
diff --git a/python/run-tests.py b/python/run-tests.py
index c34e48aad1211..fdb7a40102079 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -160,7 +160,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python2.7", "python3.6", "pypy"] if which(x)]
+    python_execs = [x for x in ["python2.7", "python3.8", "pypy"] if which(x)]
     if "python2.7" not in python_execs:
         LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
                        " back to `python` instead")