Merge pull request #345 from roocs/fix-tests
Streamline fixtures, use pytest-xdist, drop Python 3.8
Zeitsperre authored Oct 3, 2024
2 parents 002941a + 6906e97 commit 898c672
Showing 43 changed files with 2,415 additions and 1,842 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/cache_cleaner.yml
@@ -0,0 +1,49 @@
+# Example taken from https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#managing-caches
+name: Cleanup Caches on PR Merge
+on:
+  pull_request:
+    types:
+      - closed
+
+permissions:
+  contents: read
+
+jobs:
+  cleanup:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write
+    steps:
+      - name: Harden Runner
+        uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1
+        with:
+          disable-sudo: true
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            objects.githubusercontent.com:443
+      - name: Checkout Repository
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Cleanup
+        run: |
+          gh extension install actions/gh-actions-cache
+
+          REPO=${{ github.repository }}
+          BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"
+
+          echo "Fetching list of cache keys"
+          cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1)
+
+          ## Setting this to not fail the workflow while deleting cache keys.
+          set +e
+
+          echo "Deleting caches..."
+          for cacheKey in $cacheKeysForPR
+          do
+            gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
+          done
+          echo "Done"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
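
For context, the Cleanup step above is equivalent to calling GitHub's REST API for Actions caches directly. The sketch below is illustrative only (not part of the commit); it assumes a token with `actions: write` scope in the `GH_TOKEN` environment variable, and the repository and PR number are example values.

# Rough Python equivalent of the workflow's cleanup step, using the GitHub REST
# endpoints GET/DELETE /repos/{owner}/{repo}/actions/caches.
import os

import requests

API = "https://api.github.com"
HEADERS = {
    "Authorization": f"Bearer {os.environ['GH_TOKEN']}",
    "Accept": "application/vnd.github+json",
}


def delete_pr_caches(repo: str, pr_number: int) -> None:
    """Delete all Actions caches scoped to a pull request's merge ref."""
    ref = f"refs/pull/{pr_number}/merge"
    resp = requests.get(
        f"{API}/repos/{repo}/actions/caches",
        headers=HEADERS,
        params={"ref": ref, "per_page": 100},
    )
    resp.raise_for_status()
    for cache in resp.json().get("actions_caches", []):
        # Deleting by key + ref mirrors `gh actions-cache delete $key -B $BRANCH`.
        requests.delete(
            f"{API}/repos/{repo}/actions/caches",
            headers=HEADERS,
            params={"key": cache["key"], "ref": ref},
        ).raise_for_status()


if __name__ == "__main__":
    delete_pr_caches("roocs/clisops", pr_number=345)  # example values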
35 changes: 30 additions & 5 deletions .github/workflows/main.yml
@@ -11,6 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
 
+env:
+  ESGF_TEST_DATA_VERSION: v1
+
 permissions:
   contents: read
 
@@ -29,9 +32,16 @@ jobs:
       uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
       with:
         python-version: "3.9"
+        cache: pip
     - name: Install tox
       run: |
-        python -m pip install tox~=4.0
+        python -m pip install tox~=4.18.0
+    - name: Environment Caching
+      uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+      with:
+        path: |
+          .tox
+        key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-lint-${{ env.ESGF_TEST_DATA_VERSION }}
     - name: Run linting suite
       run: |
         python -m tox -e ${{ matrix.tox-env }}
@@ -44,8 +54,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - python-version: "3.8"
-            tox-env: py38
          - python-version: "3.9"
            tox-env: py39
          - python-version: "3.10"
@@ -61,9 +69,18 @@ jobs:
       uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
       with:
         python-version: ${{ matrix.python-version }}
+        cache: "pip"
     - name: Install tox
       run: |
-        python -m pip install tox~=4.0
+        python -m pip install tox~=4.18.0
+    - name: Environment Caching
+      uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+      with:
+        path: |
+          ~/.cache/mini-esgf-data
+          ~/.cache/xclim-testdata
+          .tox
+        key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}
     - name: Test with tox
       run: |
         python -m tox -e ${{ matrix.tox-env }}
@@ -104,20 +121,28 @@ jobs:
         environment-file: environment.yml
         create-args: >-
           python=${{ matrix.python-version }}
+        micromamba-version: "1.5.10-0"  # pinned to avoid the breaking changes with mamba and micromamba (2.0.0).
     - name: Install CLISOPS
       run: |
         python -m pip install --no-user --editable ".[dev]"
     - name: Install upstream dependencies
       if: ${{ matrix.upstream }}
       run: |
         python -m pip install -r requirements_upstream.txt
+    - name: Test Data Caching
+      uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+      with:
+        path: |
+          ~/.cache/mini-esgf-data
+          ~/.cache/xclim-testdata
+        key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}
     - name: Check versions
       run: |
         micromamba list
         python -m pip check || true
     - name: Test with conda
       run: |
-        python -m pytest --cov tests
+        python -m pytest --numprocesses=logical --durations=10 --cov=clisops --cov-report=term-missing
 
 finish:
   needs:
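
A note on the new test command: `--numprocesses=logical` is provided by the `pytest-xdist[psutil]` extra (added to the development dependencies in this PR) and spawns one worker per logical CPU. A minimal sketch of what the flag resolves to, assuming `psutil` is installed:

# pytest-xdist worker counts: "logical" uses logical CPUs (hyperthreads included),
# while "auto" uses physical cores when psutil is available.
import psutil

print("logical:", psutil.cpu_count(logical=True))
print("physical:", psutil.cpu_count(logical=False))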
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
     rev: v3.17.0
     hooks:
       - id: pyupgrade
-        args: [ '--py38-plus' ]
+        args: [ '--py39-plus' ]
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.6.0
     hooks:
@@ -72,5 +72,5 @@ ci:
   autoupdate_branch: ''
   autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
   autoupdate_schedule: quarterly
-  skip: []
+  skip: [ ]
   submodules: false
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
@@ -204,7 +204,7 @@ Before you submit a pull request, please follow these guidelines:
    Remember to add the feature or bug fixes explanation to the appropriate section in the HISTORY.rst.
 
 #.
-   The pull request should work for Python 3.8+ as well as raise test coverage.
+   The pull request should work for Python 3.9+ as well as raise test coverage.
    Pull requests are also checked for documentation build status and for `PEP8`_ compliance.
 
    The build statuses and build errors for pull requests can be found at:
23 changes: 23 additions & 0 deletions HISTORY.rst
@@ -1,6 +1,29 @@
 Version History
 ===============
 
+v0.14.0 (2024-10-03)
+--------------------
+
+New Features
+^^^^^^^^^^^^
+* `clisops` now makes use of `pytest-xdist` for parallel testing. This can be enabled with `--numprocesses={int}`. See the `pytest-xdist documentation <https://pytest-xdist.readthedocs.io/en/latest/>`_ for more information (#345).
+* Testing data caching is now handled by `pooch`, with testing data registries ('stratus' for `roocs/mini-esgf-data` and 'nimbus' for `Ouranosinc/xclim-testdata`) (#345).
+* `clisops` now follows Python 3.9+ coding conventions (#345).
+
+Breaking Changes
+^^^^^^^^^^^^^^^^
+* `clisops` has dropped support for Python 3.8 (#345).
+* `clisops` now requires `filelock >=3.15.4`, `jinja2 >=2.11`, `numpy >=1.17`, and `scipy >=1.6` (#345).
+* `clisops` no longer requires `gitpython >=3.1.30` and `requests >=2.0` (#345).
+* The development dependencies have been updated to include `deptry >=0.20.0` and `pytest-xdist[psutil] >=3.2` (#345).
+* `netCDF4` has been moved from a core dependency to a development dependency (#345).
+
+Other Changes
+^^^^^^^^^^^^^
+* `clisops.utils.testing` has replaced `clisops.utils.tutorial`. This submodule contains several functions and variables allowing user control over testing data fetching (#345).
+* The `_common` testing tools have been migrated to `clisops.utils.testing` or rewritten as `pytest` fixtures (#345).
+* Testing data fetching now uses worker threads to copy cached data to threadsafe data caches that are separated by worker (#345).
+
 v0.13.1 (2024-08-20)
 --------------------
 
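
The `pooch`-based test-data caching described above follows pooch's standard registry pattern. The sketch below is illustrative only: the base URL and registry entries are hypothetical, and the real 'stratus'/'nimbus' registries live in `clisops.utils.testing`.

# Minimal sketch of a pooch data registry (hypothetical names and URL).
import pooch

stratus = pooch.create(
    path=pooch.os_cache("mini-esgf-data"),  # e.g. ~/.cache/mini-esgf-data
    base_url="https://github.com/roocs/mini-esgf-data/raw/v1/test_data/",
    registry={
        # "relative/path/to/file.nc": "sha256:<digest>",  # one entry per test file
    },
)

# fetch() downloads a file on first use and serves the cached copy afterwards:
# local_file = stratus.fetch("some/test_file.nc")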
7 changes: 4 additions & 3 deletions Makefile
@@ -51,9 +51,10 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	black --check clisops tests
-	isort --check-only clisops tests
-	flake8 --config=.flake8 clisops tests
+	python -m black --check clisops tests
+	python -m isort --check-only clisops tests
+	python -m flake8 --config=.flake8 clisops tests
+	python -m deptry .
 
 test: ## run tests quickly with the default Python
 	python -m pytest
8 changes: 4 additions & 4 deletions clisops/core/average.py
@@ -1,8 +1,9 @@
"""Average module."""

import warnings
from collections.abc import Sequence
from pathlib import Path
from typing import Sequence, Tuple, Union
from typing import Union

import cf_xarray # noqa
import geopandas as gpd
@@ -15,11 +16,10 @@
     known_coord_types,
 )
 
+from clisops.core.regrid import XESMF_MINIMUM_VERSION
+from clisops.core.subset import shape_bbox_indexer
 from clisops.utils.time_utils import create_time_bounds
 
-from .regrid import XESMF_MINIMUM_VERSION
-from .subset import shape_bbox_indexer
-
 __all__ = ["average_over_dims", "average_shape", "average_time"]
 
 # see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
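
The import churn above recurs throughout the diff: with Python 3.8 dropped, `typing.Sequence`, `Tuple`, `List`, and `Dict` give way to `collections.abc` imports and the builtin generics standardized in Python 3.9 (PEP 585). A small before/after sketch (not from the codebase):

# Before (Python 3.8 style):
#   from typing import Dict, List, Sequence, Tuple
#   def summarize(dims: Sequence[str]) -> Tuple[List[str], Dict[str, int]]: ...

# After (Python 3.9+ style, as used across this PR):
from collections.abc import Sequence


def summarize(dims: Sequence[str]) -> tuple[list[str], dict[str, int]]:
    """Return the dims as a list plus a name -> position mapping."""
    return list(dims), {name: i for i, name in enumerate(dims)}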
5 changes: 2 additions & 3 deletions clisops/core/regrid.py
@@ -99,8 +99,7 @@ def weights_cache_init(
         weights_dir = config["clisops:grid_weights"]["local_weights_dir"]
 
     # Create directory tree if required
-    if not os.path.isdir(weights_dir):
-        os.makedirs(weights_dir)
+    os.makedirs(weights_dir, exist_ok=True)
 
 
 # Initialize weights cache as defined in the clisops configuration (roocs.ini)
@@ -638,7 +637,7 @@ def detect_type(self) -> str:
         )
         and all(
             [
-                self.ds.dims[dim] > 2
+                self.ds.sizes[dim] > 2
                 for dim in [
                     self.ds[self.lon_bnds].dims[-1],
                     self.ds[self.lat_bnds].dims[-1],
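
Both hunks above are small robustness fixes: `os.makedirs(..., exist_ok=True)` removes the check-then-create race, and `Dataset.sizes` is the forward-compatible mapping of dimension names to lengths (recent xarray deprecates treating `Dataset.dims` as that mapping). A quick illustration, assuming only `numpy` and `xarray`:

import numpy as np
import xarray as xr

ds = xr.Dataset({"tas": (("lat", "lon"), np.zeros((3, 4)))})

# `sizes` is the supported name -> length mapping on both Dataset and DataArray.
assert ds.sizes["lon"] == 4

# `ds.dims["lon"]` still works today but warns in recent xarray releases,
# since Dataset.dims is becoming a set-like collection of dimension names.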
19 changes: 10 additions & 9 deletions clisops/core/subset.py
@@ -2,9 +2,10 @@
 
 import numbers
 import re
+from collections.abc import Sequence
 from functools import wraps
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Callable, Optional, Union
 
 import cf_xarray  # noqa
 import geopandas as gpd
@@ -1186,8 +1187,8 @@ def subset_shape(
 @check_lons
 def subset_bbox(
     da: Union[xarray.DataArray, xarray.Dataset],
-    lon_bnds: Union[np.array, Tuple[Optional[float], Optional[float]]] = None,
-    lat_bnds: Union[np.array, Tuple[Optional[float], Optional[float]]] = None,
+    lon_bnds: Union[np.array, tuple[Optional[float], Optional[float]]] = None,
+    lat_bnds: Union[np.array, tuple[Optional[float], Optional[float]]] = None,
     start_date: Optional[str] = None,
     end_date: Optional[str] = None,
     first_level: Optional[Union[float, int]] = None,
@@ -1371,8 +1372,8 @@ def subset_bbox(
 
 
 def assign_bounds(
-    bounds: Tuple[Optional[float], Optional[float]], coord: xarray.DataArray
-) -> Tuple[Optional[float], Optional[float]]:
+    bounds: tuple[Optional[float], Optional[float]], coord: xarray.DataArray
+) -> tuple[Optional[float], Optional[float]]:
     """Replace unset boundaries by the minimum and maximum coordinates.
 
     Parameters
@@ -1396,7 +1397,7 @@ def assign_bounds(
     return bn, bx
 
 
-def in_bounds(bounds: Tuple[float, float], coord: xarray.DataArray) -> xarray.DataArray:
+def in_bounds(bounds: tuple[float, float], coord: xarray.DataArray) -> xarray.DataArray:
     """Check which coordinates are within the boundaries.
 
     Parameters
@@ -1417,9 +1418,9 @@ def in_bounds(bounds: Tuple[float, float], coord: xarray.DataArray) -> xarray.Da
 
 def _check_desc_coords(
     coord: xarray.Dataset,
-    bounds: Union[Tuple[float, float], List[np.ndarray]],
+    bounds: Union[tuple[float, float], list[np.ndarray]],
     dim: str,
-) -> Tuple[float, float]:
+) -> tuple[float, float]:
     """If Dataset coordinates are descending, and bounds are ascending, reverse bounds."""
     if np.all(coord.diff(dim=dim) < 0) and len(coord) > 1 and bounds[1] > bounds[0]:
         bounds = np.flip(bounds)
@@ -1716,7 +1717,7 @@ def subset_time_by_values(
 def subset_time_by_components(
     da: Union[xarray.DataArray, xarray.Dataset],
     *,
-    time_components: Union[Dict, None] = None,
+    time_components: Union[dict, None] = None,
 ) -> xarray.DataArray:
     """Subsets by one or more time components (year, month, day etc).
 
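
One detail worth noting from the `_check_desc_coords` hunk above: when a coordinate runs north-to-south (descending) while the requested bounds are ascending, the bounds are flipped so the selection still covers the intended range. A tiny demonstration of the idea in plain numpy:

import numpy as np

lat = np.array([90.0, 45.0, 0.0, -45.0, -90.0])  # descending, common in CMIP grids
bounds = (-60.0, 60.0)  # user-supplied, ascending

# Mirrors the check in _check_desc_coords: descending coords + ascending bounds.
if np.all(np.diff(lat) < 0) and len(lat) > 1 and bounds[1] > bounds[0]:
    bounds = np.flip(bounds)

print(bounds)  # [ 60. -60.] -- ready for .sel(lat=slice(*bounds))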
9 changes: 5 additions & 4 deletions clisops/ops/average.py
@@ -1,5 +1,6 @@
+from collections.abc import Sequence
 from pathlib import Path
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Optional, Union
 
 import geopandas as gpd
 import xarray as xr
@@ -50,7 +51,7 @@ def average_over_dims(
     output_type: str = "netcdf",
     split_method: str = "time:auto",
     file_namer: str = "standard",
-) -> List[Union[xr.Dataset, str]]:
+) -> list[Union[xr.Dataset, str]]:
     """Calculate an average over given dimensions.
 
     Parameters
@@ -125,7 +126,7 @@ def average_shape(
     output_type: str = "netcdf",
     split_method: str = "time:auto",
     file_namer: str = "standard",
-) -> List[Union[xr.Dataset, str]]:
+) -> list[Union[xr.Dataset, str]]:
     """Calculate a spatial average over a given shape.
 
     Parameters
@@ -201,7 +202,7 @@ def average_time(
     output_type: str = "netcdf",
     split_method: str = "time:auto",
     file_namer: str = "standard",
-) -> List[Union[xr.Dataset, str]]:
+) -> list[Union[xr.Dataset, str]]:
     """
 
     Parameters
6 changes: 3 additions & 3 deletions clisops/ops/base_operation.py
@@ -1,10 +1,10 @@
 from collections import ChainMap
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import xarray as xr
 from loguru import logger
-from roocs_utils.xarray_utils.xarray_utils import get_main_variable, open_xr_dataset
+from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset
 
 from clisops.utils.common import expand_wildcards
 from clisops.utils.file_namers import get_file_namer
@@ -199,7 +199,7 @@ def _remove_redundant_coordinates_attr(ds):
             ds[var].attrs.pop("coordinates", None)
         return ds
 
-    def process(self) -> List[Union[xr.Dataset, Path]]:
+    def process(self) -> list[Union[xr.Dataset, Path]]:
         """Main processing method used by all subclasses.
 
         Returns
4 changes: 2 additions & 2 deletions clisops/ops/regrid.py
@@ -1,6 +1,6 @@
 from datetime import datetime as dt
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import xarray as xr
 from loguru import logger
@@ -179,7 +179,7 @@ def regrid(
     split_method: Optional[str] = "time:auto",
     file_namer: Optional[str] = "standard",
     keep_attrs: Optional[Union[bool, str]] = True,
-) -> List[Union[xr.Dataset, str]]:
+) -> list[Union[xr.Dataset, str]]:
     """Regrid specified input file or xarray object.
 
     Parameters