diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index 30a95deed9..4ff8af355f 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -5,10 +5,12 @@ on: paths: - '**' - '!**.md' - - '!apis/python/**' - - '!docs/**' - '!.github/**' - '.github/workflows/r-ci.yml' + - '!.pre-commit-config.yaml' + - '!apis/python/**' + - '!docs/**' + - '!LICENSE' push: branches: - main diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26fbea1dd7..ca264ac706 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,10 @@ repos: hooks: - id: mypy additional_dependencies: - - pandas-stubs + # Pandas types changed between 1.x and 2.x. Our setup.py permits both, but for type-checking purposes we use the + # Pandas 2.x types (e.g. `pd.Series[Any]`). See `_types.py` or https://github.com/single-cell-data/TileDB-SOMA/issues/2839 + # for more info. + - "pandas-stubs>=2" - "somacore==1.0.13" - types-setuptools args: ["--config-file=apis/python/pyproject.toml", "apis/python/src", "apis/python/devtools"] diff --git a/apis/python/src/tiledbsoma/_indexer.py b/apis/python/src/tiledbsoma/_indexer.py index f38868e02d..bf289d6777 100644 --- a/apis/python/src/tiledbsoma/_indexer.py +++ b/apis/python/src/tiledbsoma/_indexer.py @@ -10,29 +10,20 @@ from tiledbsoma import pytiledbsoma as clib +from ._types import PDSeries + if TYPE_CHECKING: from .options import SOMATileDBContext - IndexerDataType = Union[ - npt.NDArray[np.int64], - pa.Array, - pa.IntegerArray, - pd.Series[Any], - pd.arrays.IntegerArray, - pa.ChunkedArray, - List[int], - ] - -else: - IndexerDataType = Union[ - npt.NDArray[np.int64], - pa.Array, - pa.IntegerArray, - pd.Series, - pd.arrays.IntegerArray, - pa.ChunkedArray, - List[int], - ] +IndexerDataType = Union[ + npt.NDArray[np.int64], + pa.Array, + pa.IntegerArray, + PDSeries, + pd.arrays.IntegerArray, + pa.ChunkedArray, + List[int], +] def tiledbsoma_build_index( diff --git 
a/apis/python/src/tiledbsoma/_types.py b/apis/python/src/tiledbsoma/_types.py index 5afaae971f..ab40eb5890 100644 --- a/apis/python/src/tiledbsoma/_types.py +++ b/apis/python/src/tiledbsoma/_types.py @@ -17,26 +17,35 @@ from typing_extensions import Literal if TYPE_CHECKING: + # `pd.{Series,Index}` require type parameters iff `pandas>=2`. Our pandas dependency (in `setup.py`) is unpinned, + # which generally resolves to `pandas>=2`, but may be pandas<2 if something else in the user's environment requires + # that. For type-checking purposes, `.pre-commit-config.yaml` specifies `pandas-stubs>=2`, and we type-check against + # the `pandas>=2` types here. + PDSeries = pd.Series[Any] + PDIndex = pd.Index[Any] + NPInteger = np.integer[npt.NBitBase] NPFloating = np.floating[npt.NBitBase] NPNDArray = npt.NDArray[np.number[npt.NBitBase]] - # A pd.Series of "Any" type will raise mypy error: - PDSeries = pd.Series[Any] # type: ignore[misc] else: + # When not-type-checking, but running with `pandas>=2`, the "missing" type-params don't affect anything. + PDSeries = pd.Series + PDIndex = pd.Index + + # Type subscription requires python >= 3.9, and we currently only type-check against 3.11. + # TODO: remove these (and unify around subscripted types above) when we drop support for 3.8. NPInteger = np.integer NPFloating = np.floating + # This alias likely needs to remain special-cased, even in Python ≥3.11, as tests pass the `Matrix` type alias + # (which includes `NPNDArray` via `DenseMatrix`) to `isinstance`, causing error "argument 2 cannot be a + # parameterized generic". NPNDArray = np.ndarray - PDSeries = pd.Series - Path = Union[str, pathlib.Path] Ids = Union[List[str], List[bytes], List[int]] -if TYPE_CHECKING: - Labels = Union[Sequence[str], pd.Index[Any]] -else: - Labels = Union[Sequence[str], pd.Index] +Labels = Union[Sequence[str], PDIndex] NTuple = Tuple[int, ...]