Skip to content

implement and test mean #44

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
release = version

# default settings
source_suffix = ".rst"
master_doc = "index"
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

extensions = [
"sphinx.ext.intersphinx",
"sphinx.ext.autodoc",
Expand Down
4 changes: 2 additions & 2 deletions src/fast_array_utils/_validation.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations

from numbers import Integral
import numpy as np


def validate_axis(axis: int | None) -> None:
if axis is None:
return
if not isinstance(axis, Integral): # pragma: no cover
if not isinstance(axis, int | np.integer): # pragma: no cover
msg = "axis must be integer or None."
raise TypeError(msg)
if axis not in (0, 1): # pragma: no cover
Expand Down
10 changes: 6 additions & 4 deletions src/fast_array_utils/conv/_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,17 @@ def _to_dense(


@_to_dense.register(types.CSBase) # type: ignore[call-overload,misc]
def _(x: types.CSBase, /, *, to_memory: bool = False) -> NDArray[Any]:
def _to_dense_cs(x: types.CSBase, /, *, to_memory: bool = False) -> NDArray[Any]:
from . import scipy

del to_memory # it already is
return scipy.to_dense(x)


@_to_dense.register(types.DaskArray)
def _(x: types.DaskArray, /, *, to_memory: bool = False) -> NDArray[Any] | types.DaskArray:
def _to_dense_dask(
x: types.DaskArray, /, *, to_memory: bool = False
) -> NDArray[Any] | types.DaskArray:
if TYPE_CHECKING:
from dask.array.core import map_blocks
else:
Expand All @@ -109,7 +111,7 @@ def _(x: types.DaskArray, /, *, to_memory: bool = False) -> NDArray[Any] | types


@_to_dense.register(types.OutOfCoreDataset)
def _(
def _to_dense_ooc(
x: types.OutOfCoreDataset[types.CSBase | NDArray[Any]], /, *, to_memory: bool = False
) -> NDArray[Any]:
if not to_memory:
Expand All @@ -120,7 +122,7 @@ def _(


@_to_dense.register(types.CupyArray | types.CupySparseMatrix) # type: ignore[call-overload,misc]
def _(
def _to_dense_cupy(
x: types.CupyArray | types.CupySparseMatrix, /, *, to_memory: bool = False
) -> NDArray[Any] | types.CupyArray:
x = x.toarray() if isinstance(x, types.CupySparseMatrix) else x
Expand Down
3 changes: 2 additions & 1 deletion src/fast_array_utils/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from __future__ import annotations

from ._is_constant import is_constant
from ._mean import mean
from ._sum import sum


__all__ = ["is_constant", "sum"]
__all__ = ["is_constant", "mean", "sum"]
16 changes: 11 additions & 5 deletions src/fast_array_utils/stats/_is_constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def _is_constant(


@_is_constant.register(np.ndarray)
def _(a: NDArray[Any], /, *, axis: Literal[0, 1, None] = None) -> bool | NDArray[np.bool]:
def _is_constant_ndarray(
a: NDArray[Any], /, *, axis: Literal[0, 1, None] = None
) -> bool | NDArray[np.bool]:
# Should eventually support nd, not now.
match axis:
case None:
Expand All @@ -87,7 +89,9 @@ def _is_constant_rows(a: NDArray[Any]) -> NDArray[np.bool]:


@_is_constant.register(types.CSBase) # type: ignore[call-overload,misc]
def _(a: types.CSBase, /, *, axis: Literal[0, 1, None] = None) -> bool | NDArray[np.bool]:
def _is_constant_cs(
a: types.CSBase, /, *, axis: Literal[0, 1, None] = None
) -> bool | NDArray[np.bool]:
if len(a.shape) == 1: # pragma: no cover
msg = "array must have 2 dimensions"
raise ValueError(msg)
Expand All @@ -102,11 +106,11 @@ def _(a: types.CSBase, /, *, axis: Literal[0, 1, None] = None) -> bool | NDArray
a = a.T.tocsr()
case 1, "csc":
a = a.T.tocsc()
return _is_constant_csr_rows(a.data, a.indptr, shape)
return _is_constant_cs_major(a.data, a.indptr, shape)


@numba.njit(cache=True)
def _is_constant_csr_rows(
def _is_constant_cs_major(
data: NDArray[np.number[Any]],
indptr: NDArray[np.integer[Any]],
shape: tuple[int, int],
Expand All @@ -125,7 +129,9 @@ def _is_constant_csr_rows(


@_is_constant.register(types.DaskArray)
def _(a: types.DaskArray, /, *, axis: Literal[0, 1, None] = None) -> types.DaskArray:
def _is_constant_dask(
a: types.DaskArray, /, *, axis: Literal[0, 1, None] = None
) -> types.DaskArray:
import dask.array as da

if axis is not None:
Expand Down
55 changes: 55 additions & 0 deletions src/fast_array_utils/stats/_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from ._sum import sum as sum_


if TYPE_CHECKING:
from typing import Any, Literal

from numpy._typing._array_like import _ArrayLikeFloat_co as ArrayLike
from numpy.typing import DTypeLike, NDArray

from .. import types

# all supported types except OutOfCoreDataset (TODO)
Array = (
NDArray[Any]
| types.CSBase
| types.H5Dataset
| types.ZarrArray
| types.CupyArray
| types.CupySparseMatrix
| types.DaskArray
)


def mean(
x: ArrayLike | Array,
/,
*,
axis: Literal[0, 1, None] = None,
dtype: DTypeLike | None = None,
) -> NDArray[Any] | types.DaskArray:
"""Mean over both or one axis.

Returns
-------
If ``axis`` is :data:`None`, then the mean over all elements is returned as a scalar.
Otherwise, the mean over the given axis is returned as a 1D array.

See Also
--------
:func:`numpy.mean`
"""
if not hasattr(x, "shape"):
raise NotImplementedError # TODO(flying-sheep): infer shape # noqa: TD003

Check warning on line 50 in src/fast_array_utils/stats/_mean.py

View check run for this annotation

Codecov / codecov/patch

src/fast_array_utils/stats/_mean.py#L50

Added line #L50 was not covered by tests
if TYPE_CHECKING:
assert isinstance(x, Array)
total = sum_(x, axis=axis, dtype=dtype)
n = np.prod(x.shape) if axis is None else x.shape[axis]
return total / n
23 changes: 16 additions & 7 deletions src/fast_array_utils/stats/_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
if TYPE_CHECKING:
from typing import Literal

from numpy.typing import ArrayLike, DTypeLike
from numpy._typing._array_like import _ArrayLikeFloat_co as ArrayLike
from numpy.typing import DTypeLike

# all supported types except CSBase, Dask and OutOfCoreDataset (TODO)
Array = (
Expand All @@ -24,11 +25,19 @@

@overload
def sum(
x: Array | types.CSBase, /, *, axis: None = None, dtype: DTypeLike | None = None
x: ArrayLike | Array | types.CSBase,
/,
*,
axis: None = None,
dtype: DTypeLike | None = None,
) -> np.number[Any]: ...
@overload
def sum(
x: Array | types.CSBase, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None
x: ArrayLike | Array | types.CSBase,
/,
*,
axis: Literal[0, 1],
dtype: DTypeLike | None = None,
) -> NDArray[Any]: ...
@overload
def sum(
Expand All @@ -37,7 +46,7 @@ def sum(


def sum(
x: Array | types.CSBase | types.DaskArray,
x: ArrayLike | Array | types.CSBase | types.DaskArray,
/,
*,
axis: Literal[0, 1, None] = None,
Expand All @@ -61,7 +70,7 @@ def sum(

@singledispatch
def _sum(
x: Array | types.CSBase | types.DaskArray,
x: ArrayLike | Array | types.CSBase | types.DaskArray,
/,
*,
axis: Literal[0, 1, None] = None,
Expand All @@ -78,7 +87,7 @@ def _sum(


@_sum.register(types.CSBase) # type: ignore[call-overload,misc]
def _(
def _sum_cs(
x: types.CSBase, /, *, axis: Literal[0, 1, None] = None, dtype: DTypeLike | None = None
) -> NDArray[Any] | np.number[Any]:
import scipy.sparse as sp
Expand All @@ -91,7 +100,7 @@ def _(


@_sum.register(types.DaskArray)
def _(
def _sum_dask(
x: types.DaskArray, /, *, axis: Literal[0, 1, None] = None, dtype: DTypeLike | None = None
) -> types.DaskArray:
import dask.array as da
Expand Down
4 changes: 2 additions & 2 deletions src/testing/fast_array_utils/_array_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@


if TYPE_CHECKING:
from typing import Any, Protocol, TypeAlias
from typing import Any, Protocol

import h5py
from numpy.typing import ArrayLike, DTypeLike, NDArray

Array: TypeAlias = (
Array = (
NDArray[Any]
| types.CSBase
| types.CupyArray
Expand Down
21 changes: 16 additions & 5 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, Protocol, TypeAlias
from typing import Any, Protocol

from numpy.typing import NDArray
from pytest_codspeed import BenchmarkFixture

from fast_array_utils.stats._sum import Array
from fast_array_utils.stats._mean import Array
from testing.fast_array_utils import ArrayType

DTypeIn = type[np.float32 | np.float64 | np.int32 | np.bool]
DTypeOut = type[np.float32 | np.float64 | np.int64]

Benchmarkable: TypeAlias = NDArray[Any] | types.CSBase
Benchmarkable = NDArray[Any] | types.CSBase

class BenchFun(Protocol): # noqa: D101
def __call__( # noqa: D102
Expand Down Expand Up @@ -54,7 +54,7 @@ def dtype_arg(request: pytest.FixtureRequest) -> DTypeOut | None:


def test_sum(
array_type: ArrayType[Array | types.CSBase | types.DaskArray],
array_type: ArrayType[Array],
dtype_in: DTypeIn,
dtype_arg: DTypeOut | None,
axis: Literal[0, 1, None],
Expand Down Expand Up @@ -88,6 +88,17 @@ def test_sum(
np.testing.assert_array_equal(sum_, np.sum(np_arr, axis=axis, dtype=dtype_arg))


@pytest.mark.parametrize(("axis", "expected"), [(None, 3.5), (0, [2.5, 3.5, 4.5]), (1, [2.0, 5.0])])
def test_mean(
array_type: ArrayType[Array], axis: Literal[0, 1, None], expected: list[float]
) -> None:
arr = array_type(np.array([[1, 2, 3], [4, 5, 6]]))
result = stats.mean(arr, axis=axis)
if isinstance(result, types.DaskArray):
result = result.compute() # type: ignore[no-untyped-call]
np.testing.assert_array_equal(result, expected)


# TODO(flying-sheep): enable for GPU # noqa: TD003
@pytest.mark.array_type(skip=Flags.Disk | Flags.Gpu)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -138,7 +149,7 @@ def test_dask_constant_blocks(

@pytest.mark.benchmark
@pytest.mark.array_type(skip=Flags.Matrix | Flags.Dask | Flags.Disk | Flags.Gpu)
@pytest.mark.parametrize("func", [stats.sum, stats.is_constant])
@pytest.mark.parametrize("func", [stats.sum, stats.mean, stats.is_constant])
@pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float
def test_stats_benchmark(
benchmark: BenchmarkFixture,
Expand Down