diff --git a/.conda/openfisca-core/meta.yaml b/.conda/openfisca-core/meta.yaml index be31e84b9..1c90e6191 100644 --- a/.conda/openfisca-core/meta.yaml +++ b/.conda/openfisca-core/meta.yaml @@ -44,8 +44,10 @@ test: outputs: - name: openfisca-core + type: conda_v2 - name: openfisca-core-api + type: conda_v2 build: noarch: python requirements: @@ -61,6 +63,7 @@ outputs: - {{ pin_subpackage('openfisca-core', exact=True) }} - name: openfisca-core-dev + type: conda_v2 build: noarch: python requirements: diff --git a/.github/workflows/_before-conda.yaml b/.github/workflows/_before-conda.yaml index 7528a6a1c..06d0067ef 100644 --- a/.github/workflows/_before-conda.yaml +++ b/.github/workflows/_before-conda.yaml @@ -66,7 +66,7 @@ jobs: if: steps.cache-env.outputs.cache-hit != 'true' - name: Install dependencies - run: mamba install boa rattler-build anaconda-client + run: mamba install boa rattler-build if: steps.cache-env.outputs.cache-hit != 'true' - name: Update conda & dependencies diff --git a/.github/workflows/merge.yaml b/.github/workflows/merge.yaml index 57e0bb80a..31e863a96 100644 --- a/.github/workflows/merge.yaml +++ b/.github/workflows/merge.yaml @@ -217,12 +217,11 @@ jobs: use-mamba: true - name: Publish to conda - shell: bash -l {0} run: | - anaconda upload ~/conda-rel/noarch/openfisca-core-* \ - --token ${{ secrets.ANACONDA_TOKEN }} - --user openfisca - --force + rattler-build upload anaconda ~/conda-rel/noarch/*.conda \ + --force \ + --owner openfisca \ + --api-key ${{ secrets.ANACONDA_TOKEN }} test-on-windows: runs-on: windows-2019 @@ -245,9 +244,7 @@ jobs: uses: actions/checkout@v4 - name: Install with conda - shell: bash -l {0} run: conda install -c openfisca openfisca-core - name: Test openfisca - shell: bash -l {0} run: openfisca --help diff --git a/CHANGELOG.md b/CHANGELOG.md index b58b1a74d..7f900c330 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 42.0.7 [#1264](https://github.com/openfisca/openfisca-core/pull/1264) + +#### Technical changes + +- Add typing to `data_storage` module + ### 42.0.6 [#1263](https://github.com/openfisca/openfisca-core/pull/1263) #### Documentation diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index e2b4d8911..9f63047fb 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -21,5 +21,8 @@ # # See: https://www.python.org/dev/peps/pep-0008/#imports -from .in_memory_storage import InMemoryStorage # noqa: F401 -from .on_disk_storage import OnDiskStorage # noqa: F401 +from . import types +from .in_memory_storage import InMemoryStorage +from .on_disk_storage import OnDiskStorage + +__all__ = ["InMemoryStorage", "OnDiskStorage", "types"] diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index e6a5a866c..18387ff64 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -1,8 +1,14 @@ +from __future__ import annotations + +from collections.abc import KeysView, MutableMapping + import numpy from openfisca_core import periods from openfisca_core.periods import DateUnit +from . import types as t + class InMemoryStorage: """Storing and retrieving calculated vectors in memory. @@ -16,13 +22,13 @@ class InMemoryStorage: is_eternal: bool #: A dictionary containing data that has been stored in memory. - _arrays: dict + _arrays: MutableMapping[t.Period, t.Array[t.DTypeGeneric]] - def __init__(self, is_eternal=False) -> None: + def __init__(self, is_eternal: bool = False) -> None: self._arrays = {} self.is_eternal = is_eternal - def get(self, period): + def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """Retrieve the data for the specified period from memory. Args: @@ -57,7 +63,7 @@ def get(self, period): return None return values - def put(self, value, period) -> None: + def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """Store the specified data in memory for the specified period. Args: @@ -87,7 +93,7 @@ def put(self, value, period) -> None: self._arrays[period] = value - def delete(self, period=None) -> None: + def delete(self, period: None | t.Period = None) -> None: """Delete the data for the specified period from memory. Args: @@ -137,7 +143,7 @@ def delete(self, period=None) -> None: if not period.contains(period_item) } - def get_known_periods(self): + def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: @@ -161,7 +167,7 @@ def get_known_periods(self): return self._arrays.keys() - def get_memory_usage(self): + def get_memory_usage(self) -> t.MemoryUsage: """Memory usage of the storage. Returns: @@ -190,3 +196,6 @@ def get_memory_usage(self): "total_nb_bytes": array.nbytes * nb_arrays, "cell_size": array.itemsize, } + + +__all__ = ["InMemoryStorage"] diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 4b4075613..d1b8e2c4e 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from collections.abc import KeysView, MutableMapping + import os import shutil @@ -7,6 +11,8 @@ from openfisca_core.indexed_enums import EnumArray from openfisca_core.periods import DateUnit +from . import types as t + class OnDiskStorage: """Storing and retrieving calculated vectors on disk. @@ -28,13 +34,16 @@ class OnDiskStorage: preserve_storage_dir: bool #: Mapping of file paths to possible Enum values. - _enums: dict + _enums: MutableMapping[str, type[t.Enum]] #: Mapping of periods to file paths. - _files: dict + _files: MutableMapping[t.Period, str] def __init__( - self, storage_dir, is_eternal=False, preserve_storage_dir=False + self, + storage_dir: str, + is_eternal: bool = False, + preserve_storage_dir: bool = False, ) -> None: self._files = {} self._enums = {} @@ -42,7 +51,7 @@ def __init__( self.preserve_storage_dir = preserve_storage_dir self.storage_dir = storage_dir - def _decode_file(self, file): + def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: """Decode a file by loading its contents as a ``numpy`` array. Args: @@ -82,11 +91,15 @@ def _decode_file(self, file): """ enum = self._enums.get(file) + if enum is not None: return EnumArray(numpy.load(file), enum) - return numpy.load(file) - def get(self, period): + array: t.Array[t.DTypeGeneric] = numpy.load(file) + + return array + + def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """Retrieve the data for the specified period from disk. Args: @@ -124,7 +137,7 @@ def get(self, period): return None return self._decode_file(values) - def put(self, value, period) -> None: + def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """Store the specified data on disk for the specified period. Args: @@ -156,13 +169,13 @@ def put(self, value, period) -> None: filename = str(period) path = os.path.join(self.storage_dir, filename) + ".npy" - if isinstance(value, EnumArray): + if isinstance(value, EnumArray) and value.possible_values is not None: self._enums[path] = value.possible_values value = value.view(numpy.ndarray) numpy.save(path, value) self._files[period] = path - def delete(self, period=None) -> None: + def delete(self, period: None | t.Period = None) -> None: """Delete the data for the specified period from disk. Args: @@ -208,14 +221,13 @@ def delete(self, period=None) -> None: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) - if period is not None: - self._files = { - period_item: value - for period_item, value in self._files.items() - if not period.contains(period_item) - } + self._files = { + period_item: value + for period_item, value in self._files.items() + if not period.contains(period_item) + } - def get_known_periods(self): + def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: @@ -296,3 +308,6 @@ def __del__(self) -> None: parent_dir = os.path.abspath(os.path.join(self.storage_dir, os.pardir)) if not os.listdir(parent_dir): shutil.rmtree(parent_dir) + + +__all__ = ["OnDiskStorage"] diff --git a/openfisca_core/data_storage/types.py b/openfisca_core/data_storage/types.py new file mode 100644 index 000000000..db71abbf5 --- /dev/null +++ b/openfisca_core/data_storage/types.py @@ -0,0 +1,14 @@ +from typing_extensions import TypedDict + +from openfisca_core.types import Array, DTypeGeneric, Enum, Period + + +class MemoryUsage(TypedDict, total=True): + """Memory usage information.""" + + cell_size: float + nb_arrays: int + total_nb_bytes: int + + +__all__ = ["Array", "DTypeGeneric", "Enum", "Period"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index ec1afa45a..a6fd5d7f9 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,14 +1,13 @@ from __future__ import annotations -import enum - import numpy +from . import types as t from .config import ENUM_ARRAY_DTYPE from .enum_array import EnumArray -class Enum(enum.Enum): +class Enum(t.Enum): """Enum based on `enum34 `_, whose items have an index. """ diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 1b6c512b8..a1479d5b8 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,17 +1,14 @@ from __future__ import annotations -import typing from typing import Any, NoReturn +from typing_extensions import Self import numpy from . import types as t -if typing.TYPE_CHECKING: - from openfisca_core.indexed_enums import Enum - -class EnumArray(numpy.ndarray): +class EnumArray(t.EnumArray): """NumPy array subclass representing an array of enum items. EnumArrays are encoded as ``int`` arrays to improve performance @@ -22,9 +19,9 @@ class EnumArray(numpy.ndarray): # https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array. def __new__( cls, - input_array: t.Array[numpy.int16], - possible_values: type[Enum] | None = None, - ) -> EnumArray: + input_array: t.Array[t.DTypeEnum], + possible_values: None | type[t.Enum] = None, + ) -> Self: obj = numpy.asarray(input_array).view(cls) obj.possible_values = possible_values return obj diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index 43c38780f..d69eb098a 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,3 +1,3 @@ -from openfisca_core.types import Array +from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray -__all__ = ["Array"] +__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 711e6c512..b922cde09 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -3,24 +3,57 @@ from collections.abc import Iterable, Sequence, Sized from numpy.typing import NDArray from typing import Any, NewType, TypeVar, Union -from typing_extensions import Protocol, TypeAlias +from typing_extensions import Protocol, Self, TypeAlias + +import abc +import enum import numpy import pendulum -_N_co = TypeVar("_N_co", bound=numpy.generic, covariant=True) +#: Generic covariant type var. +_T_co = TypeVar("_T_co", covariant=True) + +# Commons + +#: Type var for numpy arrays. +_N_co = TypeVar("_N_co", covariant=True, bound="DTypeGeneric") #: Type representing an numpy array. Array: TypeAlias = NDArray[_N_co] +#: Type var for array-like objects. _L = TypeVar("_L") #: Type representing an array-like object. ArrayLike: TypeAlias = Sequence[_L] -#: Generic type vars. -_T_co = TypeVar("_T_co", covariant=True) +#: Type for bool arrays. +DTypeBool: TypeAlias = numpy.bool_ + +#: Type for int arrays. +DTypeInt: TypeAlias = numpy.int32 + +#: Type for float arrays. +DTypeFloat: TypeAlias = numpy.float32 + +#: Type for string arrays. +DTypeStr: TypeAlias = numpy.str_ + +#: Type for bytes arrays. +DTypeBytes: TypeAlias = numpy.bytes_ + +#: Type for Enum arrays. +DTypeEnum: TypeAlias = numpy.int16 +#: Type for date arrays. +DTypeDate: TypeAlias = numpy.datetime64 + +#: Type for "object" arrays. +DTypeObject: TypeAlias = numpy.object_ + +#: Type for "generic" arrays. +DTypeGeneric: TypeAlias = numpy.generic # Entities @@ -72,6 +105,22 @@ def key(self, /) -> RoleKey: ... def plural(self, /) -> None | RolePlural: ... +# Indexed enums + + +class Enum(enum.Enum, metaclass=enum.EnumMeta): + index: int + + +class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): + possible_values: None | type[Enum] + + @abc.abstractmethod + def __new__( + cls, input_array: Array[DTypeEnum], possible_values: None | type[Enum] = ... + ) -> Self: ... + + # Holders @@ -130,6 +179,7 @@ def start(self, /) -> Instant: ... def size(self, /) -> int: ... @property def stop(self, /) -> Instant: ... + def contains(self, other: Period, /) -> bool: ... def offset(self, offset: str | int, unit: None | DateUnit = None, /) -> Period: ... diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index 912d0567d..a3f5a8e45 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -29,7 +29,7 @@ lint-doc-%: @## @## They can be integrated into setup.cfg once all checks pass. @## The reason they're here is because otherwise we wouldn't be - @## able to integrate documentation improvements progresively. + @## able to integrate documentation improvements progressively. @## @$(call print_help,$(subst $*,%,$@:)) @python -m flake8 --select=D101,D102,D103,DAR openfisca_core/$* @@ -41,6 +41,7 @@ check-types: @$(call print_help,$@:) @python -m mypy \ openfisca_core/commons \ + openfisca_core/data_storage \ openfisca_core/entities \ openfisca_core/periods \ openfisca_core/types.py diff --git a/setup.py b/setup.py index 1e4a46479..202e5e449 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.0.6", + version="42.0.7", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[