Skip to content

Commit

Permalink
docs(storage): fix data_storage types (#1264)
Browse files Browse the repository at this point in the history
  • Loading branch information
bonjourmauko authored Oct 4, 2024
2 parents bccbc41 + 2005df1 commit df9e9d7
Show file tree
Hide file tree
Showing 14 changed files with 146 additions and 52 deletions.
3 changes: 3 additions & 0 deletions .conda/openfisca-core/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@ test:

outputs:
- name: openfisca-core
type: conda_v2

- name: openfisca-core-api
type: conda_v2
build:
noarch: python
requirements:
Expand All @@ -61,6 +63,7 @@ outputs:
- {{ pin_subpackage('openfisca-core', exact=True) }}

- name: openfisca-core-dev
type: conda_v2
build:
noarch: python
requirements:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_before-conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
if: steps.cache-env.outputs.cache-hit != 'true'

- name: Install dependencies
run: mamba install boa rattler-build anaconda-client
run: mamba install boa rattler-build
if: steps.cache-env.outputs.cache-hit != 'true'

- name: Update conda & dependencies
Expand Down
11 changes: 4 additions & 7 deletions .github/workflows/merge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,11 @@ jobs:
use-mamba: true

- name: Publish to conda
shell: bash -l {0}
run: |
anaconda upload ~/conda-rel/noarch/openfisca-core-* \
--token ${{ secrets.ANACONDA_TOKEN }}
--user openfisca
--force
rattler-build upload anaconda ~/conda-rel/noarch/*.conda \
--force \
--owner openfisca \
--api-key ${{ secrets.ANACONDA_TOKEN }}
test-on-windows:
runs-on: windows-2019
Expand All @@ -245,9 +244,7 @@ jobs:
uses: actions/checkout@v4

- name: Install with conda
shell: bash -l {0}
run: conda install -c openfisca openfisca-core

- name: Test openfisca
shell: bash -l {0}
run: openfisca --help
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

### 42.0.7 [#1264](https://github.com/openfisca/openfisca-core/pull/1264)

#### Technical changes

- Add typing to `data_storage` module

### 42.0.6 [#1263](https://github.com/openfisca/openfisca-core/pull/1263)

#### Documentation
Expand Down
7 changes: 5 additions & 2 deletions openfisca_core/data_storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,8 @@
#
# See: https://www.python.org/dev/peps/pep-0008/#imports

from .in_memory_storage import InMemoryStorage # noqa: F401
from .on_disk_storage import OnDiskStorage # noqa: F401
from . import types
from .in_memory_storage import InMemoryStorage
from .on_disk_storage import OnDiskStorage

__all__ = ["InMemoryStorage", "OnDiskStorage", "types"]
23 changes: 16 additions & 7 deletions openfisca_core/data_storage/in_memory_storage.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
from __future__ import annotations

from collections.abc import KeysView, MutableMapping

import numpy

from openfisca_core import periods
from openfisca_core.periods import DateUnit

from . import types as t


class InMemoryStorage:
"""Storing and retrieving calculated vectors in memory.
Expand All @@ -16,13 +22,13 @@ class InMemoryStorage:
is_eternal: bool

#: A dictionary containing data that has been stored in memory.
_arrays: dict
_arrays: MutableMapping[t.Period, t.Array[t.DTypeGeneric]]

def __init__(self, is_eternal=False) -> None:
def __init__(self, is_eternal: bool = False) -> None:
self._arrays = {}
self.is_eternal = is_eternal

def get(self, period):
def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]:
"""Retrieve the data for the specified period from memory.
Args:
Expand Down Expand Up @@ -57,7 +63,7 @@ def get(self, period):
return None
return values

def put(self, value, period) -> None:
def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None:
"""Store the specified data in memory for the specified period.
Args:
Expand Down Expand Up @@ -87,7 +93,7 @@ def put(self, value, period) -> None:

self._arrays[period] = value

def delete(self, period=None) -> None:
def delete(self, period: None | t.Period = None) -> None:
"""Delete the data for the specified period from memory.
Args:
Expand Down Expand Up @@ -137,7 +143,7 @@ def delete(self, period=None) -> None:
if not period.contains(period_item)
}

def get_known_periods(self):
def get_known_periods(self) -> KeysView[t.Period]:
"""List of storage's known periods.
Returns:
Expand All @@ -161,7 +167,7 @@ def get_known_periods(self):

return self._arrays.keys()

def get_memory_usage(self):
def get_memory_usage(self) -> t.MemoryUsage:
"""Memory usage of the storage.
Returns:
Expand Down Expand Up @@ -190,3 +196,6 @@ def get_memory_usage(self):
"total_nb_bytes": array.nbytes * nb_arrays,
"cell_size": array.itemsize,
}


__all__ = ["InMemoryStorage"]
47 changes: 31 additions & 16 deletions openfisca_core/data_storage/on_disk_storage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from __future__ import annotations

from collections.abc import KeysView, MutableMapping

import os
import shutil

Expand All @@ -7,6 +11,8 @@
from openfisca_core.indexed_enums import EnumArray
from openfisca_core.periods import DateUnit

from . import types as t


class OnDiskStorage:
"""Storing and retrieving calculated vectors on disk.
Expand All @@ -28,21 +34,24 @@ class OnDiskStorage:
preserve_storage_dir: bool

#: Mapping of file paths to possible Enum values.
_enums: dict
_enums: MutableMapping[str, type[t.Enum]]

#: Mapping of periods to file paths.
_files: dict
_files: MutableMapping[t.Period, str]

def __init__(
self, storage_dir, is_eternal=False, preserve_storage_dir=False
self,
storage_dir: str,
is_eternal: bool = False,
preserve_storage_dir: bool = False,
) -> None:
self._files = {}
self._enums = {}
self.is_eternal = is_eternal
self.preserve_storage_dir = preserve_storage_dir
self.storage_dir = storage_dir

def _decode_file(self, file):
def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]:
"""Decode a file by loading its contents as a ``numpy`` array.
Args:
Expand Down Expand Up @@ -82,11 +91,15 @@ def _decode_file(self, file):
"""

enum = self._enums.get(file)

if enum is not None:
return EnumArray(numpy.load(file), enum)
return numpy.load(file)

def get(self, period):
array: t.Array[t.DTypeGeneric] = numpy.load(file)

return array

def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]:
"""Retrieve the data for the specified period from disk.
Args:
Expand Down Expand Up @@ -124,7 +137,7 @@ def get(self, period):
return None
return self._decode_file(values)

def put(self, value, period) -> None:
def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None:
"""Store the specified data on disk for the specified period.
Args:
Expand Down Expand Up @@ -156,13 +169,13 @@ def put(self, value, period) -> None:

filename = str(period)
path = os.path.join(self.storage_dir, filename) + ".npy"
if isinstance(value, EnumArray):
if isinstance(value, EnumArray) and value.possible_values is not None:
self._enums[path] = value.possible_values
value = value.view(numpy.ndarray)
numpy.save(path, value)
self._files[period] = path

def delete(self, period=None) -> None:
def delete(self, period: None | t.Period = None) -> None:
"""Delete the data for the specified period from disk.
Args:
Expand Down Expand Up @@ -208,14 +221,13 @@ def delete(self, period=None) -> None:
period = periods.period(DateUnit.ETERNITY)
period = periods.period(period)

if period is not None:
self._files = {
period_item: value
for period_item, value in self._files.items()
if not period.contains(period_item)
}
self._files = {
period_item: value
for period_item, value in self._files.items()
if not period.contains(period_item)
}

def get_known_periods(self):
def get_known_periods(self) -> KeysView[t.Period]:
"""List of storage's known periods.
Returns:
Expand Down Expand Up @@ -296,3 +308,6 @@ def __del__(self) -> None:
parent_dir = os.path.abspath(os.path.join(self.storage_dir, os.pardir))
if not os.listdir(parent_dir):
shutil.rmtree(parent_dir)


__all__ = ["OnDiskStorage"]
14 changes: 14 additions & 0 deletions openfisca_core/data_storage/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from typing_extensions import TypedDict

from openfisca_core.types import Array, DTypeGeneric, Enum, Period


class MemoryUsage(TypedDict):
    """Shape of the mapping that reports a storage's memory usage.

    All three keys are required.
    """

    #: Size of a single array element (taken from ``itemsize``).
    cell_size: float

    #: Number of arrays accounted for.
    nb_arrays: int

    #: Number of bytes taken by all the arrays together.
    total_nb_bytes: int


# Public names of this module: the types re-exported from
# ``openfisca_core.types`` plus ``MemoryUsage``, which is defined here and
# used by the storage classes, so it belongs to the public interface too.
__all__ = ["Array", "DTypeGeneric", "Enum", "MemoryUsage", "Period"]
5 changes: 2 additions & 3 deletions openfisca_core/indexed_enums/enum.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import annotations

import enum

import numpy

from . import types as t
from .config import ENUM_ARRAY_DTYPE
from .enum_array import EnumArray


class Enum(enum.Enum):
class Enum(t.Enum):
"""Enum based on `enum34 <https://pypi.python.org/pypi/enum34/>`_, whose items
have an index.
"""
Expand Down
13 changes: 5 additions & 8 deletions openfisca_core/indexed_enums/enum_array.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
from __future__ import annotations

import typing
from typing import Any, NoReturn
from typing_extensions import Self

import numpy

from . import types as t

if typing.TYPE_CHECKING:
from openfisca_core.indexed_enums import Enum


class EnumArray(numpy.ndarray):
class EnumArray(t.EnumArray):
"""NumPy array subclass representing an array of enum items.
EnumArrays are encoded as ``int`` arrays to improve performance
Expand All @@ -22,9 +19,9 @@ class EnumArray(numpy.ndarray):
# https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array.
def __new__(
cls,
input_array: t.Array[numpy.int16],
possible_values: type[Enum] | None = None,
) -> EnumArray:
input_array: t.Array[t.DTypeEnum],
possible_values: None | type[t.Enum] = None,
) -> Self:
obj = numpy.asarray(input_array).view(cls)
obj.possible_values = possible_values
return obj
Expand Down
4 changes: 2 additions & 2 deletions openfisca_core/indexed_enums/types.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from openfisca_core.types import Array
from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray

__all__ = ["Array"]
__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"]
Loading

0 comments on commit df9e9d7

Please sign in to comment.