Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelog/36.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- [pd.Index][pandas.Index] accessor [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor]
- ensure multiindex related functionality: [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor.ensure_index_is_multiindex] and [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor.ensure_is_multiindex]
15 changes: 10 additions & 5 deletions docs/pandas-accessors.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ you will need to run something like:
```python
from pandas_openscm.accessors import register_pandas_accessors

# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces
# The `pd.DataFrame.openscm` and `pd.Series.openscm` namespaces
# will not be available at this point.

# Register the accessors
register_pandas_accessors()

# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces
# The `pd.DataFrame.openscm`, `pd.Series.openscm` and `pd.Index.openscm` namespaces
# will now be available.
# I.e. you could now do something like
df = pd.DataFrame(
Expand All @@ -53,9 +53,14 @@ df.openscm.to_long_data()
register_pandas_accessors(namespace="my_custom_namespace")

# Doing it this way will make the custom namespace available under
# 'pd.DataFrame.my_custom_namespace' and 'pd.Series.my_custom_namespace'.
# `pd.DataFrame.my_custom_namespace`,
# `pd.Series.my_custom_namespace`
# and `pd.Index.my_custom_namespace`.
df.my_custom_namespace.to_long_data()
```

The full accessor APIs are documented at
[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][]
and [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][].
[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][],
[pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][]
and
[pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor][].
3 changes: 2 additions & 1 deletion src/pandas_openscm/accessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import pandas as pd

from pandas_openscm.accessors.dataframe import PandasDataFrameOpenSCMAccessor
from pandas_openscm.accessors.index import PandasIndexOpenSCMAccessor
from pandas_openscm.accessors.series import PandasSeriesOpenSCMAccessor


Expand Down Expand Up @@ -73,4 +74,4 @@ def register_pandas_accessors(namespace: str = "openscm") -> None:
PandasDataFrameOpenSCMAccessor
)
pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor)
# pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor)
pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor)
74 changes: 74 additions & 0 deletions src/pandas_openscm/accessors/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Accessor for [pd.Index][pandas.Index] (and sub-classes)
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Generic, TypeVar

import pandas as pd

from pandas_openscm.index_manipulation import ensure_is_multiindex

if TYPE_CHECKING:
    # NOTE(review): the original author flagged this bound as "somehow not
    # correct" for the type checker; resolving that was explicitly deferred.
    Idx = TypeVar("Idx", bound=pd.Index[Any])

else:
    # At runtime an unbounded type variable is used.
    Idx = TypeVar("Idx")


class PandasIndexOpenSCMAccessor(Generic[Idx]):
    """
    Accessor exposing pandas-openscm helpers on a [pd.Index][pandas.Index]

    See
    [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors)
    for background on custom accessors.
    """

    def __init__(self, index: Idx):
        """
        Store the index that the accessor operates on

        Parameters
        ----------
        index
            [pd.Index][pandas.Index] which is made available via the accessor
        """
        # No validation is performed here on purpose:
        # checks are left to the code that actually uses the data.
        self._index = index

    def ensure_is_multiindex(self) -> pd.MultiIndex:
        """
        Get the wrapped index as a [pd.MultiIndex][pandas.MultiIndex]

        Returns
        -------
        :
            The wrapped index as a [pd.MultiIndex][pandas.MultiIndex]

            If the wrapped index already is a
            [pd.MultiIndex][pandas.MultiIndex], this is a no-op.
        """
        return ensure_is_multiindex(self._index)

    def eim(self) -> pd.MultiIndex:
        """
        Get the wrapped index as a [pd.MultiIndex][pandas.MultiIndex]

        Short alias: this simply delegates to this accessor's
        `ensure_is_multiindex` method
        (see also [ensure_is_multiindex][pandas_openscm.index_manipulation.]).

        Returns
        -------
        :
            The wrapped index as a [pd.MultiIndex][pandas.MultiIndex]

            If the wrapped index already is a
            [pd.MultiIndex][pandas.MultiIndex], this is a no-op.
        """
        as_multiindex = self.ensure_is_multiindex()
        return as_multiindex
83 changes: 43 additions & 40 deletions src/pandas_openscm/accessors/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pandas as pd

from pandas_openscm.index_manipulation import (
ensure_index_is_multiindex,
set_index_levels_func,
)
from pandas_openscm.unit_conversion import convert_unit, convert_unit_like
Expand Down Expand Up @@ -181,46 +182,48 @@ def convert_unit_like(
# If users want correct type hints, they should use the functional form.
return res # type: ignore

# def ensure_index_is_multiindex(self, copy: bool = True) -> pd.DataFrame:
# """
# Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
#
# Parameters
# ----------
# copy
# Whether to copy `df` before manipulating the index name
#
# Returns
# -------
# :
# `df` with a [pd.MultiIndex][pandas.MultiIndex]
#
# If the index was already a [pd.MultiIndex][pandas.MultiIndex],
# this is a no-op (although the value of copy is respected).
# """
# return ensure_index_is_multiindex(self._df, copy=copy)
#
# def eiim(self, copy: bool = True) -> pd.DataFrame:
# """
# Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
#
# Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.]
#
# Parameters
# ----------
# copy
# Whether to copy `df` before manipulating the index name
#
# Returns
# -------
# :
# `df` with a [pd.MultiIndex][pandas.MultiIndex]
#
# If the index was already a [pd.MultiIndex][pandas.MultiIndex],
# this is a no-op (although the value of copy is respected).
# """
# return self.ensure_index_is_multiindex(copy=copy)
#
def ensure_index_is_multiindex(self, copy: bool = True) -> S:
    """
    Get the series with its index as a [pd.MultiIndex][pandas.MultiIndex]

    Parameters
    ----------
    copy
        Whether to copy `series` before manipulating the index

    Returns
    -------
    :
        `series` with a [pd.MultiIndex][pandas.MultiIndex]

        If the index already is a [pd.MultiIndex][pandas.MultiIndex],
        this is a no-op (although the value of `copy` is respected).
    """
    # The ignore is needed because something is wrong with the generic
    # type hinting; users who want correct hints can use the functional form.
    return ensure_index_is_multiindex(self._series, copy=copy)  # type: ignore

def eiim(self, copy: bool = True) -> S:
    """
    Get the series with its index as a [pd.MultiIndex][pandas.MultiIndex]

    Short alias: this simply delegates to this accessor's
    `ensure_index_is_multiindex` method
    (see also [ensure_index_is_multiindex][pandas_openscm.index_manipulation.]).

    Parameters
    ----------
    copy
        Whether to copy `series` before manipulating the index

    Returns
    -------
    :
        `series` with a [pd.MultiIndex][pandas.MultiIndex]

        If the index already is a [pd.MultiIndex][pandas.MultiIndex],
        this is a no-op (although the value of `copy` is respected).
    """
    with_multiindex = self.ensure_index_is_multiindex(copy=copy)
    return with_multiindex

# def fix_index_name_after_groupby_quantile(
# self, new_name: str = "quantile", copy: bool = False
# ) -> pd.DataFrame:
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ def setup_pandas_accessors() -> None:
pd.Series._accessors.discard("openscm")
if hasattr(pd.Series, "openscm"):
del pd.Series.openscm

pd.Index._accessors.discard("openscm")
if hasattr(pd.Index, "openscm"):
del pd.Index.openscm
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,14 @@
import pandas as pd
import pytest

from pandas_openscm.index_manipulation import ensure_index_is_multiindex
from pandas_openscm.testing import create_test_df
from pandas_openscm.index_manipulation import (
ensure_index_is_multiindex,
ensure_is_multiindex,
)
from pandas_openscm.testing import (
convert_to_desired_type,
create_test_df,
)


@pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False)))
Expand Down Expand Up @@ -71,12 +77,17 @@ def test_ensure_index_is_multiindex_no_op(copy, copy_exp):


@pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False)))
def test_accessor(setup_pandas_accessors, copy, copy_exp):
@pytest.mark.parametrize(
"pobj_type",
("DataFrame", "Series"),
)
def test_accessor(setup_pandas_accessors, copy, copy_exp, pobj_type):
start = pd.DataFrame(
[[1, 2], [3, 4]],
columns=[10, 20],
index=pd.Index(["a", "b"], name="variable"),
)
start = convert_to_desired_type(start, pobj_type)

call_kwargs = {}
if copy is not None:
Expand Down Expand Up @@ -106,3 +117,98 @@ def test_accessor(setup_pandas_accessors, copy, copy_exp):
else:
# Same object returned
assert id(start) == id(res_short)


def test_ensure_is_multiindex_index():
    """A named single-level index is converted to a one-level MultiIndex."""
    plain_index = pd.Index([1, 2, 3], name="id")
    expected = pd.MultiIndex.from_tuples([(1,), (2,), (3,)], names=["id"])

    converted = ensure_is_multiindex(plain_index)

    assert isinstance(converted, pd.MultiIndex)
    pd.testing.assert_index_equal(converted, expected)


def test_ensure_is_multiindex_index_no_name():
    """An unnamed single-level index gives a MultiIndex whose level is unnamed."""
    unnamed_index = pd.Index([1, 2, 3])
    expected = pd.MultiIndex.from_tuples([(1,), (2,), (3,)], names=[None])

    converted = ensure_is_multiindex(unnamed_index)

    assert isinstance(converted, pd.MultiIndex)
    pd.testing.assert_index_equal(converted, expected)


def test_ensure_is_multiindex():
    """An input that already is a MultiIndex is handed back untouched."""
    multi = pd.MultiIndex.from_tuples(
        [("a", "b"), ("c", "d")],
        names=["mod", "scen"],
    )

    returned = ensure_is_multiindex(multi)

    # No-op: the very same object must come back
    assert returned is multi
    assert isinstance(returned, pd.MultiIndex)
    pd.testing.assert_index_equal(returned, multi)


def test_ensure_is_multiindex_accessor_index(setup_pandas_accessors):
    """The accessor method and its `eim` alias both convert a plain index."""
    plain_index = pd.Index([1, 2, 3], name="id")
    expected = pd.MultiIndex.from_tuples([(1,), (2,), (3,)], names=["id"])

    via_full_name = plain_index.openscm.ensure_is_multiindex()
    via_alias = plain_index.openscm.eim()

    # The alias must give the same result as the full method name
    pd.testing.assert_index_equal(via_full_name, via_alias)

    assert isinstance(via_full_name, pd.MultiIndex)
    pd.testing.assert_index_equal(via_full_name, expected)


def test_ensure_is_multiindex_accessor_multiindex(setup_pandas_accessors):
    """Via the accessor, an existing MultiIndex is returned as the same object."""
    multi = pd.MultiIndex.from_tuples(
        [("a", "b"), ("c", "d")],
        names=["mod", "scen"],
    )

    via_full_name = multi.openscm.ensure_is_multiindex()
    via_alias = multi.openscm.eim()

    # The alias must give the same result as the full method name
    pd.testing.assert_index_equal(via_full_name, via_alias)

    # No-op: the very same object must come back
    assert via_full_name is multi
    assert isinstance(via_full_name, pd.MultiIndex)
    pd.testing.assert_index_equal(via_full_name, multi)
Loading