Skip to content

Commit 9f27b32

Browse files
authored
Merge pull request #36 from openscm/series-eeim
Add ensure is multiindex accessors
2 parents 770fa13 + 7b86075 commit 9f27b32

File tree

7 files changed

+244
-49
lines changed

7 files changed

+244
-49
lines changed

changelog/36.feature.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- Added a [pd.Index][pandas.Index] accessor: [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor][]
2+
- Added accessor methods for ensuring that an index is a [pd.MultiIndex][pandas.MultiIndex]: [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor.ensure_index_is_multiindex][] and [pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor.ensure_is_multiindex][]

docs/pandas-accessors.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ you will need to run something like:
2121
```python
2222
from pandas_openscm.accessors import register_pandas_accessors
2323

24-
# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces
24+
# The `pd.DataFrame.openscm` and `pd.Series.openscm` namespaces
2525
# will not be available at this point.
2626

2727
# Register the accessors
2828
register_pandas_accessors()
2929

30-
# The 'pd.DataFrame.openscm' and 'pd.Series.openscm' namespaces
30+
# The `pd.DataFrame.openscm`, `pd.Series.openscm` and `pd.Index.openscm` namespaces
3131
# will now be available.
3232
# I.e. you could now do something like
3333
df = pd.DataFrame(
@@ -53,9 +53,14 @@ df.openscm.to_long_data()
5353
register_pandas_accessors(namespace="my_custom_namespace")
5454

5555
# Doing it this way will make the custom namespace available under
56-
# 'pd.DataFrame.my_custom_namespace' and 'pd.Series.my_custom_namespace'.
56+
# `pd.DataFrame.my_custom_namespace`,
57+
# `pd.Series.my_custom_namespace`
58+
# and `pd.Index.my_custom_namespace`.
59+
df.my_custom_namespace.to_long_data()
5760
```
5861

5962
The full accessor APIs are documented at
60-
[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][]
61-
and [pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][].
63+
[pandas_openscm.accessors.dataframe.PandasDataFrameOpenSCMAccessor][],
64+
[pandas_openscm.accessors.series.PandasSeriesOpenSCMAccessor][]
65+
and
66+
[pandas_openscm.accessors.index.PandasIndexOpenSCMAccessor][].

src/pandas_openscm/accessors/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import pandas as pd
3939

4040
from pandas_openscm.accessors.dataframe import PandasDataFrameOpenSCMAccessor
41+
from pandas_openscm.accessors.index import PandasIndexOpenSCMAccessor
4142
from pandas_openscm.accessors.series import PandasSeriesOpenSCMAccessor
4243

4344

@@ -73,4 +74,4 @@ def register_pandas_accessors(namespace: str = "openscm") -> None:
7374
PandasDataFrameOpenSCMAccessor
7475
)
7576
pd.api.extensions.register_series_accessor(namespace)(PandasSeriesOpenSCMAccessor)
76-
# pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor)
77+
pd.api.extensions.register_index_accessor(namespace)(PandasIndexOpenSCMAccessor)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""
2+
Accessor for [pd.Index][pandas.Index] (and sub-classes)
3+
"""
4+
5+
from __future__ import annotations
6+
7+
from typing import TYPE_CHECKING, Any, Generic, TypeVar
8+
9+
import pandas as pd
10+
11+
from pandas_openscm.index_manipulation import ensure_is_multiindex
12+
13+
if TYPE_CHECKING:
14+
# Hmm this is somehow not correct.
15+
# Figuring it out is a job for another day
16+
Idx = TypeVar("Idx", bound=pd.Index[Any])
17+
18+
19+
else:
20+
Idx = TypeVar("Idx")
21+
22+
23+
class PandasIndexOpenSCMAccessor(Generic[Idx]):
24+
"""
25+
[pd.Index][pandas.Index] accessor
26+
27+
For details, see
28+
[pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
29+
"""
30+
31+
def __init__(self, index: Idx):
32+
"""
33+
Initialise
34+
35+
Parameters
36+
----------
37+
index
38+
[pd.Index][pandas.Index] to use via the accessor
39+
"""
40+
# It is possible to validate here.
41+
# However, it's probably better to do validation closer to the data use.
42+
self._index = index
43+
44+
def ensure_is_multiindex(self) -> pd.MultiIndex:
45+
"""
46+
Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
47+
48+
Returns
49+
-------
50+
:
51+
`index` as a [pd.MultiIndex][pandas.MultiIndex]
52+
53+
If the index was already a [pd.MultiIndex][pandas.MultiIndex],
54+
this is a no-op.
55+
"""
56+
res = ensure_is_multiindex(self._index)
57+
58+
return res
59+
60+
def eim(self) -> pd.MultiIndex:
61+
"""
62+
Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
63+
64+
Alias for [ensure_is_multiindex][pandas_openscm.index_manipulation.]
65+
66+
Returns
67+
-------
68+
:
69+
`index` as a [pd.MultiIndex][pandas.MultiIndex]
70+
71+
If the index was already a [pd.MultiIndex][pandas.MultiIndex],
72+
this is a no-op.
73+
"""
74+
return self.ensure_is_multiindex()

src/pandas_openscm/accessors/series.py

Lines changed: 43 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import pandas as pd
1111

1212
from pandas_openscm.index_manipulation import (
13+
ensure_index_is_multiindex,
1314
set_index_levels_func,
1415
)
1516
from pandas_openscm.unit_conversion import convert_unit, convert_unit_like
@@ -181,46 +182,48 @@ def convert_unit_like(
181182
# If users want correct type hints, they should use the functional form.
182183
return res # type: ignore
183184

184-
# def ensure_index_is_multiindex(self, copy: bool = True) -> pd.DataFrame:
185-
# """
186-
# Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
187-
#
188-
# Parameters
189-
# ----------
190-
# copy
191-
# Whether to copy `df` before manipulating the index name
192-
#
193-
# Returns
194-
# -------
195-
# :
196-
# `df` with a [pd.MultiIndex][pandas.MultiIndex]
197-
#
198-
# If the index was already a [pd.MultiIndex][pandas.MultiIndex],
199-
# this is a no-op (although the value of copy is respected).
200-
# """
201-
# return ensure_index_is_multiindex(self._df, copy=copy)
202-
#
203-
# def eiim(self, copy: bool = True) -> pd.DataFrame:
204-
# """
205-
# Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
206-
#
207-
# Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.]
208-
#
209-
# Parameters
210-
# ----------
211-
# copy
212-
# Whether to copy `df` before manipulating the index name
213-
#
214-
# Returns
215-
# -------
216-
# :
217-
# `df` with a [pd.MultiIndex][pandas.MultiIndex]
218-
#
219-
# If the index was already a [pd.MultiIndex][pandas.MultiIndex],
220-
# this is a no-op (although the value of copy is respected).
221-
# """
222-
# return self.ensure_index_is_multiindex(copy=copy)
223-
#
185+
def ensure_index_is_multiindex(self, copy: bool = True) -> S:
186+
"""
187+
Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
188+
189+
Parameters
190+
----------
191+
copy
192+
Whether to copy `series` before manipulating the index
193+
194+
Returns
195+
-------
196+
:
197+
`series` with a [pd.MultiIndex][pandas.MultiIndex]
198+
199+
If the index was already a [pd.MultiIndex][pandas.MultiIndex],
200+
this is a no-op (although the value of copy is respected).
201+
"""
202+
res = ensure_index_is_multiindex(self._series, copy=copy)
203+
204+
return res # type: ignore # something wrong with generic type hinting
205+
206+
def eiim(self, copy: bool = True) -> S:
207+
"""
208+
Ensure that the index is a [pd.MultiIndex][pandas.MultiIndex]
209+
210+
Alias for [ensure_index_is_multiindex][pandas_openscm.index_manipulation.]
211+
212+
Parameters
213+
----------
214+
copy
215+
Whether to copy `series` before manipulating the index
216+
217+
Returns
218+
-------
219+
:
220+
`series` with a [pd.MultiIndex][pandas.MultiIndex]
221+
222+
If the index was already a [pd.MultiIndex][pandas.MultiIndex],
223+
this is a no-op (although the value of copy is respected).
224+
"""
225+
return self.ensure_index_is_multiindex(copy=copy)
226+
224227
# def fix_index_name_after_groupby_quantile(
225228
# self, new_name: str = "quantile", copy: bool = False
226229
# ) -> pd.DataFrame:

tests/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ def setup_pandas_accessors() -> None:
4545
pd.Series._accessors.discard("openscm")
4646
if hasattr(pd.Series, "openscm"):
4747
del pd.Series.openscm
48+
49+
pd.Index._accessors.discard("openscm")
50+
if hasattr(pd.Index, "openscm"):
51+
del pd.Index.openscm

tests/integration/index_manipulation/test_integration_index_manipulation_ensure_index_is_multiindex.py

Lines changed: 109 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,14 @@
88
import pandas as pd
99
import pytest
1010

11-
from pandas_openscm.index_manipulation import ensure_index_is_multiindex
12-
from pandas_openscm.testing import create_test_df
11+
from pandas_openscm.index_manipulation import (
12+
ensure_index_is_multiindex,
13+
ensure_is_multiindex,
14+
)
15+
from pandas_openscm.testing import (
16+
convert_to_desired_type,
17+
create_test_df,
18+
)
1319

1420

1521
@pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False)))
@@ -71,12 +77,17 @@ def test_ensure_index_is_multiindex_no_op(copy, copy_exp):
7177

7278

7379
@pytest.mark.parametrize("copy, copy_exp", ((None, True), (True, True), (False, False)))
74-
def test_accessor(setup_pandas_accessors, copy, copy_exp):
80+
@pytest.mark.parametrize(
81+
"pobj_type",
82+
("DataFrame", "Series"),
83+
)
84+
def test_accessor(setup_pandas_accessors, copy, copy_exp, pobj_type):
7585
start = pd.DataFrame(
7686
[[1, 2], [3, 4]],
7787
columns=[10, 20],
7888
index=pd.Index(["a", "b"], name="variable"),
7989
)
90+
start = convert_to_desired_type(start, pobj_type)
8091

8192
call_kwargs = {}
8293
if copy is not None:
@@ -106,3 +117,98 @@ def test_accessor(setup_pandas_accessors, copy, copy_exp):
106117
else:
107118
# Same object returned
108119
assert id(start) == id(res_short)
120+
121+
122+
def test_ensure_is_multiindex_index():
123+
start = pd.Index([1, 2, 3], name="id")
124+
125+
res = ensure_is_multiindex(start)
126+
127+
assert isinstance(res, pd.MultiIndex)
128+
129+
exp = pd.MultiIndex.from_tuples(
130+
[
131+
(1,),
132+
(2,),
133+
(3,),
134+
],
135+
names=["id"],
136+
)
137+
138+
pd.testing.assert_index_equal(res, exp)
139+
140+
141+
def test_ensure_is_multiindex_index_no_name():
142+
start = pd.Index([1, 2, 3])
143+
144+
res = ensure_is_multiindex(start)
145+
146+
assert isinstance(res, pd.MultiIndex)
147+
148+
exp = pd.MultiIndex.from_tuples(
149+
[
150+
(1,),
151+
(2,),
152+
(3,),
153+
],
154+
names=[None],
155+
)
156+
157+
pd.testing.assert_index_equal(res, exp)
158+
159+
160+
def test_ensure_is_multiindex():
161+
start = pd.MultiIndex.from_tuples(
162+
[
163+
("a", "b"),
164+
("c", "d"),
165+
],
166+
names=["mod", "scen"],
167+
)
168+
169+
res = ensure_is_multiindex(start)
170+
171+
# Same object returned
172+
assert id(start) == id(res)
173+
assert isinstance(res, pd.MultiIndex)
174+
pd.testing.assert_index_equal(res, start)
175+
176+
177+
def test_ensure_is_multiindex_accessor_index(setup_pandas_accessors):
178+
start = pd.Index([1, 2, 3], name="id")
179+
180+
res = start.openscm.ensure_is_multiindex()
181+
res_short = start.openscm.eim()
182+
pd.testing.assert_index_equal(res, res_short)
183+
184+
assert isinstance(res, pd.MultiIndex)
185+
186+
exp = pd.MultiIndex.from_tuples(
187+
[
188+
(1,),
189+
(2,),
190+
(3,),
191+
],
192+
names=["id"],
193+
)
194+
195+
pd.testing.assert_index_equal(res, exp)
196+
197+
198+
def test_ensure_is_multiindex_accessor_multiindex(setup_pandas_accessors):
199+
start = pd.MultiIndex.from_tuples(
200+
[
201+
("a", "b"),
202+
("c", "d"),
203+
],
204+
names=["mod", "scen"],
205+
)
206+
207+
res = start.openscm.ensure_is_multiindex()
208+
res_short = start.openscm.eim()
209+
pd.testing.assert_index_equal(res, res_short)
210+
211+
# Same object returned
212+
assert id(start) == id(res)
213+
assert isinstance(res, pd.MultiIndex)
214+
pd.testing.assert_index_equal(res, start)

0 commit comments

Comments
 (0)