Skip to content

Commit

Permalink
[python] test_registration_mappings.py::test_pandas_indexing reform…
Browse files Browse the repository at this point in the history
…at (#2853)

* `test_registration_mappings::test_pandas_indexing` reformat

* reorder, comment test cases
  • Loading branch information
ryan-williams authored Aug 9, 2024
1 parent 6b74aa5 commit b9ec3c5
Showing 1 changed file with 54 additions and 151 deletions.
205 changes: 54 additions & 151 deletions apis/python/tests/test_registration_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import math
import tempfile
from contextlib import nullcontext
from typing import Optional, Sequence
from typing import List, Optional, Sequence, Tuple, Union

import anndata as ad
import numpy as np
Expand All @@ -15,6 +15,9 @@
import tiledbsoma.io
import tiledbsoma.io._registration as registration
from tiledbsoma._util import verify_obs_and_var_eq
from tiledbsoma.io._registration import (
signatures,
)


def _create_anndata(
Expand Down Expand Up @@ -200,160 +203,60 @@ def soma_larger(anndata_larger):
return uri


# fmt: off
PANDAS_INDEXING_TEST_DF = pd.DataFrame(
data={
"soma_joinid": np.arange(3, dtype=np.int64),
"alt_id": ["A", "C", "G"],
"obs_id": ["AT", "CT", "GT"],
}
)
@pytest.mark.parametrize(
"args",
[
# SOMA ID column is to be obs_id, and it is the Pandas index named "obs_id"
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "obs_id",
"expected_signature": {"obs_id": "string", "alt_id": "string"},
},
# SOMA ID column is to be obs_id, and it is the Pandas index named "index"
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": True,
"axis_name_to_set": "index",
"registration_index_column_name": "obs_id",
"expected_signature": {"obs_id": "string", "alt_id": "string"},
},
# SOMA ID column is to be obs_id, and it is the Pandas unnamed index
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": True,
"axis_name_to_set": None,
"registration_index_column_name": "obs_id",
"expected_signature": {"obs_id": "string", "alt_id": "string"},
},
# SOMA ID column is to be obs_id, and the Pandas index is named something else
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "obs_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
# SOMA ID column is to be obs_id, and the Pandas index is unnamed
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": True,
"axis_name_to_set": None,
"registration_index_column_name": "obs_id",
"expected_signature": {"obs_id": "string"},
},
# SOMA ID column is to be obs_id, and the Pandas index is named "index"
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": True,
"axis_name_to_set": "index",
"registration_index_column_name": "obs_id",
"expected_signature": {"obs_id": "string"},
},
# SOMA ID column is to be obs_id, and the Pandas index is implicitized integers
{
"do_set_index": False,
"index_name_to_set": None,
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "obs_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
# SOMA ID column is to be alt_id, and it is the Pandas index named "alt_id"
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
# SOMA ID column is to be alt_id, and it is the Pandas index named "index"
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": True,
"axis_name_to_set": "index",
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
# SOMA ID column is to be alt_id, and it is the Pandas unnamed index
{
"do_set_index": True,
"index_name_to_set": "alt_id",
"do_rename_axis": True,
"axis_name_to_set": None,
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
# SOMA ID column is to be alt_id, and the Pandas index is named something else
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "alt_id",
"expected_signature": {"obs_id": "string", "alt_id": "string"},
},
# SOMA ID column is to be alt_id, and the Pandas index is unnamed
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": True,
"axis_name_to_set": None,
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string"},
},
# SOMA ID column is to be alt_id, and the Pandas index is named "index"
{
"do_set_index": True,
"index_name_to_set": "obs_id",
"do_rename_axis": True,
"axis_name_to_set": "index",
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string"},
},
# SOMA ID column is to be alt_id, and the Pandas index is implicitized integers
{
"do_set_index": False,
"index_name_to_set": None,
"do_rename_axis": False,
"axis_name_to_set": None,
"registration_index_column_name": "alt_id",
"expected_signature": {"alt_id": "string", "obs_id": "string"},
},
],
[ "index_col_and_name" , "default_index_name" , "signature_col_names" ],
[ # | Set this | If present, | signatures.py | Expected: |
# | col as idx | rename index | `default_index_name` | signature col names |
# `default_index_name` matches column that was made index ⇒ both columns present in signature
[ ( "obs_id" , ), "obs_id" , ( "obs_id" , "alt_id" ) ],
[ ( "obs_id" , "index" ), "obs_id" , ( "obs_id" , "alt_id" ) ],
[ ( "obs_id" , None ), "obs_id" , ( "obs_id" , "alt_id" ) ],
[ ( "alt_id" , ), "alt_id" , ( "alt_id" , "obs_id" ) ],
[ ( "alt_id" , "index" ), "alt_id" , ( "alt_id" , "obs_id" ) ],
[ ( "alt_id" , None ), "alt_id" , ( "alt_id" , "obs_id" ) ],
# `default_index_name` is the column that was not made index ⇒ index dropped when named "index" or None
[ ( "alt_id" , ), "obs_id" , ( "obs_id" , "alt_id" ) ],
[ ( "alt_id" , "index" ), "obs_id" , ( "obs_id" , ) ],
[ ( "alt_id" , None ), "obs_id" , ( "obs_id" , ) ],
[ ( "obs_id" , ), "alt_id" , ( "alt_id" , "obs_id" ) ],
[ ( "obs_id" , "index" ), "alt_id" , ( "alt_id" , ) ],
[ ( "obs_id" , None ), "alt_id" , ( "alt_id" , ) ],
# default RangeIndex ⇒ columns are preserved
[ ( None , ), "obs_id" , ( "obs_id" , "alt_id" ) ],
[ ( None , ), "alt_id" , ( "alt_id" , "obs_id" ) ],
]
)
def test_pandas_indexing(args):
def test_pandas_indexing(
index_col_and_name: Union[Tuple[Optional[str]], Tuple[str, Optional[str]]],
default_index_name: str,
signature_col_names: List[Union[str, Tuple[str, str]]],
):
"""
The index-column name for registration can take a variety of forms.
This test exercises all of them.
The `default_index_name` for registration can interact with column- and index-names in a variety of ways; this test
exercises several of them.
"""

df = pd.DataFrame(
data={
"soma_joinid": np.arange(3, dtype=np.int64),
"alt_id": ["A", "C", "G"],
"obs_id": ["AT", "CT", "GT"],
}
)
if args["do_set_index"]:
df.set_index(args["index_name_to_set"], inplace=True)
if args["do_rename_axis"]:
df.rename_axis(args["axis_name_to_set"], inplace=True)

actual_signature = registration.signatures._string_dict_from_pandas_dataframe(
df,
args["registration_index_column_name"],
)
assert actual_signature == args["expected_signature"]
df = PANDAS_INDEXING_TEST_DF.copy()
index_col = index_col_and_name[0]
if index_col is not None:
df.set_index(index_col, inplace=True)
if len(index_col_and_name) == 2:
df.index.name = index_col_and_name[1]

actual_signature = signatures._string_dict_from_pandas_dataframe(df, default_index_name)
expected_signature = dict((col, "string") for col in signature_col_names)
assert actual_signature == expected_signature
# fmt: on


@pytest.mark.parametrize("obs_field_name", ["obs_id", "cell_id"])
Expand Down

0 comments on commit b9ec3c5

Please sign in to comment.