Skip to content

Commit

Permalink
Pytest-ify reindexer unit tests (#2134)
Browse files Browse the repository at this point in the history
  • Loading branch information
jp-dark authored Feb 14, 2024
1 parent ef6cb19 commit f2eb946
Showing 1 changed file with 55 additions and 195 deletions.
250 changes: 55 additions & 195 deletions apis/python/tests/test_indexer.py
Original file line number Diff line number Diff line change
@@ -1,212 +1,72 @@
from typing import List, Union

import numpy as np
import pandas as pd
import pytest

from tiledbsoma._index_util import tiledbsoma_build_index
from tiledbsoma.options import SOMATileDBContext
from tiledbsoma.options._soma_tiledb_context import _validate_soma_tiledb_context


def indexer_test(keys: np.array, lookups: np.array, fail: bool):
    """Route one keys/lookups pair to the matching check.

    When ``fail`` is truthy the pair is handed to ``indexer_test_fail``;
    otherwise it is handed to ``indexer_test_pass``.
    """
    check = indexer_test_fail if fail else indexer_test_pass
    check(keys, lookups)


# Legacy negative check: builds a tiledbsoma index from `keys`, runs
# `get_indexer(lookups)` on it, and raises AssertionError if neither call raised.
def indexer_test_fail(keys: np.array, lookups: np.array):
try:
context = _validate_soma_tiledb_context(SOMATileDBContext())
index = tiledbsoma_build_index(keys, context=context)
index.get_indexer(lookups)
# Only reached when nothing above raised.
raise AssertionError("should have failed")
except pd.errors.InvalidIndexError:
pass
# NOTE(review): AssertionError is a subclass of Exception, so the
# "should have failed" error raised above is caught here as well. The `if`
# below only executes `pass`, so every exception is swallowed regardless of
# its message and this check cannot fail.
except Exception as e:
if str(e) == "RuntimeError: There are duplicate keys.":
pass
# Negative test: both parametrized cases use `keys` that contain repeated values.
@pytest.mark.parametrize(
"keys, lookups",
[
([-1, -1, -1, 0, 0, 0], np.tile(np.arange(1, 6), 4)),
(np.tile(np.array([-1, 1, 2, 3, 4, 5]), 4), [-10000, 1, 2, 3, 5, 6]),
],
)
def test_duplicate_key_indexer_error(
keys: Union[np.array, List[int]], lookups: np.array
):
context = _validate_soma_tiledb_context(SOMATileDBContext())
# Building the tiledbsoma index is expected to raise RuntimeError.
# `match` is applied as a regular-expression search, so the trailing "."
# matches any character.
with pytest.raises(RuntimeError, match="There are duplicate keys."):
tiledbsoma_build_index(keys, context=context)

# NOTE(review): from here on the listing looks like removed and added diff
# lines interleaved (note the duplicated `pd_index = pd.Index(keys)` and the
# `try`/`except` wrapped around a `with pytest.raises(...)`). Presumably only
# the `pd.Index(...)` + `with pytest.raises(...)` lines belong to this test;
# confirm against the commit.
try:
pd_index = pd.Index(keys)
pd_index = pd.Index(keys)
# pandas is expected to raise InvalidIndexError from get_indexer for these keys.
with pytest.raises(pd.errors.InvalidIndexError):
pd_index.get_indexer(lookups)
raise AssertionError("should have failed")
except pd.errors.InvalidIndexError:
pass
# NOTE(review): this handler also catches the AssertionError raised above and
# discards it; the `if` below only executes `pass`.
except Exception as e:
if str(e) == "RuntimeError: There are duplicate keys.":
pass


def indexer_test_pass(keys: np.array, lookups: np.array):
@pytest.mark.parametrize(
    "keys, lookups",
    [
        ([1], [1, 1, 1, 1]),
        ([-1, 1, 2, 3, 4, 5], np.tile(np.array([-1, 1, 2, 3, 4, 5]), 4)),
        ([-10000, -100000, 200000, 5, 1, 7], np.tile(np.array([-1, 1, 2, 3, 4, 5]), 4)),
        (
            [-10000, -200000, 1000, 3000, 1, 2],
            np.tile(np.array([-1, 1, 2, 3, 4, 5]), 4),
        ),
        (list(range(1, 10000)), list(range(1, 10))),
        (
            list(range(1, 10000)),
            [
                525,
                1293,
                1805,
                5802,
                7636,
                7754,
                7791,
                7957,
                7959,
                8067,
                8340,
                8736,
                8806,
                9329,
                9377,
                9653,
            ],
        ),
        (list(range(1, 10000)), list(range(1, 10000))),
    ],
)
def test_indexer(
    keys: Union[np.ndarray, List[int]], lookups: Union[np.ndarray, List[int]]
):
    """Check that the tiledbsoma indexer agrees with ``pandas.Index.get_indexer``.

    Args:
        keys: Values used to build both the tiledbsoma index and the pandas
            index.
        lookups: Values whose positions are looked up in both indexes.

    Raises:
        AssertionError: If the two result arrays differ in shape or in any
            element.
    """
    context = _validate_soma_tiledb_context(SOMATileDBContext())
    indexer = tiledbsoma_build_index(keys, context=context)
    results = indexer.get_indexer(lookups)
    panda_results = pd.Index(keys).get_indexer(lookups)
    # Compare the full arrays. Reducing each side with ``.all()`` first would
    # only compare two booleans and let mismatched positions slip through.
    np.testing.assert_array_equal(results, panda_results)


# Table of indexer cases: each entry holds the index "keys", the "lookups" to
# resolve against them, and "pass" (False marks a case expected to fail).
# Every list is built fresh so no two entries share a list object.
test_data = [
    {"keys": [1], "lookups": [1] * 4, "pass": True},
    {
        "keys": [-1] * 3 + [0] * 3,
        "lookups": [1, 2, 3, 4, 5] * 4,
        "pass": False,
    },
    {
        "keys": [-1, 1, 2, 3, 4, 5] * 4,
        "lookups": [-10000, 1, 2, 3, 5, 6],
        "pass": False,
    },
    {
        "keys": [-1, 1, 2, 3, 4, 5],
        "lookups": [-1, 1, 2, 3, 4, 5] * 4,
        "pass": True,
    },
    {
        "keys": [-10000, -100000, 200000, 5, 1, 7],
        "lookups": [-1, 1, 2, 3, 4, 5] * 4,
        "pass": True,
    },
    {
        "keys": [-10000, -200000, 1000, 3000, 1, 2],
        "lookups": [-1, 1, 2, 3, 4, 5] * 4,
        "pass": True,
    },
    {
        "keys": list(range(1, 10000)),
        "lookups": list(range(1, 10)),
        "pass": True,
    },
    {
        "keys": list(range(1, 10000)),
        "lookups": [
            525, 1293, 1805, 5802, 7636, 7754, 7791, 7957,
            7959, 8067, 8340, 8736, 8806, 9329, 9377, 9653,
        ],
        "pass": True,
    },
    {
        "keys": list(range(1, 10000)),
        "lookups": list(range(1, 10000)),
        "pass": True,
    },
]


# Legacy driver: runs every entry of `test_data` through `indexer_test`,
# passing `fail` as the negation of the entry's "pass" flag.
def test_indexer():
for data in test_data:
indexer_test(data["keys"], data["lookups"], not data["pass"])
# NOTE(review): `.all()` collapses each array to a single boolean, so this
# compares two booleans rather than the arrays element by element.
# `results` and `panda_results` are not defined in the lines above;
# presumably this line belongs to the parametrized `test_indexer` — confirm.
np.testing.assert_equal(results.all(), panda_results.all())

0 comments on commit f2eb946

Please sign in to comment.