Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions nucleus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,10 @@ def make_request(
if payload is None:
payload = {}
if requests_command is requests.get:
if payload:
print(
"Received defined payload with GET request! Will ignore payload"
)
payload = None
return self._connection.make_request(payload, route, requests_command) # type: ignore

Expand Down
39 changes: 39 additions & 0 deletions nucleus/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,45 @@ def set_continuous_indexing(self, enable: bool = True):

return response

def get_image_indexing_status(self):
    """Fetch the progress of the primary image index for this dataset.

    Returns:
        Response payload::

            {
                "embedding_count": int
                "image_count": int
                "percent_indexed": float
                "additional_context": str
            }
    """
    # "image": True selects the image index (vs. the object index).
    payload = {"image": True}
    route = f"dataset/{self.id}/indexingStatus"
    return self._client.make_request(
        payload, route, requests_command=requests.post
    )

def get_object_indexing_status(self, model_run_id=None):
    """Fetch the progress of the primary object index for this dataset.

    When ``model_run_id`` is omitted, the indexing progress of the
    ground truth objects is reported instead of a model run's objects.

    Returns:
        Response payload::

            {
                "embedding_count": int
                "object_count": int
                "percent_indexed": float
                "additional_context": str
            }
    """
    # "image": False selects the object index (vs. the image index).
    payload = {"image": False, "model_run_id": model_run_id}
    route = f"dataset/{self.id}/indexingStatus"
    return self._client.make_request(
        payload, route, requests_command=requests.post
    )

def create_image_index(self):
"""Creates or updates image index by generating embeddings for images that do not already have embeddings.

Expand Down
2 changes: 1 addition & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
TEST_SLICE_NAME = "[PyTest] Test Slice"
TEST_PROJECT_ID = "60b699d70f139e002dd31bfc"

DATASET_WITH_AUTOTAG = "ds_c8jwdhy4y4f0078hzceg"
DATASET_WITH_EMBEDDINGS = "ds_c8jwdhy4y4f0078hzceg"
NUCLEUS_PYTEST_USER_ID = "60ad648c85db770026e9bf77"

EVAL_FUNCTION_THRESHOLD = 0.5
Expand Down
19 changes: 12 additions & 7 deletions tests/test_autotag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@

from nucleus.dataset import Dataset
from nucleus.errors import NucleusAPIError
from tests.helpers import DATASET_WITH_AUTOTAG, running_as_nucleus_pytest_user
from tests.helpers import (
DATASET_WITH_EMBEDDINGS,
running_as_nucleus_pytest_user,
)

# TODO: Test delete_autotag once API support for autotag creation is added.


@pytest.mark.integration
def test_update_autotag(CLIENT):
if running_as_nucleus_pytest_user(CLIENT):
job = Dataset(DATASET_WITH_AUTOTAG, CLIENT).update_autotag(
job = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT).update_autotag(
"tag_c8jwr0rpy1w00e134an0"
)
job.sleep_until_complete()
Expand All @@ -24,12 +27,12 @@ def test_dataset_export_autotag_training_items(CLIENT):
# This test can only run for the test user who has an indexed dataset.
# TODO: if/when we can create autotags via api, create one instead.
if running_as_nucleus_pytest_user(CLIENT):
dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
dataset = CLIENT.get_dataset(DATASET_WITH_EMBEDDINGS)

with pytest.raises(NucleusAPIError) as api_error:
dataset.autotag_training_items(autotag_name="NONSENSE_GARBAGE")
assert (
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_EMBEDDINGS}"
in str(api_error.value)
)

Expand All @@ -52,7 +55,9 @@ def test_dataset_export_autotag_training_items(CLIENT):

def test_export_embeddings(CLIENT):
if running_as_nucleus_pytest_user(CLIENT):
embeddings = Dataset(DATASET_WITH_AUTOTAG, CLIENT).export_embeddings()
embeddings = Dataset(
DATASET_WITH_EMBEDDINGS, CLIENT
).export_embeddings()
assert "embedding_vector" in embeddings[0]
assert "reference_id" in embeddings[0]

Expand All @@ -61,12 +66,12 @@ def test_dataset_export_autotag_tagged_items(CLIENT):
# This test can only run for the test user who has an indexed dataset.
# TODO: if/when we can create autotags via api, create one instead.
if running_as_nucleus_pytest_user(CLIENT):
dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
dataset = CLIENT.get_dataset(DATASET_WITH_EMBEDDINGS)

with pytest.raises(NucleusAPIError) as api_error:
dataset.autotag_items(autotag_name="NONSENSE_GARBAGE")
assert (
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_EMBEDDINGS}"
in str(api_error.value)
)

Expand Down
26 changes: 26 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from nucleus.job import AsyncJob, JobError

from .helpers import (
DATASET_WITH_EMBEDDINGS,
LOCAL_FILENAME,
TEST_BOX_ANNOTATIONS,
TEST_CATEGORY_ANNOTATIONS,
Expand Down Expand Up @@ -556,3 +557,28 @@ def test_dataset_item_iterator(dataset):
}
for key in expected_items:
assert actual_items[key] == expected_items[key]


@pytest.mark.integration
def test_dataset_get_image_indexing_status(CLIENT):
    """Check image index status fields on the pre-indexed test dataset."""
    dataset = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT)
    resp = dataset.get_image_indexing_status()
    assert resp["embedding_count"] == 170
    assert resp["image_count"] == 170
    # The image endpoint must not leak object-index fields.
    assert "object_count" not in resp
    # percent_indexed is the fraction of images that already have
    # embeddings, i.e. embedding_count / image_count (the original
    # comparison was inverted; it only passed because both counts are 170).
    assert round(resp["percent_indexed"], 2) == round(
        resp["embedding_count"] / resp["image_count"], 2
    )


@pytest.mark.integration
def test_dataset_get_object_indexing_status(CLIENT):
    """Check object index status fields on the pre-indexed test dataset."""
    dataset = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT)
    resp = dataset.get_object_indexing_status()
    assert resp["embedding_count"] == 422
    assert resp["object_count"] == 423
    # The object endpoint must not leak image-index fields.
    assert "image_count" not in resp
    # percent_indexed is the fraction of objects that already have
    # embeddings, i.e. embedding_count / object_count (the original
    # comparison was inverted; 423/422 and 422/423 both round to 1.0,
    # which masked the bug).
    assert round(resp["percent_indexed"], 2) == round(
        resp["embedding_count"] / resp["object_count"], 2
    )