Skip to content

Task-based annotation fixes #445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits
Feb 27, 2025
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.17.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.7) - 2024-11-05
## [0.17.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.8) - 2025-01-02

### Added
- Adding `only_most_recent_tasks` parameter for `dataset.scene_and_annotation_generator()` and `dataset.items_and_annotation_generator()` to accommodate for multiple sets of ground truth caused by relabeled tasks. Also returns the task_id in the annotation results.
Expand Down
52 changes: 27 additions & 25 deletions nucleus/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class BoxAnnotation(Annotation): # pylint: disable=R0902
metadata: Optional[Dict] = None
embedding_vector: Optional[list] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -180,7 +180,7 @@ def from_json(cls, payload: dict):
metadata=payload.get(METADATA_KEY, {}),
embedding_vector=payload.get(EMBEDDING_VECTOR_KEY, None),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -198,7 +198,7 @@ def to_payload(self) -> dict:
METADATA_KEY: self.metadata,
EMBEDDING_VECTOR_KEY: self.embedding_vector,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}

def __eq__(self, other):
Expand All @@ -213,7 +213,7 @@ def __eq__(self, other):
and sorted(self.metadata.items()) == sorted(other.metadata.items())
and self.embedding_vector == other.embedding_vector
and self.track_reference_id == other.track_reference_id
and self.task_id == other.task_id
and self._task_id == other._task_id
)


Expand Down Expand Up @@ -280,7 +280,7 @@ class LineAnnotation(Annotation):
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand Down Expand Up @@ -310,7 +310,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -324,7 +324,7 @@ def to_payload(self) -> dict:
ANNOTATION_ID_KEY: self.annotation_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -375,7 +375,7 @@ class PolygonAnnotation(Annotation):
metadata: Optional[Dict] = None
embedding_vector: Optional[list] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand Down Expand Up @@ -406,7 +406,7 @@ def from_json(cls, payload: dict):
metadata=payload.get(METADATA_KEY, {}),
embedding_vector=payload.get(EMBEDDING_VECTOR_KEY, None),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -421,7 +421,7 @@ def to_payload(self) -> dict:
METADATA_KEY: self.metadata,
EMBEDDING_VECTOR_KEY: self.embedding_vector,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -518,7 +518,7 @@ class KeypointsAnnotation(Annotation):
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata or {}
Expand Down Expand Up @@ -571,7 +571,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -587,7 +587,7 @@ def to_payload(self) -> dict:
ANNOTATION_ID_KEY: self.annotation_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
return payload

Expand Down Expand Up @@ -692,7 +692,7 @@ class CuboidAnnotation(Annotation): # pylint: disable=R0902
annotation_id: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -709,7 +709,7 @@ def from_json(cls, payload: dict):
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -729,7 +729,8 @@ def to_payload(self) -> dict:
payload[METADATA_KEY] = self.metadata
if self.track_reference_id:
payload[TRACK_REFERENCE_ID_KEY] = self.track_reference_id

if self._task_id:
payload[TASK_ID_KEY] = self._task_id
return payload


Expand Down Expand Up @@ -942,7 +943,7 @@ class CategoryAnnotation(Annotation):
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -955,7 +956,7 @@ def from_json(cls, payload: dict):
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -966,7 +967,7 @@ def to_payload(self) -> dict:
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand All @@ -982,7 +983,7 @@ class MultiCategoryAnnotation(Annotation):
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = None
track_reference_id: Optional[str] = None
task_id: Optional[str] = None
_task_id: Optional[str] = field(default=None, repr=False)

def __post_init__(self):
self.metadata = self.metadata if self.metadata else {}
Expand All @@ -995,7 +996,7 @@ def from_json(cls, payload: dict):
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
track_reference_id=payload.get(TRACK_REFERENCE_ID_KEY, None),
task_id=payload.get(TASK_ID_KEY, None),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -1006,7 +1007,7 @@ def to_payload(self) -> dict:
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TRACK_REFERENCE_ID_KEY: self.track_reference_id,
TASK_ID_KEY: self.task_id,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand Down Expand Up @@ -1045,6 +1046,7 @@ class SceneCategoryAnnotation(Annotation):
reference_id: str
taxonomy_name: Optional[str] = None
metadata: Optional[Dict] = field(default_factory=dict)
_task_id: Optional[str] = field(default=None, repr=False)

@classmethod
def from_json(cls, payload: dict):
Expand All @@ -1053,6 +1055,7 @@ def from_json(cls, payload: dict):
reference_id=payload[REFERENCE_ID_KEY],
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
metadata=payload.get(METADATA_KEY, {}),
_task_id=payload.get(TASK_ID_KEY, None),
)

def to_payload(self) -> dict:
Expand All @@ -1062,6 +1065,7 @@ def to_payload(self) -> dict:
GEOMETRY_KEY: {},
REFERENCE_ID_KEY: self.reference_id,
METADATA_KEY: self.metadata,
TASK_ID_KEY: self._task_id,
}
if self.taxonomy_name is not None:
payload[TAXONOMY_NAME_KEY] = self.taxonomy_name
Expand All @@ -1079,9 +1083,7 @@ class AnnotationList:
default_factory=list
)
cuboid_annotations: List[CuboidAnnotation] = field(default_factory=list)
category_annotations: List[CategoryAnnotation] = field(
default_factory=list
)
category_annotations: List[CategoryAnnotation] = field(default_factory=list)
multi_category_annotations: List[MultiCategoryAnnotation] = field(
default_factory=list
)
Expand Down
33 changes: 25 additions & 8 deletions nucleus/annotation_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,7 @@ def get_form_data_and_file_pointers_fn(
"""

def fn():
request_json = construct_segmentation_payload(
segmentations, update
)
request_json = construct_segmentation_payload(segmentations, update)
form_data = [
FileFormField(
name=SERIALIZED_REQUEST_KEY,
Expand Down Expand Up @@ -212,15 +210,17 @@ def fn():

return fn

@staticmethod
def check_for_duplicate_ids(annotations: Iterable[Annotation]):
def check_for_duplicate_ids(self, annotations: Iterable[Annotation]):
"""Do not allow annotations to have the same (annotation_id, reference_id, task_id) tuple"""

# some annotations like CategoryAnnotation do not have annotation_id attribute, and as such, we allow duplicates
tuple_ids = [
(ann.reference_id, ann.annotation_id, ann.task_id) # type: ignore
(
ann.reference_id,
ann.annotation_id,
getattr(ann, "_task_id", None),
)
for ann in annotations
if hasattr(ann, "annotation_id") and hasattr(ann, "task_id")
if hasattr(ann, "annotation_id")
]
tuple_count = Counter(tuple_ids)
duplicates = {key for key, value in tuple_count.items() if value > 1}
Expand Down Expand Up @@ -255,3 +255,20 @@ def __init__(
self._route = (
f"dataset/{dataset_id}/model/{model_id}/uploadPredictions"
)

def check_for_duplicate_ids(self, annotations: Iterable[Annotation]):
    """Do not allow predictions to have the same (annotation_id, reference_id) tuple"""
    # Unlike the annotation uploader, predictions are keyed on the
    # (annotation_id, reference_id) pair only — task_id does not apply here.
    # Count occurrences of each key pair in a single pass; entries lacking
    # either attribute are skipped rather than treated as duplicates.
    key_counts = Counter(
        (pred.annotation_id, pred.reference_id)  # type: ignore
        for pred in annotations
        if hasattr(pred, "annotation_id") and hasattr(pred, "reference_id")
    )
    duplicates = {pair for pair, seen in key_counts.items() if seen > 1}
    if duplicates:
        raise DuplicateIDError(
            f"Duplicate predictions with the same (annotation_id, reference_id) properties found.\n"
            f"Duplicates: {duplicates}\n"
            f"To fix this, avoid duplicate predictions, or specify a different annotation_id attribute "
            f"for the failing items."
        )
28 changes: 9 additions & 19 deletions tests/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,7 @@ def test_polygon_gt_upload(dataset):
assert response["annotations_processed"] == 1
assert response["annotations_ignored"] == 0

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand Down Expand Up @@ -370,7 +368,7 @@ def test_mixed_annotation_upload(dataset):


def test_box_gt_upload_update(dataset):
TEST_BOX_ANNOTATIONS[0]["task_id"] = "test_task_id"
TEST_BOX_ANNOTATIONS[0]["_task_id"] = "test_task_id"
annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])
response = dataset.annotate(annotations=[annotation])

Expand All @@ -384,7 +382,7 @@ def test_box_gt_upload_update(dataset):
annotation_update_params["reference_id"] = TEST_BOX_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_BOX_ANNOTATIONS[0]["task_id"]
annotation_update_params["_task_id"] = TEST_BOX_ANNOTATIONS[0]["_task_id"]

annotation_update = BoxAnnotation(**annotation_update_params)
response = dataset.annotate(annotations=[annotation_update], update=True)
Expand All @@ -401,7 +399,7 @@ def test_box_gt_upload_update(dataset):


def test_box_gt_upload_ignore(dataset):
TEST_BOX_ANNOTATIONS[0]["task_id"] = "test_task_id"
TEST_BOX_ANNOTATIONS[0]["_task_id"] = "test_task_id"
annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])

print(annotation)
Expand All @@ -418,7 +416,7 @@ def test_box_gt_upload_ignore(dataset):
annotation_update_params["reference_id"] = TEST_BOX_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_BOX_ANNOTATIONS[0]["task_id"]
annotation_update_params["_task_id"] = TEST_BOX_ANNOTATIONS[0]["_task_id"]
annotation_update = BoxAnnotation(**annotation_update_params)

# Default behavior is ignore.
Expand Down Expand Up @@ -450,19 +448,15 @@ def test_polygon_gt_upload_update(dataset):
annotation_update_params["reference_id"] = TEST_POLYGON_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0][
"task_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0]["task_id"]

annotation_update = PolygonAnnotation.from_json(annotation_update_params)
response = dataset.annotate(annotations=[annotation_update], update=True)

assert response["annotations_processed"] == 1
assert response["annotations_ignored"] == 0

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand All @@ -485,9 +479,7 @@ def test_polygon_gt_upload_ignore(dataset):
annotation_update_params["reference_id"] = TEST_POLYGON_ANNOTATIONS[0][
"reference_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0][
"task_id"
]
annotation_update_params["task_id"] = TEST_POLYGON_ANNOTATIONS[0]["task_id"]

annotation_update = PolygonAnnotation.from_json(annotation_update_params)
# Default behavior is ignore.
Expand All @@ -496,9 +488,7 @@ def test_polygon_gt_upload_ignore(dataset):
assert response["annotations_processed"] == 0
assert response["annotations_ignored"] == 1

response = dataset.refloc(annotation.reference_id)["annotations"][
"polygon"
]
response = dataset.refloc(annotation.reference_id)["annotations"]["polygon"]
assert len(response) == 1
response_annotation = response[0]
assert_polygon_annotation_matches_dict(
Expand Down
Loading