Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.14.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.12) - 2022-08-05

### Added
- Added auto-paginated `Slice.export_predictions_generator`
### Fixed
- Change `{Dataset,Slice}.items_and_annotation_generator` to work with improved paginate endpoint

## [0.14.11](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.11) - 2022-07-20

### Fixed
Expand Down
1 change: 1 addition & 0 deletions nucleus/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
AUTOTAG_SCORE_THRESHOLD = "score_threshold"
EXPORTED_ROWS = "exportedRows"
EXPORTED_SCALE_TASK_INFO_ROWS = "exportedScaleTaskInfoRows"
EXPORT_FOR_TRAINING_KEY = "data"
CAMERA_MODEL_KEY = "camera_model"
CAMERA_PARAMS_KEY = "camera_params"
CLASS_PDF_KEY = "class_pdf"
Expand Down
12 changes: 10 additions & 2 deletions nucleus/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
DEFAULT_ANNOTATION_UPDATE_MODE,
EMBEDDING_DIMENSION_KEY,
EMBEDDINGS_URL_KEY,
EXPORT_FOR_TRAINING_KEY,
EXPORTED_ROWS,
FRAME_RATE_KEY,
ITEMS_KEY,
Expand Down Expand Up @@ -1250,8 +1251,15 @@ def items_and_annotation_generator(
}
}]
"""
for item in self.items_generator():
yield self.refloc(reference_id=item.reference_id)
json_generator = paginate_generator(
client=self._client,
endpoint=f"dataset/{self.id}/exportForTrainingPage",
result_key=EXPORT_FOR_TRAINING_KEY,
page_size=100000,
)
for data in json_generator:
for ia in convert_export_payload([data], has_predictions=False):
yield ia

def export_embeddings(
self,
Expand Down
55 changes: 45 additions & 10 deletions nucleus/slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import requests

from nucleus.annotation import Annotation
from nucleus.constants import EXPORTED_ROWS, ITEMS_KEY
from nucleus.constants import EXPORT_FOR_TRAINING_KEY, EXPORTED_ROWS, ITEMS_KEY
from nucleus.dataset_item import DatasetItem
from nucleus.errors import NucleusAPIError
from nucleus.job import AsyncJob
from nucleus.utils import (
KeyErrorDict,
convert_export_payload,
format_dataset_item_response,
format_scale_task_info_response,
paginate_generator,
)
Expand Down Expand Up @@ -203,13 +202,15 @@ def items_and_annotation_generator(
}
}]
"""
for item in self.items_generator():
yield format_dataset_item_response(
self._client.dataitem_ref_id(
dataset_id=self.dataset_id,
reference_id=item.reference_id,
)
)
json_generator = paginate_generator(
client=self._client,
endpoint=f"slice/{self.id}/exportForTrainingPage",
result_key=EXPORT_FOR_TRAINING_KEY,
page_size=100000,
)
for data in json_generator:
for ia in convert_export_payload([data], has_predictions=False):
yield ia

def items_and_annotations(
self,
Expand Down Expand Up @@ -256,7 +257,7 @@ def export_predictions(

List[{
"item": DatasetItem,
"predicions": {
"predictions": {
"box": List[BoxAnnotation],
"polygon": List[PolygonAnnotation],
"cuboid": List[CuboidAnnotation],
Expand All @@ -272,6 +273,40 @@ def export_predictions(
)
return convert_export_payload(api_payload[EXPORTED_ROWS], True)

def export_predictions_generator(
self, model
) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
"""Provides a list of all DatasetItems and Predictions in the Slice for the given Model.

Parameters:
model (Model): the nucleus model objects representing the model for which to export predictions.

Returns:
Iterable where each element is a dict containing the DatasetItem
and all of its associated Predictions, grouped by type (e.g. box).
::

List[{
"item": DatasetItem,
"predictions": {
"box": List[BoxAnnotation],
"polygon": List[PolygonAnnotation],
"cuboid": List[CuboidAnnotation],
"segmentation": List[SegmentationAnnotation],
"category": List[CategoryAnnotation],
}
}]
"""
json_generator = paginate_generator(
client=self._client,
endpoint=f"slice/{self.id}/{model.id}/exportForTrainingPage",
result_key=EXPORT_FOR_TRAINING_KEY,
page_size=100000,
)
for data in json_generator:
for ip in convert_export_payload([data], has_predictions=True):
yield ip

def export_scale_task_info(self):
"""Fetches info for all linked Scale tasks of items/scenes in the slice.

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ exclude = '''

[tool.poetry]
name = "scale-nucleus"
version = "0.14.11"
version = "0.14.12"
description = "The official Python client library for Nucleus, the Data Platform for AI"
license = "MIT"
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def sort_labelmap(segmentation_annotation):
assert row[ITEM_KEY] == ds_items[0]
assert row[ANNOTATIONS_KEY][BOX_TYPE][0] == box_annotation
assert sort_labelmap(
row[ANNOTATIONS_KEY][SEGMENTATION_TYPE][0]
row[ANNOTATIONS_KEY][SEGMENTATION_TYPE]
) == sort_labelmap(clear_fields(segmentation_annotation))
assert row[ANNOTATIONS_KEY][POLYGON_TYPE][0] == polygon_annotation
assert row[ANNOTATIONS_KEY][CATEGORY_TYPE][0] == category_annotation
Expand Down