scaleapi · gatli · Mar 20, 2025 · Mar 17, 2025 · Mar 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [0.17.10](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.10) - 2025-03-19
+
+### Added
+- Added a `page_size` parameter to `items_and_annotation_generator()` so callers can request smaller pages, reducing timeout errors for customers with large datasets
+
 ## [0.17.9](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.9) - 2025-03-11
 
 ### Added

diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -1518,13 +1518,15 @@ def items_and_annotation_generator(
         query: Optional[str] = None,
         use_mirrored_images: bool = False,
         only_most_recent_tasks: bool = True,
+        page_size=10000
     ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
         """Provides a generator of all DatasetItems and Annotations in the dataset.
 
         Args:
             query: Structured query compatible with the `Nucleus query language <https://nucleus.scale.com/docs/query-language-reference>`_.
             use_mirrored_images: If True, returns the location of the mirrored image hosted in Scale S3. Useful when the original image is no longer available.
             only_most_recent_tasks: If True, only the annotations corresponding to the most recent task for each item is returned.
+            page_size: Number of items to fetch per page. Defaults to 10000, the maximum ES page size. Use a smaller value to reduce timeout errors on large datasets.
 
         Returns:
             Generator where each element is a dict containing the DatasetItem
@@ -1548,7 +1550,7 @@ def items_and_annotation_generator(
             client=self._client,
             endpoint=f"dataset/{self.id}/exportForTrainingPage",
             result_key=EXPORT_FOR_TRAINING_KEY,
-            page_size=10000,  # max ES page size
+            page_size=page_size,  # default is max ES page size of 10000
             query=query,
             chip=use_mirrored_images,
             onlyMostRecentTask=only_most_recent_tasks,

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"]  # Easy ignore for getting it running
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.17.9"
+version = "0.17.10"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]