4 changes: 4 additions & 0 deletions doc/changes/DM-52397.misc.md
@@ -0,0 +1,4 @@
`Butler.registry.queryDatasets`, `Butler.registry.queryDataIds`, and `Butler.registry.queryDimensionRecords` have been re-implemented using the same query framework backing `Butler.query()` and the various `Butler.query_*()` methods. Though the `Butler.registry.*` methods are not yet deprecated, users are encouraged to begin migrating to the equivalent `Butler.query_*` functions; a minimal migration sketch follows the list below. The new implementation comes with several minor behavior changes:
- We no longer raise an exception if governor dimension values in a query constraint are not known to the registry. Instead, we return no results.
- Result rows are now deduplicated.
- `findFirst` searches are now supported in calibration collections.
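
A minimal migration sketch, assuming an existing `Butler` instance named `butler`; the dataset type, collection, and data ID values here are hypothetical, and the `query.datasets(...)` call mirrors the usage visible later in this diff:

```python
# Old registry-based search (still works, but slated for migration):
refs = list(
    butler.registry.queryDatasets(
        "calexp",                      # hypothetical dataset type
        collections=["HSC/runs/RC2"],  # hypothetical collection
        findFirst=True,
    )
)

# New query-framework equivalent, using the context manager shown in this PR:
with butler.query() as query:
    refs = list(query.datasets("calexp", ["HSC/runs/RC2"], find_first=True))
```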
4 changes: 4 additions & 0 deletions doc/changes/DM-52397.removal.md
@@ -0,0 +1,4 @@
Removed the deprecated `DatasetQueryResults.materialize()` method.
Removed the deprecated `offset` parameter from the `limit` method in the `DataCoordinateQueryResults`, `DatasetQueryResults`, and `DimensionRecordQueryResults` classes.
Removed the deprecated feature where HTM and HEALPix spatial dimensions like `htm11` or `healpix10` could be used in data ID constraints passed to queries. The exception is `htm7`, which will continue to work. Users should instead use region `OVERLAPS` constraints in query `where` expressions.
Removed support for `<` and `>` operators in `Butler.registry.query*()` `where` strings for comparisons of `Timespan` vs `Timespan`, or `Timespan` vs `Time`. Instead, use `timespan.begin < ts` or `timespan.end > ts` to explicitly indicate which timespan bound you are comparing with.
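
A sketch of the `where`-string migration, with a hypothetical instrument and bind value; the `timespan.begin`/`timespan.end` spellings come from the note above:

```python
from astropy.time import Time

ts = Time("2024-01-01T00:00:00", scale="tai")

# No longer supported: "visit.timespan < ts" (ambiguous timespan bound).
# Name the bound explicitly instead:
records = butler.registry.queryDimensionRecords(
    "visit",
    instrument="LATISS",              # hypothetical instrument
    where="visit.timespan.begin < ts",
    bind={"ts": ts},
)
```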
1 change: 1 addition & 0 deletions doc/changes/DM-52398.bugfix.md
@@ -0,0 +1 @@
Query generation no longer raises `sqlalchemy.exc.ArgumentError` when a dataset calibration timespan constraint is applied to a dataset search that includes no calibration collections.
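
A sketch of the pattern that previously failed, using the `expression_factory` timespan API shown later in this diff; the dataset type, collection, and time range are hypothetical:

```python
from astropy.time import Time
from lsst.daf.butler import Timespan

timespan = Timespan(Time("2024-01-01", scale="tai"), Time("2024-02-01", scale="tai"))

with butler.query() as query:
    # "u/someone/run" is a hypothetical RUN collection, so this search
    # contains no calibration collections.
    results = query.datasets("bias", ["u/someone/run"])
    timespan_col = query.expression_factory["bias"].timespan
    # Previously this raised sqlalchemy.exc.ArgumentError during query
    # generation; it now builds and executes the query.
    refs = list(results.where(timespan_col.overlaps(timespan)))
```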
1 change: 1 addition & 0 deletions doc/changes/DM-52398.misc.md
@@ -0,0 +1 @@
`Butler.find_dataset()`/`Butler.registry.findDataset()` have been re-implemented using the same query framework backing `Butler.query()`.
173 changes: 75 additions & 98 deletions python/lsst/daf/butler/_registry_shim.py
@@ -27,16 +27,17 @@

from __future__ import annotations

__all__ = ("Registry",)
__all__ = ("RegistryShim",)

import contextlib
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import TYPE_CHECKING, Any

from ._collection_type import CollectionType
from ._dataset_association import DatasetAssociation
from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
from ._dataset_type import DatasetType
from ._exceptions import CalibrationLookupError
from ._storage_class import StorageClassFactory
from ._timespan import Timespan
from .dimensions import (
DataCoordinate,
@@ -46,18 +47,19 @@
DimensionRecord,
DimensionUniverse,
)
from .registry import Registry
from .registry._collection_summary import CollectionSummary
from .registry._defaults import RegistryDefaults
from .registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .registry._exceptions import NoDefaultCollectionError
from .registry._registry_base import RegistryBase
from .registry.queries._query_common import resolve_collections

if TYPE_CHECKING:
from .direct_butler import DirectButler
from .registry._registry import CollectionArgType
from .registry.interfaces import ObsCoreTableManager


class RegistryShim(Registry):
class RegistryShim(RegistryBase):
"""Implementation of `Registry` interface exposed to clients by `Butler`.

Parameters
Expand All @@ -74,7 +76,7 @@ class RegistryShim(Registry):
"""

def __init__(self, butler: DirectButler):
self._butler = butler
super().__init__(butler)
self._registry = butler._registry

def isWriteable(self) -> bool:
@@ -183,13 +185,74 @@ def findDataset(
*,
collections: CollectionArgType | None = None,
timespan: Timespan | None = None,
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
# Docstring inherited from a base class.
return self._registry.findDataset(
datasetType, dataId, collections=collections, timespan=timespan, **kwargs
if not isinstance(datasetType, DatasetType):
datasetType = self.getDatasetType(datasetType)

dataId = DataCoordinate.standardize(
dataId,
dimensions=datasetType.dimensions,
universe=self.dimensions,
defaults=self.defaults.dataId,
**kwargs,
)

with self._butler.query() as query:
resolved_collections = resolve_collections(self._butler, collections)
if not resolved_collections:
if collections is None:
raise NoDefaultCollectionError("No collections provided, and no default collections set")
else:
return None

if datasetType.isCalibration() and timespan is None:
# Filter out calibration collections, because with no timespan
# we have no way of selecting a dataset from them.
collection_info = self._butler.collections.query_info(
resolved_collections, flatten_chains=True
)
resolved_collections = [
info.name for info in collection_info if info.type != CollectionType.CALIBRATION
]
if not resolved_collections:
return None

result = query.datasets(datasetType, resolved_collections, find_first=True).limit(2)
dataset_type_name = result.dataset_type.name
result = result.where(dataId)
if (
datasetType.isCalibration()
and timespan is not None
and (timespan.begin is not None or timespan.end is not None)
):
timespan_column = query.expression_factory[dataset_type_name].timespan
# The 'logical_or(timespan.is_null)' allows non-calibration
# collections to participate in the search, with the assumption
# that they are valid for any time range.
result = result.where(timespan_column.overlaps(timespan).logical_or(timespan_column.is_null))

datasets = list(result)
if len(datasets) == 1:
ref = datasets[0]
if dataId.hasRecords():
ref = ref.expanded(dataId)
# Propagate storage class from user-provided DatasetType, which
# may not match the definition in the database.
ref = ref.overrideStorageClass(datasetType.storageClass_name)
if datastore_records:
ref = self._registry.get_datastore_records(ref)
return ref
elif len(datasets) == 0:
return None
else:
raise CalibrationLookupError(
f"Ambiguous calibration lookup for {datasetType} with timespan {timespan}"
f" in collections {resolved_collections}."
)

def insertDatasets(
self,
datasetType: DatasetType | str,
@@ -299,97 +362,11 @@ def queryCollections(
expression, datasetType, collectionTypes, flattenChains, includeChains
)

def queryDatasets(
self,
datasetType: Any,
*,
collections: CollectionArgType | None = None,
dimensions: Iterable[str] | None = None,
dataId: DataId | None = None,
where: str = "",
findFirst: bool = False,
bind: Mapping[str, Any] | None = None,
check: bool = True,
**kwargs: Any,
) -> DatasetQueryResults:
# Docstring inherited from a base class.
return self._registry.queryDatasets(
datasetType,
collections=collections,
dimensions=dimensions,
dataId=dataId,
where=where,
findFirst=findFirst,
bind=bind,
check=check,
**kwargs,
)

def queryDataIds(
self,
dimensions: DimensionGroup | Iterable[str] | str,
*,
dataId: DataId | None = None,
datasets: Any = None,
collections: CollectionArgType | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
check: bool = True,
**kwargs: Any,
) -> DataCoordinateQueryResults:
# Docstring inherited from a base class.
return self._registry.queryDataIds(
dimensions,
dataId=dataId,
datasets=datasets,
collections=collections,
where=where,
bind=bind,
check=check,
**kwargs,
)

def queryDimensionRecords(
self,
element: DimensionElement | str,
*,
dataId: DataId | None = None,
datasets: Any = None,
collections: CollectionArgType | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
check: bool = True,
**kwargs: Any,
) -> DimensionRecordQueryResults:
# Docstring inherited from a base class.
return self._registry.queryDimensionRecords(
element,
dataId=dataId,
datasets=datasets,
collections=collections,
where=where,
bind=bind,
check=check,
**kwargs,
)

def queryDatasetAssociations(
self,
datasetType: str | DatasetType,
collections: CollectionArgType | None = ...,
*,
collectionTypes: Iterable[CollectionType] = CollectionType.all(),
flattenChains: bool = False,
) -> Iterator[DatasetAssociation]:
# Docstring inherited from a base class.
return self._registry.queryDatasetAssociations(
datasetType,
collections,
collectionTypes=collectionTypes,
flattenChains=flattenChains,
)

@property
def obsCoreTableManager(self) -> ObsCoreTableManager | None:
# Docstring inherited from a base class.
return self._registry.obsCoreTableManager

@property
def storageClasses(self) -> StorageClassFactory:
return self._registry.storageClasses
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/direct_butler/_direct_butler.py
@@ -1302,7 +1302,7 @@ def find_dataset(

data_id, kwargs = self._rewrite_data_id(data_id, parent_type, **kwargs)

ref = self._registry.findDataset(
ref = self.registry.findDataset(
parent_type,
data_id,
collections=collections,
6 changes: 5 additions & 1 deletion python/lsst/daf/butler/direct_query_driver/_driver.py
@@ -641,7 +641,11 @@ def _analyze_query_tree(self, tree: qt.QueryTree, allow_duplicate_overlaps: bool
# datasets later.
predicate_constraints = PredicateConstraintsSummary(tree.predicate)
# Use the default data ID to apply additional constraints where needed.
predicate_constraints.apply_default_data_id(self._default_data_id, tree.dimensions)
predicate_constraints.apply_default_data_id(
self._default_data_id,
tree.dimensions,
validate_governor_constraints=tree.validateGovernorConstraints,
)
predicate = predicate_constraints.predicate
# Delegate to the dimensions manager to rewrite the predicate and start
# a SqlSelectBuilder to cover any spatial overlap joins or constraints.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/queries/_expression_strings.py
@@ -244,7 +244,7 @@ def visitIdentifier(self, name: str, node: Node) -> _VisitorResult:
def visitBind(self, name: str, node: Node) -> _VisitorResult:
name = name.lower()
if name not in self.context.bind:
raise InvalidQueryError("Name {name!r} is not in the bind map.")
raise InvalidQueryError(f"Name {name!r} is not in the bind map.")
# Logic in visitIdentifier handles binds.
return self.visitIdentifier(name, node)

4 changes: 4 additions & 0 deletions python/lsst/daf/butler/queries/_query.py
@@ -746,6 +746,10 @@ def where(
driver=self._driver,
)

def _skip_governor_validation(self) -> Query:
tree = self._tree.model_copy(update={"validateGovernorConstraints": False})
return Query(tree=tree, driver=self._driver)

def _join_dataset_search_impl(
self,
dataset_type: str | DatasetType,
11 changes: 8 additions & 3 deletions python/lsst/daf/butler/queries/predicate_constraints_summary.py
@@ -71,7 +71,10 @@ def __init__(self, predicate: qt.Predicate) -> None:
)

def apply_default_data_id(
self, default_data_id: DataCoordinate, query_dimensions: DimensionGroup
self,
default_data_id: DataCoordinate,
query_dimensions: DimensionGroup,
validate_governor_constraints: bool,
) -> None:
"""Augment the predicate and summary by adding missing constraints for
governor dimensions using a default data ID.
@@ -81,9 +84,11 @@ def apply_default_data_id(
default_data_id : `DataCoordinate`
Data ID values that will be used to constrain the query if governor
dimensions have not already been constrained by the predicate.

query_dimensions : `DimensionGroup`
The set of dimensions returned in result rows from the query.
validate_governor_constraints : `bool`
If `True`, enforce the requirement that governor dimensions must be
constrained if any dimensions that depend on them have constraints.
"""
# Find governor dimensions required by the predicate.
# If these are not constrained by the predicate or the default data ID,
@@ -108,7 +113,7 @@
self.predicate = self.predicate.logical_and(
_create_data_id_predicate(governor, data_id_value, query_dimensions.universe)
)
elif governor in where_governors:
elif governor in where_governors and validate_governor_constraints:
# Check that the predicate doesn't reference any dimensions
# without constraining their governor dimensions, since
# that's a particularly easy mistake to make and it's
5 changes: 5 additions & 0 deletions python/lsst/daf/butler/queries/tree/_query_tree.py
@@ -146,6 +146,11 @@ class QueryTree(QueryTreeBase):
predicate: Predicate = Predicate.from_bool(True)
"""Boolean expression trees whose logical AND defines a row filter."""

validateGovernorConstraints: bool = True
"""If True, enforce the requirement that governor dimensions must be
constrained if any dimensions that depend on them have constraints.
"""

def iter_all_dataset_searches(self) -> Iterator[tuple[str | AnyDatasetType, DatasetSearch]]:
yield from self.datasets.items()
if self.any_dataset is not None:
9 changes: 6 additions & 3 deletions python/lsst/daf/butler/registry/_registry.py
@@ -1449,6 +1449,9 @@ def obsCoreTableManager(self) -> ObsCoreTableManager | None:
"""
return None

storageClasses: StorageClassFactory
"""All storage classes known to the registry (`StorageClassFactory`).
"""
@property
def storageClasses(self) -> StorageClassFactory:
"""All storage classes known to the registry
(`StorageClassFactory`).
"""
raise NotImplementedError()