@@ -36,24 +36,18 @@
 import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any
 
 import sqlalchemy
 
 from lsst.resources import ResourcePathExpression
 from lsst.utils.iteration import ensure_iterable
 
 from .._collection_type import CollectionType
-from .._column_tags import DatasetColumnTag
 from .._config import Config
 from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from .._dataset_type import DatasetType
-from .._exceptions import (
-    CalibrationLookupError,
-    DataIdValueError,
-    DimensionNameError,
-    InconsistentDataIdError,
-)
+from .._exceptions import DataIdValueError, DimensionNameError, InconsistentDataIdError
 from .._storage_class import StorageClassFactory
 from .._timespan import Timespan
 from ..dimensions import (
@@ -90,7 +84,6 @@
     from .._butler_config import ButlerConfig
     from ..datastore._datastore import DatastoreOpaqueTable
     from ..datastore.stored_file_info import StoredDatastoreItemInfo
-    from ..registry._registry import CollectionArgType
     from ..registry.interfaces import (
         CollectionRecord,
         Database,
@@ -830,169 +823,6 @@ def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
         """
         return True
 
-    def findDataset(
-        self,
-        datasetType: DatasetType | str,
-        dataId: DataId | None = None,
-        *,
-        collections: CollectionArgType | None = None,
-        timespan: Timespan | None = None,
-        datastore_records: bool = False,
-        **kwargs: Any,
-    ) -> DatasetRef | None:
-        """Find a dataset given its `DatasetType` and data ID.
-
-        This can be used to obtain a `DatasetRef` that permits the dataset to
-        be read from a `Datastore`. If the dataset is a component and can not
-        be found using the provided dataset type, a dataset ref for the parent
-        will be returned instead but with the correct dataset type.
-
-        Parameters
-        ----------
-        datasetType : `DatasetType` or `str`
-            A `DatasetType` or the name of one. If this is a `DatasetType`
-            instance, its storage class will be respected and propagated to
-            the output, even if it differs from the dataset type definition
-            in the registry, as long as the storage classes are convertible.
-        dataId : `dict` or `DataCoordinate`, optional
-            A `dict`-like object containing the `Dimension` links that identify
-            the dataset within a collection.
-        collections : collection expression, optional
-            An expression that fully or partially identifies the collections to
-            search for the dataset; see
-            :ref:`daf_butler_collection_expressions` for more information.
-            Defaults to ``self.defaults.collections``.
-        timespan : `Timespan`, optional
-            A timespan that the validity range of the dataset must overlap.
-            If not provided, any `~CollectionType.CALIBRATION` collections
-            matched by the ``collections`` argument will not be searched.
-        datastore_records : `bool`, optional
-            Whether to attach datastore records to the `DatasetRef`.
-        **kwargs
-            Additional keyword arguments passed to
-            `DataCoordinate.standardize` to convert ``dataId`` to a true
-            `DataCoordinate` or augment an existing one.
-
-        Returns
-        -------
-        ref : `DatasetRef`
-            A reference to the dataset, or `None` if no matching Dataset
-            was found.
-
-        Raises
-        ------
-        lsst.daf.butler.registry.NoDefaultCollectionError
-            Raised if ``collections`` is `None` and
-            ``self.defaults.collections`` is `None`.
-        LookupError
-            Raised if one or more data ID keys are missing.
-        lsst.daf.butler.registry.MissingDatasetTypeError
-            Raised if the dataset type does not exist.
-        lsst.daf.butler.registry.MissingCollectionError
-            Raised if any of ``collections`` does not exist in the registry.
-
-        Notes
-        -----
-        This method simply returns `None` and does not raise an exception even
-        when the set of collections searched is intrinsically incompatible with
-        the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
-        only `~CollectionType.CALIBRATION` collections are being searched.
-        This may make it harder to debug some lookup failures, but the behavior
-        is intentional; we consider it more important that failed searches are
-        reported consistently, regardless of the reason, and that adding
-        additional collections that do not contain a match to the search path
-        never changes the behavior.
-
-        This method handles component dataset types automatically, though most
-        other registry operations do not.
-        """
-        if collections is None:
-            if not self.defaults.collections:
-                raise NoDefaultCollectionError(
-                    "No collections provided to findDataset, and no defaults from registry construction."
-                )
-            collections = self.defaults.collections
-        backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
-        with backend.caching_context():
-            collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
-            if collection_wildcard.empty():
-                return None
-            matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
-            resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
-            dataId = DataCoordinate.standardize(
-                dataId,
-                dimensions=resolved_dataset_type.dimensions,
-                universe=self.dimensions,
-                defaults=self.defaults.dataId,
-                **kwargs,
-            )
-            governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors}
-            (filtered_collections,) = backend.filter_dataset_collections(
-                [resolved_dataset_type],
-                matched_collections,
-                governor_constraints=governor_constraints,
-            ).values()
-            if not filtered_collections:
-                return None
-            if timespan is None:
-                filtered_collections = [
-                    collection_record
-                    for collection_record in filtered_collections
-                    if collection_record.type is not CollectionType.CALIBRATION
-                ]
-            if filtered_collections:
-                requested_columns = {"dataset_id", "run", "collection"}
-                with backend.context() as context:
-                    predicate = context.make_data_coordinate_predicate(
-                        dataId.subset(resolved_dataset_type.dimensions), full=False
-                    )
-                    if timespan is not None:
-                        requested_columns.add("timespan")
-                        predicate = predicate.logical_and(
-                            context.make_timespan_overlap_predicate(
-                                DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan
-                            )
-                        )
-                    relation = backend.make_dataset_query_relation(
-                        resolved_dataset_type, filtered_collections, requested_columns, context
-                    ).with_rows_satisfying(predicate)
-                    rows = list(context.fetch_iterable(relation))
-            else:
-                rows = []
-            if not rows:
-                return None
-            elif len(rows) == 1:
-                best_row = rows[0]
-            else:
-                rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
-                collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection")
-                row_iter = iter(rows)
-                best_row = next(row_iter)
-                best_rank = rank_by_collection_key[best_row[collection_tag]]
-                have_tie = False
-                for row in row_iter:
-                    if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
-                        best_row = row
-                        best_rank = rank
-                        have_tie = False
-                    elif rank == best_rank:
-                        have_tie = True
-                        assert timespan is not None, "Rank ties should be impossible given DB constraints."
-                if have_tie:
-                    raise CalibrationLookupError(
-                        f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections "
-                        f"{collection_wildcard.strings} with timespan {timespan}."
-                    )
-            reader = queries.DatasetRefReader(
-                resolved_dataset_type,
-                translate_collection=lambda k: self._managers.collections[k].name,
-            )
-            ref = reader.read(best_row, data_id=dataId)
-            if datastore_records:
-                ref = self.get_datastore_records(ref)
-
-            return ref
-
     @transactional
     def insertDatasets(
         self,
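
For reference, the call pattern of the removed method, reconstructed from the signature and docstring above. This is a hedged sketch: the dataset type name, data ID keys, and collection name are hypothetical placeholders, and `registry` stands for an already-constructed SQL registry instance.

    # Hypothetical call to the removed findDataset; "bias", the data ID keys,
    # and "HSC/calib" are illustrative, not values taken from this repository.
    ref = registry.findDataset(
        "bias",
        {"instrument": "HSC", "detector": 0},
        collections=["HSC/calib"],
        timespan=timespan,  # without this, CALIBRATION collections are skipped
    )
    if ref is None:
        # Failed searches return None rather than raising, by design (see the
        # Notes section of the removed docstring).
        print("no matching dataset found")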
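The `timespan` argument selected calibrations whose validity range overlaps the requested interval, via `make_timespan_overlap_predicate`. A minimal sketch of the overlap test itself, assuming `lsst.daf.butler.Timespan` constructed from astropy `Time` endpoints and its `overlaps` method; the specific dates are invented for illustration.

    from astropy.time import Time
    from lsst.daf.butler import Timespan

    # An assumed validity range: a calibration valid for the first half of 2020.
    validity = Timespan(Time("2020-01-01", scale="tai"), Time("2020-07-01", scale="tai"))
    # The interval a caller would pass as the timespan argument.
    requested = Timespan(Time("2020-03-01", scale="tai"), Time("2020-03-02", scale="tai"))
    print(validity.overlaps(requested))  # True: this calibration would match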
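When several collections matched, the removed code ranked each result row by the position of its collection in the ordered search path and kept the lowest rank; a tie at the best rank (possible only for calibration searches) raised `CalibrationLookupError`. A self-contained sketch of that selection logic, with rows simplified to `(collection_key, payload)` pairs; the helper name and row shape are mine, not the library's.

    def select_by_search_order(rows: list[tuple[str, str]], search_path: list[str]) -> str:
        """Pick the row from the earliest collection in search_path; error on ties.

        Assumes rows is non-empty, as in the removed code.
        """
        rank = {key: n for n, key in enumerate(search_path)}
        best_payload, best_rank, have_tie = "", len(search_path), False
        for collection_key, payload in rows:
            r = rank[collection_key]
            if r < best_rank:
                best_payload, best_rank, have_tie = payload, r, False
            elif r == best_rank:
                have_tie = True  # two matches in the same best-ranked collection
        if have_tie:
            raise LookupError("ambiguous lookup: multiple matches in best-ranked collection")
        return best_payload

    # Both collections contain a match; "run/a" comes first in the search path, so it wins.
    print(select_by_search_order([("run/b", "id-2"), ("run/a", "id-1")], ["run/a", "run/b"]))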