44
44
45
45
from .._butler import Butler
46
46
from .._collection_type import CollectionType
47
+ from .._dataset_ref import DatasetRef
47
48
from .._dataset_type import DatasetType
48
- from .._exceptions import EmptyQueryResultError , InvalidQueryError
49
+ from .._exceptions import (
50
+ EmptyQueryResultError ,
51
+ InvalidQueryError ,
52
+ MissingCollectionError ,
53
+ MissingDatasetTypeError ,
54
+ )
49
55
from .._timespan import Timespan
50
56
from ..dimensions import DataCoordinate , DimensionRecord
51
57
from ..direct_query_driver import DirectQueryDriver
@@ -2007,6 +2013,130 @@ def test_unusual_column_literals(self) -> None:
2007
2013
names = [x .full_name for x in result ]
2008
2014
self .assertEqual (names , ["Ba" ])
2009
2015
2016
def test_query_all_datasets(self) -> None:
    """Exercise ``Butler._query_all_datasets`` on the ``base.yaml`` and
    ``datasets.yaml`` test data.

    The checks cover, in order: the content of returned refs, unconstrained
    wildcard searches, data ID / bind / ``where`` constraints, find-first
    over a chained collection, collection and dataset type wildcards,
    errors for missing collections and dataset types, ``limit`` handling,
    and registry defaults.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")

    # Dataset IDs that several of the checks below refer to.
    bias_g_det2 = "51352db4-a47a-447c-b12d-a50b206b17cd"
    flat_g_det2 = "60c8a65c-7290-4c38-b1de-e3b1cdcf872d"
    bias_r_det2 = "87f3e68d-258d-41b7-8ea5-edf3557ccb30"
    flat_r_det2 = "c1296796-56c5-4acf-9b49-40d920c6f840"
    # The two detector=1 datasets; the second is the one selected by the
    # "*g" collection wildcard and the "b*" dataset type wildcard.
    det1_other = "d0bb04cd-d697-4a83-ba53-cdfcd58e3a0c"
    det1_wildcard_match = "e15ab039-bc8b-4135-87c5-90902a7c0b22"
    detector_1_ids = (det1_other, det1_wildcard_match)

    # Refs must come back well-formed.
    refs = sorted(
        butler._query_all_datasets("imported_r", where="detector = 2", instrument="Cam1"),
        key=lambda ref: ref.datasetType.name,
    )
    self.assertEqual(len(refs), 2)
    bias, flat = refs
    for ref, type_name, dataset_id in ((bias, "bias", bias_r_det2), (flat, "flat", flat_r_det2)):
        self.assertEqual(ref.datasetType.name, type_name)
        self.assertEqual(ref.dataId["instrument"], "Cam1")
        self.assertEqual(ref.dataId["detector"], 2)
        self.assertEqual(ref.run, "imported_r")
        self.assertEqual(ref.id, UUID(dataset_id))
    # Only the flat carries these extra data ID keys.
    self.assertEqual(flat.dataId["physical_filter"], "Cam1-R1")
    self.assertEqual(flat.dataId["band"], "r")

    # An unconstrained wildcard search returns every dataset.
    everything = butler._query_all_datasets("*", find_first=False)
    self.assertEqual(len(everything), 13)

    # Data ID constraints are honored.
    by_data_id = butler._query_all_datasets(
        "*", data_id={"detector": 1, "instrument": "Cam1"}, find_first=False
    )
    self.assertCountEqual(detector_1_ids, _ref_uuids(by_data_id))

    # Bind values are usable from the 'where' expression.
    by_bind = butler._query_all_datasets(
        "*", where="detector=my_bind and instrument='Cam1'", bind={"my_bind": 1}, find_first=False
    )
    self.assertCountEqual(detector_1_ids, _ref_uuids(by_bind))

    # find_first needs an ordered collection list, so a collection
    # wildcard is rejected.
    with self.assertRaisesRegex(InvalidQueryError, "Can not use wildcards"):
        butler._query_all_datasets("*")

    # find_first walks the chain in order: the detector=2 bias in
    # imported_r is masked by the same data ID in imported_g, so it is
    # absent from the expected IDs.
    butler.collections.register("chain", CollectionType.CHAINED)
    butler.collections.redefine_chain("chain", ["imported_g", "imported_r"])
    first_found = butler._query_all_datasets(
        "chain", where="detector=2 and instrument = 'Cam1'", find_first=True
    )
    self.assertCountEqual(_ref_uuids(first_found), [bias_g_det2, flat_g_det2, flat_r_det2])

    # Collection wildcards are resolved.
    by_collection = butler._query_all_datasets(
        "*g", where="detector=1 and instrument = 'Cam1'", find_first=False
    )
    self.assertEqual(_ref_uuids(by_collection), [det1_wildcard_match])

    # An explicitly named collection that does not exist is an error...
    with self.assertRaises(MissingCollectionError):
        butler._query_all_datasets("nonexistent")
    # ...but a collection wildcard that matches nothing is just empty.
    unmatched = butler._query_all_datasets("nonexistent*", find_first=False)
    self.assertEqual(unmatched, [])

    # Dataset type wildcards are resolved.
    by_type = butler._query_all_datasets(
        "*", name="b*", where="detector=1 and instrument = 'Cam1'", find_first=False
    )
    self.assertEqual(_ref_uuids(by_type), [det1_wildcard_match])

    # Missing dataset types raise, for explicit names and wildcards alike.
    with self.assertRaises(MissingDatasetTypeError):
        butler._query_all_datasets("chain", name=["notfound", "flat"])
    with self.assertRaises(MissingDatasetTypeError):
        butler._query_all_datasets("chain", name="notfound*")

    # A limit of 3 lands on a dataset type boundary; a limit of 4 falls in
    # the middle of a dataset type.
    for limit in (3, 4):
        with self.subTest(limit=limit):
            limited = butler._query_all_datasets("imported_g", limit=limit)
            self.assertEqual(len(limited), limit)
            # A negative limit yields the same number of results and
            # additionally logs a warning about the requested limit.
            with self.assertLogs(level="WARNING") as captured:
                limited = butler._query_all_datasets("imported_g", limit=-limit)
                self.assertEqual(len(limited), limit)
                self.assertIn("requested limit", captured.output[0])

    nothing = butler._query_all_datasets("imported_g", limit=0)
    self.assertEqual(len(nothing), 0)

    # 'where' constraints that don't apply to all dataset types follow the
    # same rules as query_datasets: bias has no 'band' so both detector=2
    # biases match, while flat does have 'band' and is filtered on it.
    by_band = butler._query_all_datasets(
        "*", where="detector = 2 and band = 'g' and instrument = 'Cam1'", find_first=False
    )
    self.assertCountEqual(_ref_uuids(by_band), [bias_g_det2, bias_r_det2, flat_g_det2])

    # Default collections and data ID apply.
    butler.registry.defaults = RegistryDefaults(collections="imported_g")
    with_defaults = butler._query_all_datasets(where="detector = 2")
    self.assertCountEqual(_ref_uuids(with_defaults), [bias_g_det2, flat_g_det2])
2010
2140
2011
2141
def _get_exposure_ids_from_dimension_records (dimension_records : Iterable [DimensionRecord ]) -> list [int ]:
2012
2142
output = []
@@ -2016,3 +2146,7 @@ def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[Dimensi
2016
2146
output .append (id )
2017
2147
2018
2148
return output
2149
+
2150
+
2151
def _ref_uuids(refs: list[DatasetRef]) -> list[str]:
    """Return the string form of each ref's ``id``, in input order.

    Parameters
    ----------
    refs : `list` [ `DatasetRef` ]
        Dataset references whose IDs should be extracted.

    Returns
    -------
    uuids : `list` [ `str` ]
        ``str(ref.id)`` for every element of ``refs``.
    """
    dataset_ids = (dataset_ref.id for dataset_ref in refs)
    return list(map(str, dataset_ids))
0 commit comments