@@ -164,21 +164,46 @@ def fetch_existing_datasets(
164
164
165
165
166
166
def validate_dataset(
    database_config: Database,
    dataset: str | None,
) -> bool:
    """
    Checks if the provided dataset currently exists in the metadata database.

    Primarily used by scripts that want to validate the dataset during argument parsing stage.

    :param database_config: Database configuration used to build the SQL adapter and to look up
                            the CLP connection parameters (including the table prefix).
    :param dataset: The dataset to validate. `None` or an empty string is treated as invalid.
    :return: False if `dataset` is None or empty; otherwise the result of
             `validate_and_cache_dataset` for the dataset.
    """
    if not dataset:
        # Nothing to look up, so avoid opening a database connection at all.
        return False

    sql_adapter: SQL_Adapter = SQL_Adapter(database_config)
    clp_db_connection_params = database_config.get_clp_connection_params_and_type(True)
    table_prefix: str = clp_db_connection_params["table_prefix"]

    # `closing` guarantees both the cursor and the connection are closed, even if the lookup
    # raises.
    with closing(sql_adapter.create_connection(True)) as db_conn, closing(
        db_conn.cursor(dictionary=True)
    ) as db_cursor:
        return validate_and_cache_dataset(db_cursor, table_prefix, dataset)
188
+
189
+
190
+ def validate_and_cache_dataset (
167
191
db_cursor ,
168
192
table_prefix : str ,
169
- dataset : str ,
193
+ dataset : str | None ,
170
194
existing_datasets : Set [str ] | None = None ,
171
195
) -> bool :
172
196
"""
173
- Checks if a dataset currently exists in the metadata or in the local dataset cache .
197
+ Checks if the provided dataset currently exists in the metadata database and cache it locally .
174
198
199
+ If the dataset already exists in the local cache, database query is skipped.
175
200
:param db_cursor:
176
201
:param table_prefix:
177
202
:param dataset: The dataset to validate.
178
- :param existing_datasets: Returns a refreshed cache of dataset names fetched from the metadata
179
- if the current cache doesn not contain the provided dataset and a
180
- lookup is required.
203
+ :param existing_datasets: Returns a refreshed cache of dataset names if a lookup is required.
181
204
"""
205
+ if not dataset :
206
+ return False
182
207
if existing_datasets is not None and dataset in existing_datasets :
183
208
return True
184
209
existing_datasets = fetch_existing_datasets (db_cursor , table_prefix )
0 commit comments