1
+ import hashlib
1
2
import json
2
3
import os
3
4
from copy import deepcopy
9
10
from cumulus_lambda_functions .lib .lambda_logger_generator import LambdaLoggerGenerator
10
11
from cumulus_lambda_functions .lib .metadata_extraction .echo_metadata import EchoMetadata
11
12
from cumulus_lambda_functions .lib .time_utils import TimeUtils
12
- from cumulus_lambda_functions .metadata_s4pa_generate_cmr .pds_metadata import PdsMetadata
13
13
from cumulus_lambda_functions .metadata_stac_generate_cmr .stac_input_metadata import StacInputMetadata
14
14
15
15
LOGGER = LambdaLoggerGenerator .get_logger (__name__ , LambdaLoggerGenerator .get_level_from_env ())
47
47
"key" : {
48
48
"type" : "string"
49
49
},
50
+ "url_path" : {
51
+ "type" : "string"
52
+ },
50
53
"source" : {
51
54
"type" : "string"
52
55
},
61
64
}
62
65
},
63
66
"required" : [
64
- "bucket" ,
65
- "key" ,
66
67
"type"
67
- ]
68
+ ],
69
+ "oneOf" : [
70
+ {"required" : ["bucket" , "key" ]},
71
+ {"required" : ["url_path" ]}
72
+ ],
68
73
}
69
74
}
70
75
},
73
78
"files"
74
79
]
75
80
}
81
+ },
82
+ "collection" : {
83
+ "type" : "object" ,
84
+ "properties" : {
85
+ "name" : {"type" : "string" },
86
+ "version" : {"type" : "string" },
87
+ },
88
+ "required" : ["name" , "version" ]
76
89
}
77
90
},
78
91
"required" : [
79
- "input_granules"
92
+ "input_granules" , "collection"
80
93
]
81
94
}
82
95
},
83
96
"required" : [
84
97
"meta"
85
98
]
99
+ },
100
+ "extra_config" : {
101
+ "required" : [],
102
+ "properties" : {
103
+ "add_extra_keys" : {"type" : "boolean" }
104
+ }
86
105
}
87
106
},
88
107
"required" : []
@@ -111,13 +130,20 @@ def __validate_input(self):
111
130
112
131
def __get_pds_metadata_file (self ):
113
132
self .__input_file_list = self .__event ['cma' ]['event' ]['meta' ]['input_granules' ][0 ]['files' ]
133
+ stac_metadata_file = None
114
134
for each_file in self .__input_file_list :
135
+ if 'fileName' not in each_file and 'name' in each_file : # add fileName if there is only name
136
+ each_file ['fileName' ] = each_file ['name' ]
137
+ if 'url_path' in each_file :
138
+ s3_bucket , s3_key = self .__s3 .split_s3_url (each_file ['url_path' ])
139
+ each_file ['bucket' ] = s3_bucket
140
+ each_file ['key' ] = s3_key
115
141
LOGGER .debug (f'checking file: { each_file } ' )
116
142
file_key_upper = each_file ['key' ].upper ().strip ()
117
143
LOGGER .debug (f'checking file_key_upper: { file_key_upper } against { self .__file_postfixes } ' )
118
144
if any ([file_key_upper .endswith (k ) for k in self .__file_postfixes ]):
119
- return each_file
120
- return None
145
+ stac_metadata_file = each_file
146
+ return stac_metadata_file
121
147
122
148
def __read_pds_metadata_file (self ):
123
149
self ._pds_file_dict = self .__get_pds_metadata_file ()
@@ -127,25 +153,213 @@ def __read_pds_metadata_file(self):
127
153
self .__s3 .target_key = self ._pds_file_dict ['key' ]
128
154
return self .__s3 .read_small_txt_file ()
129
155
156
+ def __is_adding_extra_keys (self ):
157
+ if 'extra_config' not in self .__event ['cma' ]:
158
+ return True
159
+ if 'add_extra_keys' not in self .__event ['cma' ]['extra_config' ]:
160
+ return True
161
+ return self .__event ['cma' ]['extra_config' ]['add_extra_keys' ]
162
+
163
+ def __generate_output_dict (self , echo_metadata_md5 : str ):
164
+ output_dict = {
165
+ "checksumType" : "md5" ,
166
+ "checksum" : echo_metadata_md5 ,
167
+ "type" : "metadata" ,
168
+
169
+ "key" : self .__s3 .target_key ,
170
+ "fileName" : os .path .basename (self .__s3 .target_key ),
171
+ "bucket" : self .__s3 .target_bucket ,
172
+ "size" : int (self .__s3 .get_size ()),
173
+ }
174
+ if not self .__is_adding_extra_keys ():
175
+ return output_dict
176
+ output_dict = {** output_dict , ** {
177
+ "path" : os .path .dirname (self .__s3 .target_key ),
178
+ "name" : os .path .basename (self .__s3 .target_key ),
179
+ "source_bucket" : self .__s3 .target_bucket ,
180
+ "url_path" : f's3://{ self .__s3 .target_bucket } /{ self .__s3 .target_key } ' ,
181
+ }}
182
+ return output_dict
183
+
130
184
def start (self ):
185
+ """
186
+ sample event
187
+ {
188
+ "cma": {
189
+ "task_config": {
190
+ "bucket": "{$.meta.buckets.internal.name}",
191
+ "collection": "{$.meta.collection}",
192
+ "cumulus_message": {
193
+ "outputs": [
194
+ {
195
+ "source": "{$.files}",
196
+ "destination": "{$.payload}"
197
+ }
198
+ ]
199
+ }
200
+ },
201
+ "event": {
202
+ "cumulus_meta": {
203
+ "cumulus_version": "11.1.1",
204
+ "execution_name": "90c9c978-ca5e-47b1-9c4a-3d20c73a4743",
205
+ "message_source": "sfn",
206
+ "queueExecutionLimits": {
207
+ "https://sqs.us-west-2.amazonaws.com/237868187491/uds-dev-cumulus-backgroundProcessing": 5
208
+ },
209
+ "state_machine": "arn:aws:states:us-west-2:237868187491:stateMachine:uds-dev-cumulus-IngestGranule",
210
+ "system_bucket": "uds-dev-cumulus-internal",
211
+ "workflow_start_time": 1655943753534,
212
+ "parentExecutionArn": "arn:aws:states:us-west-2:237868187491:execution:uds-dev-cumulus-DiscoverGranules:707b8f70-ac78-4fa8-86f6-b74dcdfed287",
213
+ "queueUrl": "arn:aws:sqs:us-west-2:237868187491:uds-dev-cumulus-startSF"
214
+ },
215
+ "exception": "None",
216
+ "meta": {
217
+ "buckets": {
218
+ "internal": {
219
+ "name": "uds-dev-cumulus-internal",
220
+ "type": "internal"
221
+ },
222
+ "private": {
223
+ "name": "uds-dev-cumulus-private",
224
+ "type": "private"
225
+ },
226
+ "protected": {
227
+ "name": "uds-dev-cumulus-protected",
228
+ "type": "protected"
229
+ },
230
+ "public": {
231
+ "name": "uds-dev-cumulus-public",
232
+ "type": "public"
233
+ },
234
+ "sps": {
235
+ "name": "uds-dev-cumulus-sps",
236
+ "type": "protected"
237
+ },
238
+ "staging": {
239
+ "name": "uds-dev-cumulus-staging",
240
+ "type": "internal"
241
+ }
242
+ },
243
+ "cmr": {
244
+ "clientId": "CHANGEME",
245
+ "cmrEnvironment": "UAT",
246
+ "cmrLimit": 100,
247
+ "cmrPageSize": 50,
248
+ "oauthProvider": "earthdata",
249
+ "passwordSecretName": "uds-dev-cumulus-message-template-cmr-password20220330223854670000000005",
250
+ "provider": "CHANGEME",
251
+ "username": "username"
252
+ },
253
+ "collection": {
254
+ "duplicateHandling": "replace",
255
+ "process": "snpp.level1",
256
+ "files": [
257
+ {
258
+ "bucket": "protected",
259
+ "regex": "^SNDR.SNPP.ATMS.L1A.*\\ .nc$",
260
+ "reportToEms": false,
261
+ "sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
262
+ "type": "data"
263
+ },
264
+ {
265
+ "bucket": "protected",
266
+ "regex": "^SNDR.SNPP.ATMS.L1A.*\\ .nc\\ .cas$",
267
+ "reportToEms": false,
268
+ "sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
269
+ "type": "metadata"
270
+ },
271
+ {
272
+ "bucket": "protected",
273
+ "regex": "^SNDR.SNPP.ATMS.L1A.*\\ .nc\\ .cmr\\ .xml$",
274
+ "reportToEms": false,
275
+ "sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cmr.xml",
276
+ "type": "metadata"
277
+ }
278
+ ],
279
+ "granuleId": "^SNDR.SNPP.ATMS.L1A.*$",
280
+ "granuleIdExtraction": "(^SNDR.SNPP.ATMS.L1A.*)(\\ .nc|\\ .nc\\ .cas|\\ .nc\\ .cmr\\ .xml)",
281
+ "name": "SNDR_SNPP_ATMS_L1A_1",
282
+ "reportToEms": false,
283
+ "sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
284
+ "url_path": "{cmrMetadata.Granule.Collection.ShortName}",
285
+ "version": "1",
286
+ "updatedAt": 1655943525719,
287
+ "createdAt": 1655943525719
288
+ },
289
+ "process": "snpp.level1",
290
+ "distribution_endpoint": null,
291
+ "launchpad": {
292
+ "api": "launchpadApi",
293
+ "certificate": "launchpad.pfx",
294
+ "passphraseSecretName": ""
295
+ },
296
+ "provider": {
297
+ "id": "snpp_l1_s3",
298
+ "globalConnectionLimit": 1000,
299
+ "host": "uds-dev-cumulus-staging",
300
+ "protocol": "s3",
301
+ "createdAt": 1655943376376,
302
+ "updatedAt": 1655943376376
303
+ },
304
+ "stack": "uds-dev-cumulus",
305
+ "template": "s3://uds-dev-cumulus-internal/uds-dev-cumulus/workflow_template.json",
306
+ "workflow_name": "IngestGranule",
307
+ "workflow_tasks": {
308
+ "0": {
309
+ "name": "uds-dev-cumulus-SyncGranule",
310
+ "version": "$LATEST",
311
+ "arn": "arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-SyncGranule"
312
+ }
313
+ },
314
+ "staticValue": "aStaticValue",
315
+ "interpolatedValueStackName": "uds-dev-cumulus",
316
+ "input_granules": [
317
+ {
318
+ "granuleId": "SNDR.SNPP.ATMS.L1A.nominal2.01",
319
+ "dataType": "SNDR_SNPP_ATMS_L1A_1",
320
+ "version": "1",
321
+ "files": [
322
+ {
323
+ "size": 9194361,
324
+ "bucket": "uds-dev-cumulus-internal",
325
+ "key": "file-staging/uds-dev-cumulus/SNDR_SNPP_ATMS_L1A_1___1/SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
326
+ "source": "SNDR_SNPP_ATMS_L1A/SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
327
+ "fileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
328
+ "type": "data"
329
+ },
330
+ {
331
+ "size": 2673,
332
+ "bucket": "uds-dev-cumulus-internal",
333
+ "key": "file-staging/uds-dev-cumulus/SNDR_SNPP_ATMS_L1A_1___1/SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
334
+ "source": "SNDR_SNPP_ATMS_L1A/SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
335
+ "fileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
336
+ "type": "metadata"
337
+ }
338
+ ],
339
+ "sync_granule_duration": 694,
340
+ "createdAt": 1656010982847
341
+ }
342
+ ]
343
+ },
344
+ "payload": {},
345
+ "replace": {
346
+ "Bucket": "uds-dev-cumulus-internal",
347
+ "Key": "events/172fbbc4-f8ee-4974-8a77-37bc669accb0",
348
+ "TargetPath": "$.payload"
349
+ }
350
+ }
351
+ }
352
+ }
353
+ :return:
354
+ """
131
355
self .__validate_input ()
132
356
LOGGER .error (f'input: { self .__event } ' )
133
357
granules_metadata_props = StacInputMetadata (json .loads (self .__read_pds_metadata_file ())).start ()
134
358
echo_metadata = EchoMetadata (granules_metadata_props ).load ().echo_metadata
135
359
echo_metadata_xml_str = xmltodict .unparse (echo_metadata , pretty = True )
136
360
self .__s3 .target_key = os .path .join (os .path .dirname (self .__s3 .target_key ), f'{ granules_metadata_props .granule_id } .cmr.xml' )
137
361
self .__s3 .upload_bytes (echo_metadata_xml_str .encode ())
138
-
139
- # put payload
140
- # remove replace
141
- # add "task_config": {
142
- # "inputGranules": "{$.meta.input_granules}",
143
- # "granuleIdExtraction": "{$.meta.collection.granuleIdExtraction}"
144
- # },
145
- # return {
146
- # 'files': ['example', 'mock', 'return'],
147
- # 'granules': self.__event
148
- # }
362
+ echo_metadata_md5 = hashlib .md5 (echo_metadata_xml_str .encode ()).hexdigest ()
149
363
returning_dict = deepcopy (self .__event ['cma' ]['event' ])
150
364
if 'replace' in returning_dict :
151
365
returning_dict .pop ('replace' )
@@ -194,12 +408,7 @@ def start(self):
194
408
"granuleId" : self .__event ['cma' ]['event' ]['meta' ]['input_granules' ][0 ]['granuleId' ],
195
409
"dataType" : granules_metadata_props .collection_name ,
196
410
"version" : f'{ granules_metadata_props .collection_version } ' ,
197
- "files" : self .__input_file_list + [{
198
- "key" : self .__s3 .target_key ,
199
- "fileName" : os .path .basename (self .__s3 .target_key ),
200
- "bucket" : self .__s3 .target_bucket ,
201
- "size" : int (self .__s3 .get_size ()),
202
- }],
411
+ "files" : self .__input_file_list + [self .__generate_output_dict (echo_metadata_md5 )],
203
412
# "files": self.__input_file_list,
204
413
"sync_granule_duration" : 20302 ,
205
414
"createdAt" : TimeUtils .get_current_unix_milli (),
0 commit comments