Skip to content

Commit 39322d0

Browse files
authored
fix: use cas structure to generate metadata for stac (#148)
* fix: use the CAS structure to generate metadata for STAC
* chore: update Makefile
* chore: update version
* fix: keep iterating over every file so that each one is updated, even after the matching metadata file has been found
1 parent 87bb9f8 commit 39322d0

File tree

4 files changed

+240
-26
lines changed

4 files changed

+240
-26
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [3.7.1] - 2023-05-04
9+
### Changed
10+
- [#148](https://github.com/unity-sds/unity-data-services/pull/148) fix: use cas structure to generate metadata for stac
11+
812
## [3.7.0] - 2023-04-25
913
### Added
1014
- [#146](https://github.com/unity-sds/unity-data-services/pull/146) feat: Stac metadata extraction

ci.cd/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ update_lambda_function_mcp_dev_5:
4343
aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-cumulus_collections_creation_dapa_facade --publish &>/dev/null
4444
update_lambda_function_mcp_dev_6:
4545
aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-metadata_s4pa_generate_cmr --publish &>/dev/null
46-
46+
update_lambda_function_mcp_dev_7:
47+
aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-metadata_stac_generate_cmr --publish &>/dev/null
4748

4849
mcp_dev: upload_lambda_mcp_dev update_lambda_function_mcp_dev_1 update_lambda_function_mcp_dev_2 update_lambda_function_mcp_dev_4 update_lambda_function_mcp_dev_5
4950

cumulus_lambda_functions/metadata_stac_generate_cmr/generate_cmr.py

Lines changed: 233 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import json
23
import os
34
from copy import deepcopy
@@ -9,7 +10,6 @@
910
from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
1011
from cumulus_lambda_functions.lib.metadata_extraction.echo_metadata import EchoMetadata
1112
from cumulus_lambda_functions.lib.time_utils import TimeUtils
12-
from cumulus_lambda_functions.metadata_s4pa_generate_cmr.pds_metadata import PdsMetadata
1313
from cumulus_lambda_functions.metadata_stac_generate_cmr.stac_input_metadata import StacInputMetadata
1414

1515
LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
@@ -47,6 +47,9 @@
4747
"key": {
4848
"type": "string"
4949
},
50+
"url_path": {
51+
"type": "string"
52+
},
5053
"source": {
5154
"type": "string"
5255
},
@@ -61,10 +64,12 @@
6164
}
6265
},
6366
"required": [
64-
"bucket",
65-
"key",
6667
"type"
67-
]
68+
],
69+
"oneOf": [
70+
{"required": ["bucket", "key"]},
71+
{"required": ["url_path"]}
72+
],
6873
}
6974
}
7075
},
@@ -73,16 +78,30 @@
7378
"files"
7479
]
7580
}
81+
},
82+
"collection": {
83+
"type": "object",
84+
"properties": {
85+
"name": {"type": "string"},
86+
"version": {"type": "string"},
87+
},
88+
"required": ["name", "version"]
7689
}
7790
},
7891
"required": [
79-
"input_granules"
92+
"input_granules", "collection"
8093
]
8194
}
8295
},
8396
"required": [
8497
"meta"
8598
]
99+
},
100+
"extra_config": {
101+
"required": [],
102+
"properties": {
103+
"add_extra_keys": {"type": "boolean"}
104+
}
86105
}
87106
},
88107
"required": []
@@ -111,13 +130,20 @@ def __validate_input(self):
111130

112131
def __get_pds_metadata_file(self):
    """
    Locate the STAC metadata file among the files of the first input granule.

    The file list is read from ``cma.event.meta.input_granules[0].files`` and
    stored on ``self.__input_file_list``. Every entry is normalized in place:

    - ``fileName`` is copied from ``name`` when only ``name`` is present.
    - ``bucket`` and ``key`` are derived from ``url_path`` when it is present.

    The loop deliberately has no early return so that every entry is
    normalized even after a matching file has been found.

    :return: the last file dict whose upper-cased, stripped ``key`` ends with
        one of ``self.__file_postfixes``, or None when no file matches
    :raises KeyError: when an entry has neither ``key`` nor ``url_path``
    """
    self.__input_file_list = self.__event['cma']['event']['meta']['input_granules'][0]['files']
    # str.endswith accepts a tuple of suffixes; build it once instead of
    # creating a new list for every file.
    postfixes = tuple(self.__file_postfixes)
    stac_metadata_file = None
    for each_file in self.__input_file_list:
        if 'fileName' not in each_file and 'name' in each_file:  # add fileName if there is only name
            each_file['fileName'] = each_file['name']
        if 'url_path' in each_file:
            # url_path takes precedence: it overwrites any existing bucket / key
            each_file['bucket'], each_file['key'] = self.__s3.split_s3_url(each_file['url_path'])
        LOGGER.debug(f'checking file: {each_file}')
        # the key is upper-cased before matching, so only upper-case postfixes can match
        file_key_upper = each_file['key'].upper().strip()
        LOGGER.debug(f'checking file_key_upper: {file_key_upper} against {self.__file_postfixes}')
        if file_key_upper.endswith(postfixes):
            # no break: later matches override earlier ones, and the
            # remaining files still need to be normalized
            stac_metadata_file = each_file
    return stac_metadata_file
121147

122148
def __read_pds_metadata_file(self):
123149
self._pds_file_dict = self.__get_pds_metadata_file()
@@ -127,25 +153,213 @@ def __read_pds_metadata_file(self):
127153
self.__s3.target_key = self._pds_file_dict['key']
128154
return self.__s3.read_small_txt_file()
129155

156+
def __is_adding_extra_keys(self):
    """
    Tell whether the extra output keys should be added to the generated file entry.

    Reads ``cma.extra_config.add_extra_keys`` from the event. The flag
    defaults to True when ``extra_config`` or ``add_extra_keys`` is absent.

    :return: the configured ``add_extra_keys`` value, or True when it is not set
    """
    extra_config = self.__event['cma'].get('extra_config', {})
    return extra_config.get('add_extra_keys', True)
162+
163+
def __generate_output_dict(self, echo_metadata_md5: str):
    """
    Build the file entry describing the object at the current S3 target.

    The base entry always carries the md5 checksum, the ``metadata`` type, and
    the key / fileName / bucket / size taken from ``self.__s3``. When
    ``self.__is_adding_extra_keys()`` is truthy, ``path``, ``name``,
    ``source_bucket`` and ``url_path`` are appended as well.

    NOTE(review): with the extra keys the entry holds ``bucket``, ``key`` and
    ``url_path`` at the same time. The input schema's ``oneOf`` accepts exactly
    one of those two forms, so confirm this entry is never re-validated
    against that schema.

    :param echo_metadata_md5: hex digest stored under ``checksum``
    :return: dict describing the generated metadata file
    """
    target_key = self.__s3.target_key
    target_bucket = self.__s3.target_bucket
    file_name = os.path.basename(target_key)
    output_dict = {
        "checksumType": "md5",
        "checksum": echo_metadata_md5,
        "type": "metadata",

        "key": target_key,
        "fileName": file_name,
        "bucket": target_bucket,
        "size": int(self.__s3.get_size()),
    }
    if self.__is_adding_extra_keys():
        # extend in place instead of rebuilding the dict; key order is unchanged
        output_dict.update({
            "path": os.path.dirname(target_key),
            "name": file_name,
            "source_bucket": target_bucket,
            "url_path": f's3://{target_bucket}/{target_key}',
        })
    return output_dict
183+
130184
def start(self):
185+
"""
186+
sample event
187+
{
188+
"cma": {
189+
"task_config": {
190+
"bucket": "{$.meta.buckets.internal.name}",
191+
"collection": "{$.meta.collection}",
192+
"cumulus_message": {
193+
"outputs": [
194+
{
195+
"source": "{$.files}",
196+
"destination": "{$.payload}"
197+
}
198+
]
199+
}
200+
},
201+
"event": {
202+
"cumulus_meta": {
203+
"cumulus_version": "11.1.1",
204+
"execution_name": "90c9c978-ca5e-47b1-9c4a-3d20c73a4743",
205+
"message_source": "sfn",
206+
"queueExecutionLimits": {
207+
"https://sqs.us-west-2.amazonaws.com/237868187491/uds-dev-cumulus-backgroundProcessing": 5
208+
},
209+
"state_machine": "arn:aws:states:us-west-2:237868187491:stateMachine:uds-dev-cumulus-IngestGranule",
210+
"system_bucket": "uds-dev-cumulus-internal",
211+
"workflow_start_time": 1655943753534,
212+
"parentExecutionArn": "arn:aws:states:us-west-2:237868187491:execution:uds-dev-cumulus-DiscoverGranules:707b8f70-ac78-4fa8-86f6-b74dcdfed287",
213+
"queueUrl": "arn:aws:sqs:us-west-2:237868187491:uds-dev-cumulus-startSF"
214+
},
215+
"exception": "None",
216+
"meta": {
217+
"buckets": {
218+
"internal": {
219+
"name": "uds-dev-cumulus-internal",
220+
"type": "internal"
221+
},
222+
"private": {
223+
"name": "uds-dev-cumulus-private",
224+
"type": "private"
225+
},
226+
"protected": {
227+
"name": "uds-dev-cumulus-protected",
228+
"type": "protected"
229+
},
230+
"public": {
231+
"name": "uds-dev-cumulus-public",
232+
"type": "public"
233+
},
234+
"sps": {
235+
"name": "uds-dev-cumulus-sps",
236+
"type": "protected"
237+
},
238+
"staging": {
239+
"name": "uds-dev-cumulus-staging",
240+
"type": "internal"
241+
}
242+
},
243+
"cmr": {
244+
"clientId": "CHANGEME",
245+
"cmrEnvironment": "UAT",
246+
"cmrLimit": 100,
247+
"cmrPageSize": 50,
248+
"oauthProvider": "earthdata",
249+
"passwordSecretName": "uds-dev-cumulus-message-template-cmr-password20220330223854670000000005",
250+
"provider": "CHANGEME",
251+
"username": "username"
252+
},
253+
"collection": {
254+
"duplicateHandling": "replace",
255+
"process": "snpp.level1",
256+
"files": [
257+
{
258+
"bucket": "protected",
259+
"regex": "^SNDR.SNPP.ATMS.L1A.*\\.nc$",
260+
"reportToEms": false,
261+
"sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
262+
"type": "data"
263+
},
264+
{
265+
"bucket": "protected",
266+
"regex": "^SNDR.SNPP.ATMS.L1A.*\\.nc\\.cas$",
267+
"reportToEms": false,
268+
"sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
269+
"type": "metadata"
270+
},
271+
{
272+
"bucket": "protected",
273+
"regex": "^SNDR.SNPP.ATMS.L1A.*\\.nc\\.cmr\\.xml$",
274+
"reportToEms": false,
275+
"sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cmr.xml",
276+
"type": "metadata"
277+
}
278+
],
279+
"granuleId": "^SNDR.SNPP.ATMS.L1A.*$",
280+
"granuleIdExtraction": "(^SNDR.SNPP.ATMS.L1A.*)(\\.nc|\\.nc\\.cas|\\.nc\\.cmr\\.xml)",
281+
"name": "SNDR_SNPP_ATMS_L1A_1",
282+
"reportToEms": false,
283+
"sampleFileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
284+
"url_path": "{cmrMetadata.Granule.Collection.ShortName}",
285+
"version": "1",
286+
"updatedAt": 1655943525719,
287+
"createdAt": 1655943525719
288+
},
289+
"process": "snpp.level1",
290+
"distribution_endpoint": null,
291+
"launchpad": {
292+
"api": "launchpadApi",
293+
"certificate": "launchpad.pfx",
294+
"passphraseSecretName": ""
295+
},
296+
"provider": {
297+
"id": "snpp_l1_s3",
298+
"globalConnectionLimit": 1000,
299+
"host": "uds-dev-cumulus-staging",
300+
"protocol": "s3",
301+
"createdAt": 1655943376376,
302+
"updatedAt": 1655943376376
303+
},
304+
"stack": "uds-dev-cumulus",
305+
"template": "s3://uds-dev-cumulus-internal/uds-dev-cumulus/workflow_template.json",
306+
"workflow_name": "IngestGranule",
307+
"workflow_tasks": {
308+
"0": {
309+
"name": "uds-dev-cumulus-SyncGranule",
310+
"version": "$LATEST",
311+
"arn": "arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-SyncGranule"
312+
}
313+
},
314+
"staticValue": "aStaticValue",
315+
"interpolatedValueStackName": "uds-dev-cumulus",
316+
"input_granules": [
317+
{
318+
"granuleId": "SNDR.SNPP.ATMS.L1A.nominal2.01",
319+
"dataType": "SNDR_SNPP_ATMS_L1A_1",
320+
"version": "1",
321+
"files": [
322+
{
323+
"size": 9194361,
324+
"bucket": "uds-dev-cumulus-internal",
325+
"key": "file-staging/uds-dev-cumulus/SNDR_SNPP_ATMS_L1A_1___1/SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
326+
"source": "SNDR_SNPP_ATMS_L1A/SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
327+
"fileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
328+
"type": "data"
329+
},
330+
{
331+
"size": 2673,
332+
"bucket": "uds-dev-cumulus-internal",
333+
"key": "file-staging/uds-dev-cumulus/SNDR_SNPP_ATMS_L1A_1___1/SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
334+
"source": "SNDR_SNPP_ATMS_L1A/SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
335+
"fileName": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc.cas",
336+
"type": "metadata"
337+
}
338+
],
339+
"sync_granule_duration": 694,
340+
"createdAt": 1656010982847
341+
}
342+
]
343+
},
344+
"payload": {},
345+
"replace": {
346+
"Bucket": "uds-dev-cumulus-internal",
347+
"Key": "events/172fbbc4-f8ee-4974-8a77-37bc669accb0",
348+
"TargetPath": "$.payload"
349+
}
350+
}
351+
}
352+
}
353+
:return:
354+
"""
131355
self.__validate_input()
132356
LOGGER.error(f'input: {self.__event}')
133357
granules_metadata_props = StacInputMetadata(json.loads(self.__read_pds_metadata_file())).start()
134358
echo_metadata = EchoMetadata(granules_metadata_props).load().echo_metadata
135359
echo_metadata_xml_str = xmltodict.unparse(echo_metadata, pretty=True)
136360
self.__s3.target_key = os.path.join(os.path.dirname(self.__s3.target_key), f'{granules_metadata_props.granule_id}.cmr.xml')
137361
self.__s3.upload_bytes(echo_metadata_xml_str.encode())
138-
139-
# put payload
140-
# remove replace
141-
# add "task_config": {
142-
# "inputGranules": "{$.meta.input_granules}",
143-
# "granuleIdExtraction": "{$.meta.collection.granuleIdExtraction}"
144-
# },
145-
# return {
146-
# 'files': ['example', 'mock', 'return'],
147-
# 'granules': self.__event
148-
# }
362+
echo_metadata_md5 = hashlib.md5(echo_metadata_xml_str.encode()).hexdigest()
149363
returning_dict = deepcopy(self.__event['cma']['event'])
150364
if 'replace' in returning_dict:
151365
returning_dict.pop('replace')
@@ -194,12 +408,7 @@ def start(self):
194408
"granuleId": self.__event['cma']['event']['meta']['input_granules'][0]['granuleId'],
195409
"dataType": granules_metadata_props.collection_name,
196410
"version": f'{granules_metadata_props.collection_version}',
197-
"files": self.__input_file_list + [{
198-
"key": self.__s3.target_key,
199-
"fileName": os.path.basename(self.__s3.target_key),
200-
"bucket": self.__s3.target_bucket,
201-
"size": int(self.__s3.get_size()),
202-
}],
411+
"files": self.__input_file_list + [self.__generate_output_dict(echo_metadata_md5)],
203412
# "files": self.__input_file_list,
204413
"sync_granule_duration": 20302,
205414
"createdAt": TimeUtils.get_current_unix_milli(),

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
setup(
2020
name="cumulus_lambda_functions",
21-
version="3.7.0",
21+
version="3.7.1",
2222
packages=find_packages(),
2323
install_requires=install_requires,
2424
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

0 commit comments

Comments
 (0)