Skip to content

Commit ca7e8cc

Browse files
authored
breaking: pystac validation test + upload / download docker unit test + collection STAC schema update (#71)
* breaking: testing with pystac + make changes accordingly * chore: convert manual test to unittest case * fix: update test case * fix: update upload granule to match new pystac collection schema * chore: stashing collection transformer to use pystac (in-progress) * chore: remove unused file * chore: update gitignore file * fix: update test cases
1 parent e958bd5 commit ca7e8cc

File tree

14 files changed

+304
-82
lines changed

14 files changed

+304
-82
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ scratch*
55
local*
66
*egg-info*
77
dist
8-
__pycache__
8+
__pycache__
9+
.env

ci.cd/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ zip_docker:
1212
docker save "$(IMAGE_PREFIX)/$(NAME):$(VERSION)" | gzip > "$(NAME)__$(VERSION).tar.gz"
1313

1414
build_lambda:
15-
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.7 ci.cd/create_s3_zip.sh
15+
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.9 ci.cd/create_s3_zip.sh
1616

1717
build_lambda_public:
1818
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" python:3.7 ci.cd/create_s3_zip.sh

ci.cd/create_s3_zip.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ zip_file="${project_root_dir}/$ZIP_NAME" ; # save the result file in current wor
88

99
tmp_proj='/tmp/cumulus_lambda_functions'
1010

11-
source_dir="/usr/local/lib/python3.7/site-packages/"
11+
source_dir="/usr/local/lib/python3.9/site-packages/"
1212

1313
mkdir -p "$tmp_proj/cumulus_lambda_functions" && \
1414
cd $tmp_proj && \

cumulus_lambda_functions/cumulus_stac/collection_transformer.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import json
2+
from datetime import datetime
3+
from urllib.parse import quote_plus
4+
25

36
from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract
47

@@ -318,9 +321,12 @@ def __convert_to_stac_links(self, collection_file_obj: dict):
318321
href_link[0] = collection_file_obj['bucket']
319322
if 'regex' in collection_file_obj:
320323
href_link[1] = collection_file_obj['regex']
321-
stac_link['href'] = '___'.join(href_link)
324+
stac_link['href'] = f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}"
322325
return stac_link
323326

327+
# def to_pystac_link_obj(self, input_dict: dict):
328+
# return
329+
324330
def to_stac(self, source: dict) -> dict:
325331
source_sample = {
326332
"createdAt": 1647992847582,
@@ -366,6 +372,26 @@ def to_stac(self, source: dict) -> dict:
366372
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
367373
"timestamp": 1647992849273
368374
}
375+
# TemporalIntervals([
376+
# datetime.strptime(source['dateFrom'])
377+
# ])
378+
# stac_collection = pystac.Collection(
379+
# id=f"{source['name']}___{source['version']}",
380+
# description='TODO',
381+
# extent=Extent(
382+
# SpatialExtent([[0, 0, 0, 0]]),
383+
# TemporalExtent([[source['dateFrom'] if 'dateFrom' in source else None,
384+
# source['dateTo'] if 'dateTo' in source else None]])
385+
# ),
386+
# summaries=Summaries({
387+
# "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
388+
# "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
389+
# "process": [source['process'] if 'process' in source else ''],
390+
# "totalGranules": [source['total_size'] if 'total_size' in source else -1],
391+
# }),
392+
# )
393+
# stac_collection.get_root_link().target = './collection.json'
394+
# stac_collection.add_links([Link.from_dict(k) for k in [self.__convert_to_stac_links(k) for k in source['files']]])
369395
stac_collection = {
370396
"type": "Collection",
371397
"stac_version": "1.0.0",
@@ -380,19 +406,24 @@ def to_stac(self, source: dict) -> dict:
380406
"bbox": [[0, 0, 0, 0]]
381407
},
382408
"temporal": {
383-
"interval": [source['dateFrom'] if 'dateFrom' in source else None,
409+
"interval": [[source['dateFrom'] if 'dateFrom' in source else None,
384410
source['dateTo'] if 'dateTo' in source else None
385-
]
411+
]]
386412
}
387413
},
388414
"assets": {},
389415
"summaries": {
390-
"granuleId": source['granuleId'] if 'granuleId' in source else '',
391-
"granuleIdExtraction": source['granuleIdExtraction'] if 'granuleIdExtraction' in source else '',
392-
"process": source['process'] if 'process' in source else '',
393-
"totalGranules": source['total_size'] if 'total_size' in source else -1,
416+
"granuleId": [source['granuleId'] if 'granuleId' in source else ''],
417+
"granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
418+
"process": [source['process'] if 'process' in source else ''],
419+
"totalGranules": [source['total_size'] if 'total_size' in source else -1],
394420
},
395-
"links": [self.__convert_to_stac_links(k) for k in source['files']],
421+
"links": [{
422+
"rel": "root",
423+
"type": "application/json",
424+
"title": f"{source['name']}___{source['version']}",
425+
"href": "./collection.json"
426+
}] + [self.__convert_to_stac_links(k) for k in source['files']],
396427
}
397428
return stac_collection
398429

cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import json
21
import logging
32
import os
43
import re
54
from collections import defaultdict
65
from glob import glob
7-
8-
import requests
6+
from urllib.parse import urlparse, unquote_plus
97

108
from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient
119
from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3
@@ -50,9 +48,19 @@ def __set_props_from_env(self):
5048
self.__delete_files = os.environ.get(self.DELETE_FILES_KEY, 'FALSE').strip().upper() == 'TRUE'
5149
return self
5250

51+
def __get_href(self, input_href: str):
52+
parse_result = urlparse(input_href)
53+
if parse_result.query == '':
54+
return ''
55+
query_dict = [k.split('=') for k in parse_result.query.split('&')]
56+
query_dict = {k[0]: unquote_plus(k[1]) for k in query_dict}
57+
if 'regex' not in query_dict:
58+
raise ValueError(f'missing regex in {input_href}')
59+
return query_dict['regex']
60+
5361
def __sort_granules(self):
54-
file_regex_list = {k['type']: k['href'].split('___')[-1] for k in self.__collection_details['links'] if not k['title'].endswith('cmr.xml')}
55-
granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction']
62+
file_regex_list = {k['type']: self.__get_href(k['href']) for k in self.__collection_details['links'] if k['rel'] != 'root' and not k['title'].endswith('cmr.xml')}
63+
granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction'][0]
5664
granules = defaultdict(dict)
5765
for each_file in self.__raw_files:
5866
each_filename = os.path.basename(each_file)

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
install_requires = [
44
'fastjsonschema',
55
'xmltodict',
6-
'requests===2.27.1'
6+
'requests'
77
]
88

99
flask_requires = [
@@ -19,13 +19,13 @@
1919
version="1.6.17",
2020
packages=find_packages(),
2121
install_requires=install_requires,
22-
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage'],
22+
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],
2323
test_suite='nose.collector',
2424
author=['Wai Phyo'],
2525
author_email=['wai.phyo@jpl.nasa.gov'],
2626
license='NONE',
2727
include_package_data=True,
28-
python_requires="==3.7",
28+
python_requires="==3.9",
2929
entry_points={
3030
}
3131
)

tests/cumulus_lambda_functions/cumulus_download_granules/__init__.py

Whitespace-only changes.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
import tempfile
3+
from glob import glob
4+
from unittest import TestCase
5+
6+
from cumulus_lambda_functions.cumulus_download_granules.download_granules import DownloadGranules
7+
8+
9+
class TestDownloadGranules(TestCase):
10+
def test_01(self):
11+
os.environ['DAPA_API'] = 'https://k3a3qmarxh.execute-api.us-west-2.amazonaws.com/dev'
12+
os.environ['USERNAME'] = '/unity/uds/user/wphyo/username'
13+
os.environ['PASSWORD'] = '/unity/uds/user/wphyo/dwssap'
14+
os.environ['PASSWORD_TYPE'] = 'PARAM_STORE'
15+
os.environ['CLIENT_ID'] = '7a1fglm2d54eoggj13lccivp25'
16+
os.environ['COGNITO_URL'] = 'https://cognito-idp.us-west-2.amazonaws.com'
17+
18+
os.environ['COLLECTION_ID'] = 'SNDR_SNPP_ATMS_L1A_NGA___1'
19+
os.environ['DOWNLOAD_DIR'] = '/etc/granules'
20+
os.environ['VERIFY_SSL'] = 'FALSE'
21+
os.environ['LIMITS'] = '100'
22+
os.environ['LOG_LEVEL'] = '20'
23+
os.environ['DATE_FROM'] = '2016-01-14T10:00:00.000Z'
24+
os.environ['DATE_TO'] = '2016-01-15T10:06:00.000Z'
25+
26+
with tempfile.TemporaryDirectory() as tmp_dir_name:
27+
os.environ['DOWNLOAD_DIR'] = tmp_dir_name
28+
DownloadGranules().start()
29+
raw_files = glob(f'{tmp_dir_name}/*', recursive=True)
30+
self.assertEqual(2, len(raw_files), f'wrong file count: {raw_files}')
31+
return
Lines changed: 38 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,61 @@
11
import json
22
from unittest import TestCase
33

4-
from cumulus_lambda_functions.cumulus_stac.collection_transformer import STAC_COLLECTION_SCHEMA
4+
import jsonschema
5+
6+
from cumulus_lambda_functions.cumulus_stac.collection_transformer import STAC_COLLECTION_SCHEMA, CollectionTransformer
57
from cumulus_lambda_functions.lib.json_validator import JsonValidator
68

79

810
class TestItemTransformer(TestCase):
911
def test_01(self):
1012
stac_validator = JsonValidator(json.loads(STAC_COLLECTION_SCHEMA))
11-
source = '''{
12-
"published": false,
13-
"endingDateTime": "2016-01-31T19:59:59.991043",
14-
"status": "completed",
15-
"timestamp": 1648050501578,
16-
"createdAt": 1648050499079,
17-
"processingEndDateTime": "2022-03-23T15:48:20.869Z",
18-
"productVolume": 18096656,
19-
"timeToPreprocess": 20.302,
20-
"timeToArchive": 0,
21-
"productionDateTime": "2016-02-01T02:45:59.639000Z",
22-
"execution": "https://console.aws.amazon.com/states/home?region=us-west-2#/executions/details/arn:aws:states:us-west-2:884500545225:execution:am-uds-dev-cumulus-IngestGranule:ec602ca7-0243-44df-adc0-28fb8a486d54",
13+
source = {
14+
"createdAt": 1647992847582,
15+
"granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$",
16+
"process": "modis",
17+
"dateFrom": "1990-01-01T00:00:00Z",
18+
"dateTo": "1991-01-01T00:00:00Z",
19+
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS",
20+
"name": "ATMS_SCIENCE_Group",
2321
"files": [
2422
{
25-
"bucket": "am-uds-dev-cumulus-internal",
26-
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS",
27-
"size": 760,
28-
"fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
29-
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518500.PDS",
30-
"type": "data"
23+
"bucket": "internal",
24+
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$",
25+
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS",
26+
"type": "data",
27+
"reportToEms": True
3128
},
3229
{
33-
"bucket": "am-uds-dev-cumulus-internal",
34-
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518501.PDS",
35-
"size": 18084600,
36-
"fileName": "P1570515ATMSSCIENCEAAT16032024518501.PDS",
37-
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518501.PDS",
30+
"bucket": "internal",
31+
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS$",
32+
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS",
33+
"reportToEms": True,
3834
"type": "metadata"
3935
},
4036
{
41-
"bucket": "am-uds-dev-cumulus-internal",
42-
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
43-
"size": 9547,
44-
"fileName": "P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
45-
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
37+
"bucket": "internal",
38+
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$",
39+
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS.xml",
40+
"reportToEms": True,
4641
"type": "metadata"
4742
},
4843
{
49-
"bucket": "am-uds-dev-cumulus-internal",
50-
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS.cmr.xml",
51-
"size": 1749,
52-
"fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS.cmr.xml",
44+
"bucket": "internal",
45+
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$",
46+
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml",
47+
"reportToEms": True,
5348
"type": "metadata"
5449
}
5550
],
56-
"processingStartDateTime": "2022-03-23T15:45:03.732Z",
57-
"updatedAt": 1648050501578,
58-
"beginningDateTime": "2016-01-31T18:00:00.009057",
59-
"provider": "snpp_provider_03",
60-
"granuleId": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
61-
"collectionId": "ATMS_SCIENCE_Group___001",
62-
"duration": 197.993,
63-
"error": {},
64-
"lastUpdateDateTime": "2018-04-25T21:45:45.524053"
65-
}'''
51+
"granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+",
52+
"reportToEms": True,
53+
"version": "001",
54+
"duplicateHandling": "replace",
55+
"updatedAt": 1647992847582,
56+
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
57+
"timestamp": 1647992849273
58+
}
6659
raw = {
6760
"type": "Collection",
6861
"stac_version": "1.0.0",
@@ -85,9 +78,10 @@ def test_01(self):
8578
"links": [
8679
{
8780
"rel": "root",
88-
"href": ".",
81+
"href": "./collection.json",
8982
},
9083
]
9184
}
85+
raw = CollectionTransformer().to_stac(source)
9286
self.assertEqual(None, stac_validator.validate(raw), f'invalid stac format: {stac_validator}')
9387
return

0 commit comments

Comments (0)