breaking: pystac validation test + upload / download docker unit test + collection STAC schema update #71

Merged 9 commits on Aug 24, 2022
.gitignore (3 changes: 2 additions & 1 deletion)
@@ -5,4 +5,5 @@ scratch*
local*
*egg-info*
dist
__pycache__
__pycache__
.env
ci.cd/Makefile (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ zip_docker:
docker save "$(IMAGE_PREFIX)/$(NAME):$(VERSION)" | gzip > "$(NAME)__$(VERSION).tar.gz"

build_lambda:
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.7 ci.cd/create_s3_zip.sh
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.9 ci.cd/create_s3_zip.sh

build_lambda_public:
docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" python:3.7 ci.cd/create_s3_zip.sh
ci.cd/create_s3_zip.sh (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,7 @@ zip_file="${project_root_dir}/$ZIP_NAME" ; # save the result file in current wor

tmp_proj='/tmp/cumulus_lambda_functions'

source_dir="/usr/local/lib/python3.7/site-packages/"
source_dir="/usr/local/lib/python3.9/site-packages/"

mkdir -p "$tmp_proj/cumulus_lambda_functions" && \
cd $tmp_proj && \
cumulus_lambda_functions/cumulus_stac/collection_transformer.py (47 changes: 39 additions & 8 deletions)
@@ -1,4 +1,7 @@
import json
from datetime import datetime
from urllib.parse import quote_plus


from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract

@@ -318,9 +321,12 @@ def __convert_to_stac_links(self, collection_file_obj: dict):
href_link[0] = collection_file_obj['bucket']
if 'regex' in collection_file_obj:
href_link[1] = collection_file_obj['regex']
stac_link['href'] = '___'.join(href_link)
stac_link['href'] = f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}"
return stac_link

# def to_pystac_link_obj(self, input_dict: dict):
# return

def to_stac(self, source: dict) -> dict:
source_sample = {
"createdAt": 1647992847582,
Expand Down Expand Up @@ -366,6 +372,26 @@ def to_stac(self, source: dict) -> dict:
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
"timestamp": 1647992849273
}
# TemporalIntervals([
# datetime.strptime(source['dateFrom'])
# ])
# stac_collection = pystac.Collection(
# id=f"{source['name']}___{source['version']}",
# description='TODO',
# extent=Extent(
# SpatialExtent([[0, 0, 0, 0]]),
# TemporalExtent([[source['dateFrom'] if 'dateFrom' in source else None,
# source['dateTo'] if 'dateTo' in source else None]])
# ),
# summaries=Summaries({
# "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
# "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
# "process": [source['process'] if 'process' in source else ''],
# "totalGranules": [source['total_size'] if 'total_size' in source else -1],
# }),
# )
# stac_collection.get_root_link().target = './collection.json'
# stac_collection.add_links([Link.from_dict(k) for k in [self.__convert_to_stac_links(k) for k in source['files']]])
stac_collection = {
"type": "Collection",
"stac_version": "1.0.0",
@@ -380,19 +406,24 @@ def to_stac(self, source: dict) -> dict:
"bbox": [[0, 0, 0, 0]]
},
"temporal": {
"interval": [source['dateFrom'] if 'dateFrom' in source else None,
"interval": [[source['dateFrom'] if 'dateFrom' in source else None,
source['dateTo'] if 'dateTo' in source else None
]
]]
}
},
"assets": {},
"summaries": {
"granuleId": source['granuleId'] if 'granuleId' in source else '',
"granuleIdExtraction": source['granuleIdExtraction'] if 'granuleIdExtraction' in source else '',
"process": source['process'] if 'process' in source else '',
"totalGranules": source['total_size'] if 'total_size' in source else -1,
"granuleId": [source['granuleId'] if 'granuleId' in source else ''],
"granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
"process": [source['process'] if 'process' in source else ''],
"totalGranules": [source['total_size'] if 'total_size' in source else -1],
},
"links": [self.__convert_to_stac_links(k) for k in source['files']],
"links": [{
"rel": "root",
"type": "application/json",
"title": f"{source['name']}___{source['version']}",
"href": "./collection.json"
}] + [self.__convert_to_stac_links(k) for k in source['files']],
}
return stac_collection

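For reference, `__convert_to_stac_links` now packs the bucket and regex into percent-encoded query parameters on the collection href instead of the old `'___'`-joined string. A minimal sketch of the new form (bucket and regex values are illustrative):

```python
from urllib.parse import quote_plus

# Illustrative values from a Cumulus collection "files" entry.
bucket = 'internal'
regex = r'^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\.PDS$'

# Old form: bucket and regex joined with '___'.
# href = '___'.join([bucket, regex])

# New form: query parameters on the collection document, with the regex
# percent-encoded so characters like '^', '[' and '\' survive the round trip.
href = f'./collection.json?bucket={bucket}&regex={quote_plus(regex)}'
print(href)
# ./collection.json?bucket=internal&regex=%5EP%5B0-9%5D%7B3%7D...
```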
@@ -1,11 +1,9 @@
import json
import logging
import os
import re
from collections import defaultdict
from glob import glob

import requests
from urllib.parse import urlparse, unquote_plus

from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient
from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3
@@ -50,9 +48,19 @@ def __set_props_from_env(self):
self.__delete_files = os.environ.get(self.DELETE_FILES_KEY, 'FALSE').strip().upper() == 'TRUE'
return self

def __get_href(self, input_href: str):
parse_result = urlparse(input_href)
if parse_result.query == '':
return ''
query_dict = [k.split('=') for k in parse_result.query.split('&')]
query_dict = {k[0]: unquote_plus(k[1]) for k in query_dict}
if 'regex' not in query_dict:
raise ValueError(f'missing regex in {input_href}')
return query_dict['regex']

def __sort_granules(self):
file_regex_list = {k['type']: k['href'].split('___')[-1] for k in self.__collection_details['links'] if not k['title'].endswith('cmr.xml')}
granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction']
file_regex_list = {k['type']: self.__get_href(k['href']) for k in self.__collection_details['links'] if k['rel'] != 'root' and not k['title'].endswith('cmr.xml')}
granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction'][0]
granules = defaultdict(dict)
for each_file in self.__raw_files:
each_filename = os.path.basename(each_file)
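On the download side, `__get_href` reverses that encoding, and `__sort_granules` now skips the root link (which carries no regex) and indexes `granuleIdExtraction[0]` because STAC summaries are lists. A rough standalone sketch of that grouping step, assuming links shaped like the ones the transformer emits (the free functions and names here are hypothetical, not the class's actual private API):

```python
import os
import re
from collections import defaultdict
from urllib.parse import unquote_plus, urlparse


def regex_from_href(href: str) -> str:
    # Pull the file-matching regex back out of the query-string href
    # produced by the collection transformer.
    query = urlparse(href).query
    if query == '':
        return ''
    params = dict(pair.split('=') for pair in query.split('&'))
    return unquote_plus(params.get('regex', ''))


def group_files_into_granules(links: list, raw_files: list, granule_id_extraction: str) -> dict:
    # Map file type -> regex, skipping the root link and cmr.xml entries,
    # mirroring the filter in the updated __sort_granules.
    file_regex_list = {
        k['type']: regex_from_href(k['href'])
        for k in links
        if k['rel'] != 'root' and not k['title'].endswith('cmr.xml')
    }
    granules = defaultdict(dict)
    for each_file in raw_files:
        filename = os.path.basename(each_file)
        granule_id = re.findall(granule_id_extraction, filename)
        if not granule_id:
            continue
        for file_type, regex in file_regex_list.items():
            if re.search(regex, filename):
                granules[granule_id[0]][file_type] = each_file
    return granules
```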
setup.py (6 changes: 3 additions & 3 deletions)
@@ -3,7 +3,7 @@
install_requires = [
'fastjsonschema',
'xmltodict',
'requests===2.27.1'
'requests'
]

flask_requires = [
@@ -19,13 +19,13 @@
version="1.6.17",
packages=find_packages(),
install_requires=install_requires,
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage'],
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],
test_suite='nose.collector',
author=['Wai Phyo'],
author_email=['wai.phyo@jpl.nasa.gov'],
license='NONE',
include_package_data=True,
python_requires="==3.7",
python_requires="==3.9",
entry_points={
}
)
Empty file.
@@ -0,0 +1,31 @@
import os
import tempfile
from glob import glob
from unittest import TestCase

from cumulus_lambda_functions.cumulus_download_granules.download_granules import DownloadGranules


class TestDownloadGranules(TestCase):
def test_01(self):
os.environ['DAPA_API'] = 'https://k3a3qmarxh.execute-api.us-west-2.amazonaws.com/dev'
os.environ['USERNAME'] = '/unity/uds/user/wphyo/username'
os.environ['PASSWORD'] = '/unity/uds/user/wphyo/dwssap'
os.environ['PASSWORD_TYPE'] = 'PARAM_STORE'
os.environ['CLIENT_ID'] = '7a1fglm2d54eoggj13lccivp25'
os.environ['COGNITO_URL'] = 'https://cognito-idp.us-west-2.amazonaws.com'

os.environ['COLLECTION_ID'] = 'SNDR_SNPP_ATMS_L1A_NGA___1'
os.environ['DOWNLOAD_DIR'] = '/etc/granules'
os.environ['VERIFY_SSL'] = 'FALSE'
os.environ['LIMITS'] = '100'
os.environ['LOG_LEVEL'] = '20'
os.environ['DATE_FROM'] = '2016-01-14T10:00:00.000Z'
os.environ['DATE_TO'] = '2016-01-15T10:06:00.000Z'

with tempfile.TemporaryDirectory() as tmp_dir_name:
os.environ['DOWNLOAD_DIR'] = tmp_dir_name
DownloadGranules().start()
raw_files = glob(f'{tmp_dir_name}/*', recursive=True)
self.assertEqual(2, len(raw_files), f'wrong file count: {raw_files}')
return
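The endpoints and SSM parameter paths hard-coded in this test are the kind of values the newly git-ignored `.env` file (and the `python-dotenv` test dependency added in setup.py) can supply instead. A minimal sketch, assuming a local `.env` with the same keys:

```python
# Hypothetical .env contents (git-ignored; values are placeholders):
#   DAPA_API=https://<api-id>.execute-api.us-west-2.amazonaws.com/dev
#   USERNAME=/unity/uds/user/<user>/username
#   PASSWORD=/unity/uds/user/<user>/dwssap
#   CLIENT_ID=<cognito-app-client-id>
from dotenv import load_dotenv

# load_dotenv() copies key=value pairs from a local .env into os.environ
# before the test reads them, so credentials never need to be committed.
load_dotenv()
```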
@@ -1,68 +1,61 @@
import json
from unittest import TestCase

from cumulus_lambda_functions.cumulus_stac.collection_transformer import STAC_COLLECTION_SCHEMA
import jsonschema

from cumulus_lambda_functions.cumulus_stac.collection_transformer import STAC_COLLECTION_SCHEMA, CollectionTransformer
from cumulus_lambda_functions.lib.json_validator import JsonValidator


class TestItemTransformer(TestCase):
def test_01(self):
stac_validator = JsonValidator(json.loads(STAC_COLLECTION_SCHEMA))
source = '''{
"published": false,
"endingDateTime": "2016-01-31T19:59:59.991043",
"status": "completed",
"timestamp": 1648050501578,
"createdAt": 1648050499079,
"processingEndDateTime": "2022-03-23T15:48:20.869Z",
"productVolume": 18096656,
"timeToPreprocess": 20.302,
"timeToArchive": 0,
"productionDateTime": "2016-02-01T02:45:59.639000Z",
"execution": "https://console.aws.amazon.com/states/home?region=us-west-2#/executions/details/arn:aws:states:us-west-2:884500545225:execution:am-uds-dev-cumulus-IngestGranule:ec602ca7-0243-44df-adc0-28fb8a486d54",
source = {
"createdAt": 1647992847582,
"granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$",
"process": "modis",
"dateFrom": "1990-01-01T00:00:00Z",
"dateTo": "1991-01-01T00:00:00Z",
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS",
"name": "ATMS_SCIENCE_Group",
"files": [
{
"bucket": "am-uds-dev-cumulus-internal",
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS",
"size": 760,
"fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518500.PDS",
"type": "data"
"bucket": "internal",
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$",
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS",
"type": "data",
"reportToEms": True
},
{
"bucket": "am-uds-dev-cumulus-internal",
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518501.PDS",
"size": 18084600,
"fileName": "P1570515ATMSSCIENCEAAT16032024518501.PDS",
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518501.PDS",
"bucket": "internal",
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS$",
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS",
"reportToEms": True,
"type": "metadata"
},
{
"bucket": "am-uds-dev-cumulus-internal",
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
"size": 9547,
"fileName": "P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518501.PDS.xml",
"bucket": "internal",
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$",
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS.xml",
"reportToEms": True,
"type": "metadata"
},
{
"bucket": "am-uds-dev-cumulus-internal",
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS.cmr.xml",
"size": 1749,
"fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS.cmr.xml",
"bucket": "internal",
"regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$",
"sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml",
"reportToEms": True,
"type": "metadata"
}
],
"processingStartDateTime": "2022-03-23T15:45:03.732Z",
"updatedAt": 1648050501578,
"beginningDateTime": "2016-01-31T18:00:00.009057",
"provider": "snpp_provider_03",
"granuleId": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
"collectionId": "ATMS_SCIENCE_Group___001",
"duration": 197.993,
"error": {},
"lastUpdateDateTime": "2018-04-25T21:45:45.524053"
}'''
"granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+",
"reportToEms": True,
"version": "001",
"duplicateHandling": "replace",
"updatedAt": 1647992847582,
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
"timestamp": 1647992849273
}
raw = {
"type": "Collection",
"stac_version": "1.0.0",
@@ -85,9 +78,10 @@ def test_01(self):
"links": [
{
"rel": "root",
"href": ".",
"href": "./collection.json",
},
]
}
raw = CollectionTransformer().to_stac(source)
self.assertEqual(None, stac_validator.validate(raw), f'invalid stac format: {stac_validator}')
return
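The commented-out pystac.Collection construction in collection_transformer.py points the same direction as this test; as a complementary check, the dict that `to_stac()` emits can also be validated with pystac directly. A sketch, assuming `pystac` plus `jsonschema` are installed (both are in the new tests_require list); the document below is a trimmed illustration of the generated structure rather than real transformer output:

```python
from pystac.validation import validate_dict

# Trimmed example of the structure to_stac() now produces; values illustrative.
stac_dict = {
    "type": "Collection",
    "stac_version": "1.0.0",
    "id": "ATMS_SCIENCE_Group___001",
    "description": "TODO",
    "license": "proprietary",
    "extent": {
        "spatial": {"bbox": [[0, 0, 0, 0]]},
        "temporal": {"interval": [["1990-01-01T00:00:00Z", "1991-01-01T00:00:00Z"]]},
    },
    "links": [
        {"rel": "root", "type": "application/json",
         "title": "ATMS_SCIENCE_Group___001", "href": "./collection.json"},
    ],
    "summaries": {"granuleId": ["^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$"]},
    "assets": {},
}

# Raises pystac.errors.STACValidationError if the document does not satisfy
# the STAC 1.0.0 collection schema; otherwise returns the validated schema URIs.
validate_dict(stac_dict)
```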