release/1.7.0 #78

Merged · 14 commits · Sep 6, 2022
3 changes: 2 additions & 1 deletion .gitignore
@@ -5,4 +5,5 @@ scratch*
 local*
 *egg-info*
 dist
-__pycache__
+__pycache__
+.env
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.7.0] - 2022-09-06
+### Added
+- [#62](https://github.com/unity-sds/unity-data-services/issues/66) Added OpenAPI spec for DAPA endpoints
+- [#66](https://github.com/unity-sds/unity-data-services/issues/66) Added pagination links to STAC response of DAPA endpoints
+- [#64](https://github.com/unity-sds/unity-data-services/issues/64) Added temporal coverage to DAPA collection endpoint
+### Changed
+- [#67](https://github.com/unity-sds/unity-data-services/issues/67) Updated STAC collection schema to be compatible with PySTAC library
+### Fixed
+
 ## [1.6.17] - 2022-07-28
 ### Added
 ### Fixed
13 changes: 8 additions & 5 deletions ci.cd/Makefile
@@ -4,21 +4,24 @@ export VERSION ?= latest
 
 
 all: build_lambda upload_lambda update_lambda_function build_docker
 
+local: build_lambda upload_lambda update_lambda_function_1 update_lambda_function_2 update_lambda_function_3
 build_docker:
 	docker build -t "$(IMAGE_PREFIX)/$(NAME):$(VERSION)" -f docker/Dockerfile .
 
 zip_docker:
 	docker save "$(IMAGE_PREFIX)/$(NAME):$(VERSION)" | gzip > "$(NAME)__$(VERSION).tar.gz"
 
 build_lambda:
-	docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.7 ci.cd/create_s3_zip.sh
+	docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" cae-artifactory.jpl.nasa.gov:17001/python:3.9 ci.cd/create_s3_zip.sh
 
 build_lambda_public:
 	docker run --rm -v `PWD`:"/usr/src/app/cumulus_lambda_functions":z -w "/usr/src/app/cumulus_lambda_functions" python:3.7 ci.cd/create_s3_zip.sh
 
 upload_lambda:
 	aws --profile saml-pub s3 cp cumulus_lambda_functions_deployment.zip s3://am-uds-dev-cumulus-tf-state/unity_cumulus_lambda/
 
 update_lambda_function:
 	aws --profile saml-pub lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket am-uds-dev-cumulus-tf-state --function-name arn:aws:lambda:us-west-2:884500545225:function:Test1 --publish
+update_lambda_function_1:
+	aws --profile saml-pub lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket am-uds-dev-cumulus-tf-state --function-name arn:aws:lambda:us-west-2:884500545225:function:am-uds-dev-cumulus-cumulus_collections_dapa --publish &>/dev/null
+update_lambda_function_2:
+	aws --profile saml-pub lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket am-uds-dev-cumulus-tf-state --function-name arn:aws:lambda:us-west-2:884500545225:function:am-uds-dev-cumulus-cumulus_granules_dapa --publish &>/dev/null
+update_lambda_function_3:
+	aws --profile saml-pub lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket am-uds-dev-cumulus-tf-state --function-name arn:aws:lambda:us-west-2:884500545225:function:am-uds-dev-cumulus-cumulus_collections_ingest_cnm_dapa --publish &>/dev/null
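Note: the recipes above reference docker/Dockerfile and ci.cd/create_s3_zip.sh relative to the repository root, so the Makefile is presumably invoked from there; a hypothetical invocation (the NAME and IMAGE_PREFIX values are placeholders, not taken from this PR):

    # run from the repository root; NAME/IMAGE_PREFIX are illustrative
    make -f ci.cd/Makefile build_docker NAME=unity-data-services IMAGE_PREFIX=my-registry VERSION=1.7.0
    make -f ci.cd/Makefile local VERSION=1.7.0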
2 changes: 1 addition & 1 deletion ci.cd/create_s3_zip.sh
@@ -8,7 +8,7 @@ zip_file="${project_root_dir}/$ZIP_NAME" ; # save the result file in current wor
 
 tmp_proj='/tmp/cumulus_lambda_functions'
 
-source_dir="/usr/local/lib/python3.7/site-packages/"
+source_dir="/usr/local/lib/python3.9/site-packages/"
 
 mkdir -p "$tmp_proj/cumulus_lambda_functions" && \
 cd $tmp_proj && \
4 changes: 3 additions & 1 deletion cognito_readme.md
@@ -9,9 +9,11 @@
 "ClientId" : "7a1fglm2d54eoggj13lccivp25"
 }
 - ask U-CS to create credentials and change password the first time
-- run this command:
+- run this command (JPL AWS):
 
 curl -X POST --data @cognito.jpl.aws.json -H 'X-Amz-Target: AWSCognitoIdentityProviderService.InitiateAuth' -H 'Content-Type: application/x-amz-json-1.1' https://cognito-idp.us-west-2.amazonaws.com/|jq
+curl -X POST --data @cognito.mcp.test.aws.json -H 'X-Amz-Target: AWSCognitoIdentityProviderService.InitiateAuth' -H 'Content-Type: application/x-amz-json-1.1' https://cognito-idp.us-west-2.amazonaws.com/|jq
+curl -X POST --data @cognito.mcp.dev.aws.json -H 'X-Amz-Target: AWSCognitoIdentityProviderService.InitiateAuth' -H 'Content-Type: application/x-amz-json-1.1' https://cognito-idp.us-west-2.amazonaws.com/|jq
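Note: the cognito.*.aws.json request files themselves are not part of this diff. Assuming they follow Cognito's standard InitiateAuth request shape (an assumption; only the ClientId fragment is shown above, and USERNAME/PASSWORD are placeholders), each would look roughly like:

    {
        "AuthFlow": "USER_PASSWORD_AUTH",
        "AuthParameters": {
            "USERNAME": "<username>",
            "PASSWORD": "<password>"
        },
        "ClientId": "7a1fglm2d54eoggj13lccivp25"
    }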
 - successful response:
 
 {
@@ -3,6 +3,7 @@
 
 from cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery
 from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
+from cumulus_lambda_functions.lib.utils.lambda_api_gateway_utils import LambdaApiGatewayUtils
 
 LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())

@@ -37,25 +38,50 @@ def __assign_values(self):
         self.__offset = int(query_str_dict['offset'])
         return self
 
+    def __get_size(self):
+        try:
+            cumulus_size = self.__cumulus.get_size(self.__cumulus_lambda_prefix)
+        except:
+            LOGGER.exception(f'cannot get cumulus_size')
+            cumulus_size = {'total_size': -1}
+        return cumulus_size
+
+    def __get_pagination_urls(self):
+        try:
+            pagination_links = LambdaApiGatewayUtils(self.__event, self.__limit).generate_pagination_links()
+        except Exception as e:
+            LOGGER.exception(f'error while generating pagination links')
+            return [{'message': f'error while generating pagination links: {str(e)}'}]
+        return pagination_links
+
     def start(self):
         try:
             cumulus_result = self.__cumulus.query_direct_to_private_api(self.__cumulus_lambda_prefix)
-        except Exception as e:
-            return {
-                'statusCode': 500,
-                'body': {'message': f'unpredicted error: {str(e)}'}
-            }
-        if 'server_error' in cumulus_result:
-            return {
-                'statusCode': 500,
-                'body': {'message': cumulus_result['server_error']}
-            }
-        if 'client_error' in cumulus_result:
-            return {
-                'statusCode': 400,
-                'body': {'message': cumulus_result['client_error']}
-            }
-        return {
-            'statusCode': 200,
-            'body': json.dumps({'features': cumulus_result['results']})
-        }
+            if 'server_error' in cumulus_result:
+                return {
+                    'statusCode': 500,
+                    'body': {'message': cumulus_result['server_error']}
+                }
+            if 'client_error' in cumulus_result:
+                return {
+                    'statusCode': 400,
+                    'body': {'message': cumulus_result['client_error']}
+                }
+            cumulus_size = self.__get_size()
+            return {
+                'statusCode': 200,
+                'body': json.dumps({
+                    'numberMatched': cumulus_size['total_size'],
+                    'numberReturned': len(cumulus_result['results']),
+                    'stac_version': '1.0.0',
+                    'type': 'FeatureCollection',
+                    'links': self.__get_pagination_urls(),
+                    'features': cumulus_result['results'],
+                })
+            }
+        except Exception as e:
+            LOGGER.exception(f'unexpected error')
+            return {
+                'statusCode': 500,
+                'body': {'message': f'unpredicted error: {str(e)}'}
+            }
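Note: LambdaApiGatewayUtils.generate_pagination_links is referenced but not included in this diff. As a rough sketch only, it presumably derives prev/self/next links from the request URL plus the limit/offset query parameters, along these lines (the function name and URL below are illustrative, not from this PR):

    from urllib.parse import urlencode

    def build_pagination_links(base_url: str, limit: int, offset: int) -> list:
        # hypothetical stand-in for LambdaApiGatewayUtils.generate_pagination_links:
        # OGC API-style prev/self/next links computed from limit and offset
        def link(rel: str, new_offset: int) -> dict:
            query = urlencode({'limit': limit, 'offset': max(new_offset, 0)})
            return {'rel': rel, 'href': f'{base_url}?{query}'}
        return [link('self', offset), link('prev', offset - limit), link('next', offset + limit)]

    # build_pagination_links('https://example.gov/collections', limit=10, offset=20)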
@@ -3,6 +3,7 @@
 
 from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery
 from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
+from cumulus_lambda_functions.lib.utils.lambda_api_gateway_utils import LambdaApiGatewayUtils
 
 LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())

@@ -87,25 +88,50 @@ def __assign_values(self):
         self.__offset = int(query_str_dict['offset'])
         return self
 
+    def __get_size(self):
+        try:
+            cumulus_size = self.__cumulus.get_size(self.__cumulus_lambda_prefix)
+        except:
+            LOGGER.exception(f'cannot get cumulus_size')
+            cumulus_size = {'total_size': -1}
+        return cumulus_size
+
+    def __get_pagination_urls(self):
+        try:
+            pagination_links = LambdaApiGatewayUtils(self.__event, self.__limit).generate_pagination_links()
+        except Exception as e:
+            LOGGER.exception(f'error while generating pagination links')
+            return [{'message': f'error while generating pagination links: {str(e)}'}]
+        return pagination_links
+
     def start(self):
         try:
             cumulus_result = self.__cumulus.query_direct_to_private_api(self.__cumulus_lambda_prefix)
-        except Exception as e:
-            return {
-                'statusCode': 500,
-                'body': {'message': f'unpredicted error: {str(e)}'}
-            }
-        if 'server_error' in cumulus_result:
-            return {
-                'statusCode': 500,
-                'body': {'message': cumulus_result['server_error']}
-            }
-        if 'client_error' in cumulus_result:
-            return {
-                'statusCode': 400,
-                'body': {'message': cumulus_result['client_error']}
-            }
-        return {
-            'statusCode': 200,
-            'body': json.dumps({'features': cumulus_result['results']})
-        }
+            if 'server_error' in cumulus_result:
+                return {
+                    'statusCode': 500,
+                    'body': {'message': cumulus_result['server_error']}
+                }
+            if 'client_error' in cumulus_result:
+                return {
+                    'statusCode': 400,
+                    'body': {'message': cumulus_result['client_error']}
+                }
+            cumulus_size = self.__get_size()
+            return {
+                'statusCode': 200,
+                'body': json.dumps({
+                    'numberMatched': cumulus_size['total_size'],
+                    'numberReturned': len(cumulus_result['results']),
+                    'stac_version': '1.0.0',
+                    'type': 'FeatureCollection',  # TODO correct name?
+                    'links': self.__get_pagination_urls(),
+                    'features': cumulus_result['results']
+                })
+            }
+        except Exception as e:
+            LOGGER.exception(f'unexpected error')
+            return {
+                'statusCode': 500,
+                'body': {'message': f'unpredicted error: {str(e)}'}
+            }
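Note: with these changes, a successful response body from either endpoint (after json.dumps) carries the STAC FeatureCollection shape sketched below; the counts and hrefs are illustrative values, not output from a real deployment:

    {
        "numberMatched": 42,
        "numberReturned": 10,
        "stac_version": "1.0.0",
        "type": "FeatureCollection",
        "links": [{"rel": "self", "href": "...?limit=10&offset=20"}, {"rel": "next", "href": "...?limit=10&offset=30"}],
        "features": []
    }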
46 changes: 40 additions & 6 deletions cumulus_lambda_functions/cumulus_stac/collection_transformer.py
@@ -1,4 +1,7 @@
 import json
+from datetime import datetime
+from urllib.parse import quote_plus
+
 
 
 from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract

@@ -318,9 +321,12 @@ def __convert_to_stac_links(self, collection_file_obj: dict):
             href_link[0] = collection_file_obj['bucket']
         if 'regex' in collection_file_obj:
             href_link[1] = collection_file_obj['regex']
-        stac_link['href'] = '___'.join(href_link)
+        stac_link['href'] = f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}"
         return stac_link
 
+    # def to_pystac_link_obj(self, input_dict: dict):
+    #     return
+
     def to_stac(self, source: dict) -> dict:
         source_sample = {
             "createdAt": 1647992847582,
@@ -366,6 +372,26 @@ def to_stac(self, source: dict) -> dict:
             "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
             "timestamp": 1647992849273
         }
+        # TemporalIntervals([
+        #     datetime.strptime(source['dateFrom'])
+        # ])
+        # stac_collection = pystac.Collection(
+        #     id=f"{source['name']}___{source['version']}",
+        #     description='TODO',
+        #     extent=Extent(
+        #         SpatialExtent([[0, 0, 0, 0]]),
+        #         TemporalExtent([[source['dateFrom'] if 'dateFrom' in source else None,
+        #                          source['dateTo'] if 'dateTo' in source else None]])
+        #     ),
+        #     summaries=Summaries({
+        #         "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
+        #         "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
+        #         "process": [source['process'] if 'process' in source else ''],
+        #         "totalGranules": [source['total_size'] if 'total_size' in source else -1],
+        #     }),
+        # )
+        # stac_collection.get_root_link().target = './collection.json'
+        # stac_collection.add_links([Link.from_dict(k) for k in [self.__convert_to_stac_links(k) for k in source['files']]])
         stac_collection = {
             "type": "Collection",
             "stac_version": "1.0.0",
@@ -380,16 +406,24 @@ def to_stac(self, source: dict) -> dict:
                     "bbox": [[0, 0, 0, 0]]
                 },
                 "temporal": {
-                    "interval": [[None, None]]
+                    "interval": [[source['dateFrom'] if 'dateFrom' in source else None,
+                                  source['dateTo'] if 'dateTo' in source else None
+                                  ]]
                 }
             },
             "assets": {},
             "summaries": {
-                "granuleId": source['granuleId'] if 'granuleId' in source else '',
-                "granuleIdExtraction": source['granuleIdExtraction'] if 'granuleIdExtraction' in source else '',
-                "process": source['process'] if 'process' in source else '',
+                "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
+                "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
+                "process": [source['process'] if 'process' in source else ''],
+                "totalGranules": [source['total_size'] if 'total_size' in source else -1],
             },
-            "links": [self.__convert_to_stac_links(k) for k in source['files']],
+            "links": [{
+                "rel": "root",
+                "type": "application/json",
+                "title": f"{source['name']}___{source['version']}",
+                "href": "./collection.json"
+            }] + [self.__convert_to_stac_links(k) for k in source['files']],
         }
         return stac_collection
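Note: the switch from scalar summaries to single-element lists is the PySTAC-compatibility fix tracked in #67: the STAC spec defines each summary as a list of values (or a range object), which is what pystac expects. A minimal round-trip sketch, assuming pystac is installed and stac_collection is the dict returned by to_stac above:

    import pystac  # assumption: pystac is available in the environment

    collection = pystac.Collection.from_dict(stac_collection)
    print(collection.to_dict()['summaries'])  # list-valued summaries survive the round trip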
@@ -1,11 +1,9 @@
 import json
 import logging
 import os
-import re
 from collections import defaultdict
 from glob import glob
 
-import requests
+from urllib.parse import urlparse, unquote_plus
 
 from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient
 from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3
@@ -50,9 +48,19 @@ def __set_props_from_env(self):
         self.__delete_files = os.environ.get(self.DELETE_FILES_KEY, 'FALSE').strip().upper() == 'TRUE'
         return self
 
+    def __get_href(self, input_href: str):
+        parse_result = urlparse(input_href)
+        if parse_result.query == '':
+            return ''
+        query_dict = [k.split('=') for k in parse_result.query.split('&')]
+        query_dict = {k[0]: unquote_plus(k[1]) for k in query_dict}
+        if 'regex' not in query_dict:
+            raise ValueError(f'missing regex in {input_href}')
+        return query_dict['regex']
+
     def __sort_granules(self):
-        file_regex_list = {k['type']: k['href'].split('___')[-1] for k in self.__collection_details['links'] if not k['title'].endswith('cmr.xml')}
-        granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction']
+        file_regex_list = {k['type']: self.__get_href(k['href']) for k in self.__collection_details['links'] if k['rel'] != 'root' and not k['title'].endswith('cmr.xml')}
+        granule_id_extraction = self.__collection_details['summaries']['granuleIdExtraction'][0]
         granules = defaultdict(dict)
         for each_file in self.__raw_files:
             each_filename = os.path.basename(each_file)
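Note: taken together with collection_transformer.py above, the bucket/regex pair now round-trips through the link href's query string instead of the old '___'-joined string. A self-contained sketch of that encode/decode path (the bucket and regex values are hypothetical):

    from urllib.parse import quote_plus, unquote_plus, urlparse

    bucket, regex = 'my-bucket', '^P[0-9]{3}.+\\.nc$'  # hypothetical values
    # encode, as in __convert_to_stac_links:
    href = f'./collection.json?bucket={bucket}&regex={quote_plus(regex)}'
    # decode, as in __get_href:
    parsed = urlparse(href)
    query = {k: unquote_plus(v) for k, v in (p.split('=') for p in parsed.query.split('&'))}
    assert query == {'bucket': bucket, 'regex': regex}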