diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 72e24062..980087d7 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -19,7 +19,7 @@ jobs: with: python-version: '3.9' - run: | - python3 "${GITHUB_WORKSPACE}/setup.py" install + python3 -m pip install -r "${GITHUB_WORKSPACE}/requirements.txt" - run: | python3 "${GITHUB_WORKSPACE}/setup.py" install_lib - run: | diff --git a/README.md b/README.md index 67d9bdb8..3f8a05af 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ -
This repository contains source code that handles data ingest, data catalog, data search and data access that complies to OGC DAPA and STAC specifications.+
This repository contains source code that handles data ingest, data catalog, data search and data access that complies to OGC DAPA and STAC specificationsdiff --git a/ci.cd/Makefile b/ci.cd/Makefile index 7590edfd..d9023867 100644 --- a/ci.cd/Makefile +++ b/ci.cd/Makefile @@ -21,7 +21,7 @@ upload_lambda: aws --profile saml-pub s3 cp cumulus_lambda_functions_deployment.zip s3://am-uds-dev-cumulus-tf-state/unity_cumulus_lambda/ upload_lambda_mcp_dev: - aws s3 cp cumulus_lambda_functions_deployment.zip s3://uds-dev-cumulus-public/unity_cumulus_lambda/ + aws s3 cp tf-module/unity-cumulus/build/cumulus_lambda_functions_deployment.zip s3://uds-dev-cumulus-public/unity_cumulus_lambda/ update_lambda_function_mcp_dev_6: aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-metadata_s4pa_generate_cmr --publish &>/dev/null update_lambda_function_mcp_dev_7: @@ -49,6 +49,9 @@ update_lambda_function_mcp_sbx_7: update_lambda_function_mcp_sbx_8: aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-sbx-cumulus-granules_to_es --publish &>/dev/null +update_lambda_function_mcp_sbx_ingester: + aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-sbx-cumulus-granules_cnm_ingester --publish &>/dev/null + mcp_sbx: upload_lambda_mcp_dev update_lambda_function_mcp_sbx_7 update_lambda_function_mcp_sbx_8 mcp_sbx_fastapi: upload_lambda_mcp_dev update_lambda_function_mcp_sbx_uds_api diff --git a/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py new file mode 100644 index 00000000..47bad3ad --- /dev/null +++ b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py @@ -0,0 +1,193 @@ +import os +import time + +from cumulus_lambda_functions.lib.aws.aws_message_transformers import AwsMessageTransformers +from cumulus_lambda_functions.lib.uds_db.uds_collections import UdsCollections + +from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils + +from cumulus_lambda_functions.uds_api.dapa.collections_dapa_cnm import CollectionsDapaCnm + +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac +from cumulus_lambda_functions.uds_api.dapa.collections_dapa_creation import CollectionDapaCreation +from cumulus_lambda_functions.cumulus_stac.item_transformer import ItemTransformer +from pystac import ItemCollection, Item +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3 + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + +""" +TODO + +UNITY_DEFAULT_PROVIDER +CUMULUS_WORKFLOW_NAME +REPORT_TO_EMS +CUMULUS_WORKFLOW_SQS_URL +CUMULUS_LAMBDA_PREFIX +ES_URL +ES_PORT +SNS_TOPIC_ARN +""" +class GranulesCnmIngesterLogic: + def __init__(self): + self.__s3 = AwsS3() + self.__successful_features_json = None + self.__successful_features: ItemCollection = None + self.__collection_id = None + self.__chunk_size = StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE + if 'UNITY_DEFAULT_PROVIDER' not in os.environ: + raise ValueError(f'missing UNITY_DEFAULT_PROVIDER') + self.__default_provider = os.environ.get('UNITY_DEFAULT_PROVIDER') + self.__uds_collection = UdsCollections(es_url=os.getenv('ES_URL'), es_port=int(os.getenv('ES_PORT', '443'))) + + @property + def successful_features_json(self): + return self.__successful_features_json + + @successful_features_json.setter + def successful_features_json(self, val): + """ + :param val: + :return: None + """ + self.__successful_features_json = val + return + + @property + def collection_id(self): + return self.__collection_id + + @collection_id.setter + def collection_id(self, val): + """ + :param val: + :return: None + """ + self.__collection_id = val + return + + @property + def successful_features(self): + return self.__successful_features + + @successful_features.setter + def successful_features(self, val): + """ + :param val: + :return: None + """ + self.__successful_features = val + return + + def load_successful_features_s3(self, successful_features_s3_url): + self.__s3.set_s3_url(successful_features_s3_url) + if not self.__s3.exists(self.__s3.target_bucket, self.__s3.target_key): + LOGGER.error(f'missing successful_features: {successful_features_s3_url}') + raise ValueError(f'missing successful_features: {successful_features_s3_url}') + local_successful_features = self.__s3.download('/tmp') + self.__successful_features_json = FileUtils.read_json(local_successful_features) + FileUtils.remove_if_exists(local_successful_features) + self.__successful_features = ItemCollection.from_dict(self.__successful_features_json) + return + + def validate_granules(self): + if self.successful_features is None: + raise RuntimeError(f'NULL successful_features') + missing_granules = [] + for each_granule in self.successful_features.items: + missing_assets = [] + for each_asset_name, each_asset in each_granule.assets.items(): + temp_bucket, temp_key = self.__s3.split_s3_url(each_asset.href) + if not self.__s3.exists(temp_bucket, temp_key): + missing_assets.append({each_asset_name: each_asset.href}) + if len(missing_assets) > 0: + missing_granules.append({ + 'granule_id': each_granule.id, + 'missing_assets': missing_assets + }) + if len(missing_granules) > 0: + LOGGER.error(f'missing_granules: {missing_granules}') + raise ValueError(f'missing_granules: {missing_granules}') + return + + def extract_collection_id(self): + if self.successful_features is None: + raise RuntimeError(f'NULL successful_features') + if len(self.successful_features.items) < 1: + LOGGER.error(f'not required to process. No Granules: {self.successful_features.to_dict(False)}') + return + self.collection_id = self.successful_features.items[0].collection_id + return + + def has_collection(self): + uds_collection_result = self.__uds_collection.get_collection(self.collection_id) + return len(uds_collection_result) > 0 + + def create_collection(self): + if self.collection_id is None: + raise RuntimeError(f'NULL collection_id') + if self.has_collection(): + LOGGER.debug(f'{self.collection_id} already exists. continuing..') + return + # ref: https://github.com/unity-sds/unity-py/blob/0.4.0/unity_sds_client/services/data_service.py + dapa_collection = UnityCollectionStac() \ + .with_id(self.collection_id) \ + .with_graule_id_regex("^test_file.*$") \ + .with_granule_id_extraction_regex("(^test_file.*)(\\.nc|\\.nc\\.cas|\\.cmr\\.xml)") \ + .with_title(f'Collection: {self.collection_id}') \ + .with_process('stac') \ + .with_provider(self.__default_provider) \ + .add_file_type("test_file01.nc", "^test_file.*\\.nc$", 'unknown_bucket', 'application/json', 'root') \ + .add_file_type("test_file01.nc", "^test_file.*\\.nc$", 'protected', 'data', 'item') \ + .add_file_type("test_file01.nc.cas", "^test_file.*\\.nc.cas$", 'protected', 'metadata', 'item') \ + .add_file_type("test_file01.nc.cmr.xml", "^test_file.*\\.nc.cmr.xml$", 'protected', 'metadata', 'item') \ + .add_file_type("test_file01.nc.stac.json", "^test_file.*\\.nc.stac.json$", 'protected', 'metadata', 'item') + + stac_collection = dapa_collection.start() + creation_result = CollectionDapaCreation(stac_collection).create() + if creation_result['statusCode'] >= 400: + raise RuntimeError(f'failed to create collection: {self.collection_id}. details: {creation_result["body"]}') + time.sleep(3) # cool off period before checking DB + if not self.has_collection(): + LOGGER.error(f'missing collection. (failed to create): {self.collection_id}') + raise ValueError(f'missing collection. (failed to create): {self.collection_id}') + return + + def send_cnm_msg(self): + LOGGER.debug(f'starting ingest_cnm_dapa_actual') + try: + errors = [] + for i, features_chunk in enumerate(StageInOutUtils.chunk_list(self.successful_features_json['features'], self.__chunk_size)): + try: + LOGGER.debug(f'working on chunk_index {i}') + dapa_body = { + "provider_id": self.__default_provider, + "features": features_chunk + } + collections_dapa_cnm = CollectionsDapaCnm(dapa_body) + cnm_result = collections_dapa_cnm.start() + if cnm_result['statusCode'] != 200: + errors.extend(features_chunk) + except Exception as e1: + LOGGER.exception(f'failed to queue CNM process.') + errors.extend(features_chunk) + except Exception as e: + LOGGER.exception('failed to ingest to CNM') + raise ValueError(f'failed to ingest to CNM: {e}') + if len(errors) > 0: + raise RuntimeError(f'failures during CNM ingestion: {errors}') + return + + def start(self, event): + LOGGER.debug(f'event: {event}') + sns_msg = AwsMessageTransformers().sqs_sns(event) + s3_details = AwsMessageTransformers().get_s3_from_sns(sns_msg) + s3_url = f's3://{s3_details["bucket"]}/{s3_details["key"]}' + self.load_successful_features_s3(s3_url) + self.validate_granules() + self.extract_collection_id() + self.create_collection() + self.send_cnm_msg() + return diff --git a/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py b/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py index 626431a1..b5f7cbaf 100644 --- a/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py +++ b/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py @@ -1,4 +1,6 @@ import json + +from cumulus_lambda_functions.granules_cnm_ingester.granules_cnm_ingester_logic import GranulesCnmIngesterLogic from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator @@ -7,7 +9,8 @@ def lambda_handler(event, context): :param event: :param context: :return: + {'Records': [{'messageId': '6ff7c6fd-4053-4ab4-bc12-c042be1ed275', 'receiptHandle': 'AQEBYASiFPjQT5JBI2KKCTF/uQhHfJt/tHhgucslQQdvkNVxcXCNi2E5Ux4U9N0eu7RfvlnvtycjUh0gdL7jIeoyH+VRKSF61uAJuT4p31BsNe0GYu49N9A6+kxjP/RrykR7ZofmQRdHToX1ugRc76SMRic4H/ZZ89YAHA2QeomJFMrYywIxlk8OAzYaBf2dQI7WexjY5u1CW00XNMbTGyTo4foVPxcSn6bdFpfgxW/L7yJMX/0YQvrA9ruiuQ+lrui+6fWYh5zEk3f5v1bYtUQ6DtyyfbtMHZQJTJpUlWAFRzzN+3melilH7FySyOGDXhPb0BOSzmdKq9wBbfLW/YPb7l99ejq4GfRfj8LyI4EtB96vTeUw4LCgUqbZcBrxbGBLUXMacweh+gCjHav9ylqr2SeOiqG3vWPq9pwFYQIDqNE=', 'body': '{\n "Type" : "Notification",\n "MessageId" : "33e1075a-435c-5217-a33d-59fae85e19b2",\n "TopicArn" : "arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester",\n "Subject" : "Amazon S3 Notification",\n "Message" : "{\\"Service\\":\\"Amazon S3\\",\\"Event\\":\\"s3:TestEvent\\",\\"Time\\":\\"2024-04-22T18:13:22.416Z\\",\\"Bucket\\":\\"uds-sbx-cumulus-staging\\",\\"RequestId\\":\\"DQ4T0GRVFPSX45C9\\",\\"HostId\\":\\"gHBFnYNmfnGDZBmqoQwA3RScjtjBk5lr426moGxu8IDpe5UhWAqNTxHqilWBoPN1njzIrzNrf8c=\\"}",\n "Timestamp" : "2024-04-22T18:13:22.434Z",\n "SignatureVersion" : "1",\n "Signature" : "RvSxqpU7J7CCJXbin9cXqTxzjMjgAUFtk/n454mTMcOe5x3Ay1w4AHfzyeYQCFBdLHNBa8n3OdMDoDlJqyVQMb8k+nERaiZWN2oqFVDRqT9pqSr89b+4FwlhPv6TYy2pBa/bgjZ4cOSYsey1uSQ3hjl0idfssvuV5cCRxQScbA+yu8Gcv9K7Oqgy01mC0sDHiuPIifhFXxupG5ygbjqoHIB+1gdMEbBwyixoY5GOpHM/O2uHNF+dJDjax1WMxQ2FzVjiFeCa+tNcjovF059+tx2v1YmDq/kEAFrN6DAtP6R4zKag62P9jkvjU/wHYJ2jjXmZAqoG+nuzAo24HiZPSw==",\n "SigningCertURL" : "https://sns.us-west-2.amazonaws.com/SimpleNotificationService-60eadc530605d63b8e62a523676ef735.pem",\n "UnsubscribeURL" : "https://sns.us-west-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester:76cbefa1-addf-45c2-97e1-ae16986b195b"\n}', 'attributes': {'ApproximateReceiveCount': '1', 'SentTimestamp': '1713809602474', 'SenderId': 'AIDAIYLAVTDLUXBIEIX46', 'ApproximateFirstReceiveTimestamp': '1713809602483'}, 'messageAttributes': {}, 'md5OfBody': 'c6d06d1b742ad5bd2cfe5f542640aad2', 'eventSource': 'aws:sqs', 'eventSourceARN': 'arn:aws:sqs:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester', 'awsRegion': 'us-west-2'}]} """ LambdaLoggerGenerator.remove_default_handlers() - print(f'event: {event}') - raise NotImplementedError('Require implementation later') + GranulesCnmIngesterLogic().start(event) + return diff --git a/cumulus_lambda_functions/lib/aws/aws_message_transformers.py b/cumulus_lambda_functions/lib/aws/aws_message_transformers.py index 0fe119aa..101b383b 100644 --- a/cumulus_lambda_functions/lib/aws/aws_message_transformers.py +++ b/cumulus_lambda_functions/lib/aws/aws_message_transformers.py @@ -1,4 +1,5 @@ import json +from urllib.parse import unquote from cumulus_lambda_functions.lib.json_validator import JsonValidator @@ -29,8 +30,15 @@ class AwsMessageTransformers: "Type": {"type": "string"}, "MessageId": {"type": "string"}, "TopicArn": {"type": "string"}, + "Subject": {"type": "string"}, + "Timestamp": {"type": "string"}, + "SignatureVersion": {"type": "string"}, + "Signature": {"type": "string"}, + "SigningCertURL": {"type": "string"}, + "UnsubscribeURL": {"type": "string"}, "Message": {"type": "string"}, - } + }, + "required": ["Message"] } S3_RECORD_SCHEMA = { @@ -41,22 +49,25 @@ class AwsMessageTransformers: 'maxItems': 1, 'items': { 'type': 'object', - 'properties': {'s3': { - 'type': 'object', - 'properties': { - 'bucket': { - 'type': 'object', - 'properties': {'name': {'type': 'string', 'minLength': 1}}, - 'required': ['name'] - }, - 'object': { - 'type': 'object', - 'properties': {'key': {'type': 'string', 'minLength': 1}}, - 'required': ['key'] - }}, - 'required': ['bucket', 'object'] - }}, - 'required': ['s3'] + 'properties': { + 'eventName': {'type': 'string'}, + 's3': { + 'type': 'object', + 'properties': { + 'bucket': { + 'type': 'object', + 'properties': {'name': {'type': 'string', 'minLength': 1}}, + 'required': ['name'] + }, + 'object': { + 'type': 'object', + 'properties': {'key': {'type': 'string', 'minLength': 1}}, + 'required': ['key'] + }}, + 'required': ['bucket', 'object'] + } + }, + 'required': ['eventName', 's3'] } }}, 'required': ['Records'] @@ -74,3 +85,14 @@ def sqs_sns(self, raw_msg: json): sns_msg_body = sqs_msg_body['Message'] sns_msg_body = json.loads(sns_msg_body) return sns_msg_body + + def get_s3_from_sns(self, sns_msg_body): + result = JsonValidator(self.S3_RECORD_SCHEMA).validate(sns_msg_body) + if result is not None: + raise ValueError(f'sqs_msg did not pass SQS_MSG_SCHEMA: {result}') + s3_summary = { + 'eventName': sns_msg_body['Records'][0]['eventName'], + 'bucket': sns_msg_body['Records'][0]['s3']['bucket']['name'], + 'key': unquote(sns_msg_body['Records'][0]['s3']['object']['key'].replace('+', ' ')), + } + return s3_summary \ No newline at end of file diff --git a/cumulus_lambda_functions/lib/aws/aws_s3.py b/cumulus_lambda_functions/lib/aws/aws_s3.py index 4467c931..f4cbc4ae 100644 --- a/cumulus_lambda_functions/lib/aws/aws_s3.py +++ b/cumulus_lambda_functions/lib/aws/aws_s3.py @@ -58,6 +58,13 @@ def __upload_to_s3(self, bucket, prefix, file_path, delete_files=False, add_size raise e return f's3://{bucket}/{s3_key}' + def exists(self, base_path: str, relative_path: str): + try: + response = self.__s3_client.head_object(Bucket=base_path, Key=relative_path) + except: + return False + return True + def upload(self, file_path: str, base_path: str, relative_parent_path: str, delete_files: bool, s3_name: Union[str, None] = None, obj_tags: dict = {}, overwrite: bool = False): s3_url = self.__upload_to_s3(base_path, relative_parent_path, file_path, delete_files, True, obj_tags, s3_name) diff --git a/cumulus_lambda_functions/lib/uds_db/uds_collections.py b/cumulus_lambda_functions/lib/uds_db/uds_collections.py index d37f625c..17cf06c4 100644 --- a/cumulus_lambda_functions/lib/uds_db/uds_collections.py +++ b/cumulus_lambda_functions/lib/uds_db/uds_collections.py @@ -70,6 +70,25 @@ def add_collection(self, collection_id: str, start_time: int, end_time: int, bbo self.__es.index_one(indexing_dict, collection_id, DBConstants.collections_index) return self + def get_collection(self, collection_id: str): + authorized_collection_ids_dsl = { + 'size': 20, + 'query': { + 'bool': { + 'must': [ + {'term': {DBConstants.collection_id: {'value': collection_id}}} + ] + } + }, + 'sort': [ + {DBConstants.collection_id: {'order': 'asc'}} + ] + } + LOGGER.debug(f'authorized_collection_ids_dsl: {authorized_collection_ids_dsl}') + authorized_collection_ids = self.__es.query(authorized_collection_ids_dsl, DBConstants.collections_index) + authorized_collection_ids = [k['_source'] for k in authorized_collection_ids['hits']['hits']] + return authorized_collection_ids + def get_collections(self, collection_regex: list): # temp_dsl = { # 'query': {'match_all': {}}, diff --git a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py index ed1691f0..53811749 100644 --- a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py +++ b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py @@ -19,7 +19,6 @@ class CatalogGranulesUnity(CatalogGranulesAbstract): DELAY_SECOND = 'DELAY_SECOND' REPEAT_TIMES = 'REPEAT_TIMES' CHUNK_SIZE = 'CHUNK_SIZE' - DEFAULT_CHUNK_SIZE = 5 def __init__(self) -> None: super().__init__() @@ -27,15 +26,15 @@ def __init__(self) -> None: self.__verify_ssl = True self.__delaying_second = 30 self.__repeating_times = 0 - self.__chunk_size = self.DEFAULT_CHUNK_SIZE + self.__chunk_size = StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE def __set_props_from_env(self): missing_keys = [k for k in [self.UPLOADED_FILES_JSON, self.PROVIDER_ID_KEY] if k not in os.environ] if len(missing_keys) > 0: raise ValueError(f'missing environment keys: {missing_keys}') self._retrieve_stac_json() - self.__chunk_size = int(os.environ.get(self.CHUNK_SIZE, self.DEFAULT_CHUNK_SIZE)) - self.__chunk_size = self.__chunk_size if self.__chunk_size > 0 else self.DEFAULT_CHUNK_SIZE + self.__chunk_size = int(os.environ.get(self.CHUNK_SIZE, StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE)) + self.__chunk_size = self.__chunk_size if self.__chunk_size > 0 else StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE self.__provider_id = os.environ.get(self.PROVIDER_ID_KEY) self.__verify_ssl = os.environ.get(self.VERIFY_SSL_KEY, 'TRUE').strip().upper() == 'TRUE' self.__delaying_second = int(os.environ.get(self.DELAY_SECOND, '30')) diff --git a/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py index 70d1b6ea..3de73a1c 100644 --- a/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py +++ b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py @@ -10,6 +10,7 @@ class StageInOutUtils: OUTPUT_FILE = 'OUTPUT_FILE' + CATALOG_DEFAULT_CHUNK_SIZE = 5 @staticmethod def write_output_to_file(output_json: Union[dict, str, list]): diff --git a/docker/Dockerfile_download_granules.jpl b/docker/Dockerfile_download_granules.jpl index 0d1b56f4..895a375f 100644 --- a/docker/Dockerfile_download_granules.jpl +++ b/docker/Dockerfile_download_granules.jpl @@ -5,8 +5,8 @@ RUN apt-get update -y && apt-get install vim -y ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/unity" RUN python -m pip install boto3 RUN mkdir -p /usr/src/app/unity -COPY setup.py ./setup.py -RUN ["python", "setup.py", "install"] +COPY requirements.txt ./requirements.txt +RUN python3 -m pip install -r requirements.txt COPY setup.py /usr/src/app/unity/setup.py COPY cumulus_lambda_functions /usr/src/app/unity/cumulus_lambda_functions diff --git a/docker/Dockerfile_download_granules.public b/docker/Dockerfile_download_granules.public index c74f9e26..16dd71a4 100644 --- a/docker/Dockerfile_download_granules.public +++ b/docker/Dockerfile_download_granules.public @@ -5,8 +5,8 @@ RUN apt-get update -y && apt-get install vim -y ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/unity" RUN python -m pip install boto3 RUN mkdir -p /usr/src/app/unity -COPY setup.py ./setup.py -RUN ["python", "setup.py", "install"] +COPY requirements.txt ./requirements.txt +RUN python3 -m pip install -r requirements.txt COPY setup.py /usr/src/app/unity/setup.py COPY cumulus_lambda_functions /usr/src/app/unity/cumulus_lambda_functions diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..291699f8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,39 @@ +annotated-types==0.6.0 +anyio==4.3.0 +attrs==23.2.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +dateparser==1.2.0 +elasticsearch==7.13.4 +exceptiongroup==1.2.1 +fastapi==0.110.3 +fastjsonschema==2.19.1 +h11==0.14.0 +idna==3.7 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==0.12.0 +mangum==0.17.0 +pydantic==2.7.1 +pydantic_core==2.18.2 +pygeofilter==0.2.1 +pygeoif==1.4.0 +pystac==1.9.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +referencing==0.35.0 +regex==2024.4.28 +requests==2.31.0 +requests-aws4auth==1.2.3 +rpds-py==0.18.0 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +tenacity==8.2.3 +typing_extensions==4.11.0 +tzlocal==5.2 +urllib3==1.26.18 +uvicorn==0.29.0 +xmltodict==0.13.0 \ No newline at end of file diff --git a/tests/cumulus_lambda_functions/granules_cnm_ingester/__init__.py b/tests/cumulus_lambda_functions/granules_cnm_ingester/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py new file mode 100644 index 00000000..7849c00e --- /dev/null +++ b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py @@ -0,0 +1,65 @@ +import json +import os +from unittest import TestCase + +from cumulus_lambda_functions.lib.time_utils import TimeUtils +from pystac import ItemCollection + +from cumulus_lambda_functions.granules_cnm_ingester.granules_cnm_ingester_logic import GranulesCnmIngesterLogic + + +class TestGranulesCnmIngesterLogic(TestCase): + + def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + os.environ['SNS_TOPIC_ARN'] = 'arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-cnm-submission-sns' + os.environ['COLLECTION_CREATION_LAMBDA_NAME'] = 'NA' + os.environ['UNITY_DEFAULT_PROVIDER'] = 'unity' + os.environ['CUMULUS_WORKFLOW_NAME'] = 'CatalogGranule' + os.environ['REPORT_TO_EMS'] = 'FALSE' + os.environ['CUMULUS_LAMBDA_PREFIX'] = 'uds-sbx-cumulus' + os.environ['CUMULUS_WORKFLOW_SQS_URL'] = 'https://sqs.us-west-2.amazonaws.com/237868187491/uds-sbx-cumulus-cnm-submission-queue' + os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' + os.environ['ES_PORT'] = '9200' + + def test_load_successful_features_s3(self): + s3_url = 's3://uds-sbx-cumulus-staging/integration_test/stag_eout/successful_features_2024-04-11T21:21:13.019769.json' + a = GranulesCnmIngesterLogic().load_successful_features_s3(s3_url) + return + + def test_validate_granules(self): + result = {"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file01", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file01.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file01.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file01.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file02", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file02.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file02.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file02.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file03", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file03.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file03.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file03.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file04", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file04.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file04.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file04.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file05", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file05.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file06", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file06.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file06.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file06.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file08", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file08.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file08.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file08.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file07", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file07.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file07.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file07.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file10", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file10.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file10.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file10.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file09", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file09.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file09.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file09.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}]} + result = ItemCollection.from_dict(result) + a = GranulesCnmIngesterLogic() + a.successful_features = result + a.validate_granules() + return + + def test_extract_collection_id(self): + result = {"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file01", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file01.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file01.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file01.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file02", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file02.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file02.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file02.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file03", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file03.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file03.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file03.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file04", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file04.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file04.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file04.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file05", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file05.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file06", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file06.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file06.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file06.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file08", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file08.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file08.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file08.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file07", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file07.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file07.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file07.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file10", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file10.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file10.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file10.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file09", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file09.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file09.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file09.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}]} + result = ItemCollection.from_dict(result) + a = GranulesCnmIngesterLogic() + a.successful_features = result + a.extract_collection_id() + self.assertEqual(a.collection_id, 'NEW_COLLECTION_EXAMPLE_L1B___9') + return + + def test_create_collection(self): + a = GranulesCnmIngesterLogic() + self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'UDS_COLLECTION_CNM_INGESTION' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.04.24.06.00'.replace('.', '') # '2402011200' + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + a.collection_id = temp_collection_id + a.create_collection() + return + + def test_send_cnm_msg(self): + result = '''{"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05", "properties": {"tag": "#sample", "c_data1": [1, 10, 100, 1000], "c_data2": [false, true, true, false, true], "c_data3": ["Bellman Ford"], "soil10": {"0_0": 0, "0_1": 1, "0_2": 0}, "start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"abcd.1234.efgh.test_file05.data.stac.json": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.data.stac.json", "title": "abcd.1234.efgh.test_file05.data.stac.json", "roles": ["data"]}, "abcd.1234.efgh.test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.nc.cas", "title": "abcd.1234.efgh.test_file05.nc.cas", "roles": ["metadata"]}, "abcd.1234.efgh.test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.nc.stac.json", "title": "abcd.1234.efgh.test_file05.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700"}]} + ''' + result = json.loads(result) + a = GranulesCnmIngesterLogic() + a.successful_features_json = result + a.send_cnm_msg() + return diff --git a/tests/integration_tests/test_custom_metadata_automated_ingestion.py b/tests/integration_tests/test_custom_metadata_automated_ingestion.py new file mode 100644 index 00000000..2342edd7 --- /dev/null +++ b/tests/integration_tests/test_custom_metadata_automated_ingestion.py @@ -0,0 +1,528 @@ +import base64 +import json +import os +import tempfile +from datetime import datetime +from sys import argv +from time import sleep +from unittest import TestCase + +import pystac +import requests +from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3 + +from cumulus_lambda_functions.lib.time_utils import TimeUtils +from pystac import Link, Catalog, Asset, Item, ItemCollection + +from cumulus_lambda_functions.docker_entrypoint.__main__ import choose_process + +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils + +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac + +from cumulus_lambda_functions.lib.cognito_login.cognito_login import CognitoLogin +from dotenv import load_dotenv + + +class TestCustomMetadataEndToEnd(TestCase): + # 1. setup admin for the test venue + # 2. create a custom metadata for the venue + # 3. create a collection + # 4. push granules to cnm w/ custom metadata + # 5. get granules w/ custom metadata + + def setUp(self) -> None: + super().setUp() + load_dotenv() + self.cognito_login = CognitoLogin() \ + .with_client_id(os.environ.get('CLIENT_ID', '')) \ + .with_cognito_url(os.environ.get('COGNITO_URL', '')) \ + .with_verify_ssl(False) \ + .start(base64.standard_b64decode(os.environ.get('USERNAME')).decode(), + base64.standard_b64decode(os.environ.get('PASSWORD')).decode()) + self._url_prefix = f'{os.environ.get("UNITY_URL")}/{os.environ.get("UNITY_STAGE", "sbx-uds-dapa")}' + self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'UDS_COLLECTION' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.04.25.11.00'.replace('.', '') # '2402011200' + + self.custom_metadata_body = { + 'tag': {'type': 'keyword'}, + 'c_data1': {'type': 'long'}, + 'c_data2': {'type': 'boolean'}, + 'c_data3': {'type': 'keyword'}, + 'soil10': { + "properties": { + "0_0": {"type": "integer"}, + "0_1": {"type": "integer"}, + "0_2": {"type": "integer"}, + } + } + } + self.granule_id = 'abcd.1234.efgh.test_file05' + return + + def test_04_upload_sample_granule(self): + custom_metadata = { + 'tag': '#sample', + 'c_data1': [1, 10, 10**2, 10**3], + 'c_data2': [False, True, True, False, True], + 'c_data3': ['Bellman Ford'], + 'soil10': { + "0_0": 0, + "0_1": 1, + "0_2": 0, + } + } + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + os.environ['VERIFY_SSL'] = 'FALSE' + + os.environ['COLLECTION_ID'] = temp_collection_id + os.environ['STAGING_BUCKET'] = 'uds-sbx-cumulus-staging' + + os.environ['GRANULES_SEARCH_DOMAIN'] = 'UNITY' + # os.environ['GRANULES_UPLOAD_TYPE'] = 'UPLOAD_S3_BY_STAC_CATALOG' + # defaulted to this value + + if len(argv) > 1: + argv.pop(-1) + argv.append('UPLOAD') + + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') + os.environ['UPLOAD_DIR'] = '' # not needed + os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json') + os.environ['OUTPUT_DIRECTORY'] = os.path.join(tmp_dir_name, 'output_dir') + FileUtils.mk_dir_p(os.environ.get('OUTPUT_DIRECTORY')) + granules_dir = os.path.join(tmp_dir_name, 'some_granules') + FileUtils.mk_dir_p(granules_dir) + with open(os.path.join(granules_dir, f'{self.granule_id}.data.stac.json'), 'w') as ff: + ff.write('sample_file') + with open(os.path.join(granules_dir, f'{self.granule_id}.nc.cas'), 'w') as ff: + ff.write(''' +