diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 72e24062..980087d7 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -19,7 +19,7 @@ jobs: with: python-version: '3.9' - run: | - python3 "${GITHUB_WORKSPACE}/setup.py" install + python3 -m pip install -r "${GITHUB_WORKSPACE}/requirements.txt" - run: | python3 "${GITHUB_WORKSPACE}/setup.py" install_lib - run: | diff --git a/README.md b/README.md index 67d9bdb8..3f8a05af 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ -
This repository contains source code that handles data ingest, data catalog, data search and data access that complies to OGC DAPA and STAC specifications.
+
This repository contains source code that handles data ingest, data catalog, data search and data access that complies to OGC DAPA and STAC specifications
diff --git a/ci.cd/Makefile b/ci.cd/Makefile index 7590edfd..d9023867 100644 --- a/ci.cd/Makefile +++ b/ci.cd/Makefile @@ -21,7 +21,7 @@ upload_lambda: aws --profile saml-pub s3 cp cumulus_lambda_functions_deployment.zip s3://am-uds-dev-cumulus-tf-state/unity_cumulus_lambda/ upload_lambda_mcp_dev: - aws s3 cp cumulus_lambda_functions_deployment.zip s3://uds-dev-cumulus-public/unity_cumulus_lambda/ + aws s3 cp tf-module/unity-cumulus/build/cumulus_lambda_functions_deployment.zip s3://uds-dev-cumulus-public/unity_cumulus_lambda/ update_lambda_function_mcp_dev_6: aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-metadata_s4pa_generate_cmr --publish &>/dev/null update_lambda_function_mcp_dev_7: @@ -49,6 +49,9 @@ update_lambda_function_mcp_sbx_7: update_lambda_function_mcp_sbx_8: aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-sbx-cumulus-granules_to_es --publish &>/dev/null +update_lambda_function_mcp_sbx_ingester: + aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-sbx-cumulus-granules_cnm_ingester --publish &>/dev/null + mcp_sbx: upload_lambda_mcp_dev update_lambda_function_mcp_sbx_7 update_lambda_function_mcp_sbx_8 mcp_sbx_fastapi: upload_lambda_mcp_dev update_lambda_function_mcp_sbx_uds_api diff --git a/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py new file mode 100644 index 00000000..47bad3ad --- /dev/null +++ b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py @@ -0,0 +1,193 @@ +import os +import time + +from cumulus_lambda_functions.lib.aws.aws_message_transformers import AwsMessageTransformers +from cumulus_lambda_functions.lib.uds_db.uds_collections import UdsCollections + +from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils + +from cumulus_lambda_functions.uds_api.dapa.collections_dapa_cnm import CollectionsDapaCnm + +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac +from cumulus_lambda_functions.uds_api.dapa.collections_dapa_creation import CollectionDapaCreation +from cumulus_lambda_functions.cumulus_stac.item_transformer import ItemTransformer +from pystac import ItemCollection, Item +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3 + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + +""" +TODO + +UNITY_DEFAULT_PROVIDER +CUMULUS_WORKFLOW_NAME +REPORT_TO_EMS +CUMULUS_WORKFLOW_SQS_URL +CUMULUS_LAMBDA_PREFIX +ES_URL +ES_PORT +SNS_TOPIC_ARN +""" +class GranulesCnmIngesterLogic: + def __init__(self): + self.__s3 = AwsS3() + self.__successful_features_json = None + self.__successful_features: ItemCollection = None + self.__collection_id = None + self.__chunk_size = StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE + if 'UNITY_DEFAULT_PROVIDER' not in os.environ: + raise ValueError(f'missing UNITY_DEFAULT_PROVIDER') + self.__default_provider = os.environ.get('UNITY_DEFAULT_PROVIDER') + self.__uds_collection = UdsCollections(es_url=os.getenv('ES_URL'), es_port=int(os.getenv('ES_PORT', '443'))) + + @property + def successful_features_json(self): + return self.__successful_features_json + + @successful_features_json.setter + def successful_features_json(self, val): + """ + :param val: + :return: None + """ + self.__successful_features_json = val + return + + @property + def collection_id(self): + return self.__collection_id + + @collection_id.setter + def collection_id(self, val): + """ + :param val: + :return: None + """ + self.__collection_id = val + return + + @property + def successful_features(self): + return self.__successful_features + + @successful_features.setter + def successful_features(self, val): + """ + :param val: + :return: None + """ + self.__successful_features = val + return + + def load_successful_features_s3(self, successful_features_s3_url): + self.__s3.set_s3_url(successful_features_s3_url) + if not self.__s3.exists(self.__s3.target_bucket, self.__s3.target_key): + LOGGER.error(f'missing successful_features: {successful_features_s3_url}') + raise ValueError(f'missing successful_features: {successful_features_s3_url}') + local_successful_features = self.__s3.download('/tmp') + self.__successful_features_json = FileUtils.read_json(local_successful_features) + FileUtils.remove_if_exists(local_successful_features) + self.__successful_features = ItemCollection.from_dict(self.__successful_features_json) + return + + def validate_granules(self): + if self.successful_features is None: + raise RuntimeError(f'NULL successful_features') + missing_granules = [] + for each_granule in self.successful_features.items: + missing_assets = [] + for each_asset_name, each_asset in each_granule.assets.items(): + temp_bucket, temp_key = self.__s3.split_s3_url(each_asset.href) + if not self.__s3.exists(temp_bucket, temp_key): + missing_assets.append({each_asset_name: each_asset.href}) + if len(missing_assets) > 0: + missing_granules.append({ + 'granule_id': each_granule.id, + 'missing_assets': missing_assets + }) + if len(missing_granules) > 0: + LOGGER.error(f'missing_granules: {missing_granules}') + raise ValueError(f'missing_granules: {missing_granules}') + return + + def extract_collection_id(self): + if self.successful_features is None: + raise RuntimeError(f'NULL successful_features') + if len(self.successful_features.items) < 1: + LOGGER.error(f'not required to process. No Granules: {self.successful_features.to_dict(False)}') + return + self.collection_id = self.successful_features.items[0].collection_id + return + + def has_collection(self): + uds_collection_result = self.__uds_collection.get_collection(self.collection_id) + return len(uds_collection_result) > 0 + + def create_collection(self): + if self.collection_id is None: + raise RuntimeError(f'NULL collection_id') + if self.has_collection(): + LOGGER.debug(f'{self.collection_id} already exists. continuing..') + return + # ref: https://github.com/unity-sds/unity-py/blob/0.4.0/unity_sds_client/services/data_service.py + dapa_collection = UnityCollectionStac() \ + .with_id(self.collection_id) \ + .with_graule_id_regex("^test_file.*$") \ + .with_granule_id_extraction_regex("(^test_file.*)(\\.nc|\\.nc\\.cas|\\.cmr\\.xml)") \ + .with_title(f'Collection: {self.collection_id}') \ + .with_process('stac') \ + .with_provider(self.__default_provider) \ + .add_file_type("test_file01.nc", "^test_file.*\\.nc$", 'unknown_bucket', 'application/json', 'root') \ + .add_file_type("test_file01.nc", "^test_file.*\\.nc$", 'protected', 'data', 'item') \ + .add_file_type("test_file01.nc.cas", "^test_file.*\\.nc.cas$", 'protected', 'metadata', 'item') \ + .add_file_type("test_file01.nc.cmr.xml", "^test_file.*\\.nc.cmr.xml$", 'protected', 'metadata', 'item') \ + .add_file_type("test_file01.nc.stac.json", "^test_file.*\\.nc.stac.json$", 'protected', 'metadata', 'item') + + stac_collection = dapa_collection.start() + creation_result = CollectionDapaCreation(stac_collection).create() + if creation_result['statusCode'] >= 400: + raise RuntimeError(f'failed to create collection: {self.collection_id}. details: {creation_result["body"]}') + time.sleep(3) # cool off period before checking DB + if not self.has_collection(): + LOGGER.error(f'missing collection. (failed to create): {self.collection_id}') + raise ValueError(f'missing collection. (failed to create): {self.collection_id}') + return + + def send_cnm_msg(self): + LOGGER.debug(f'starting ingest_cnm_dapa_actual') + try: + errors = [] + for i, features_chunk in enumerate(StageInOutUtils.chunk_list(self.successful_features_json['features'], self.__chunk_size)): + try: + LOGGER.debug(f'working on chunk_index {i}') + dapa_body = { + "provider_id": self.__default_provider, + "features": features_chunk + } + collections_dapa_cnm = CollectionsDapaCnm(dapa_body) + cnm_result = collections_dapa_cnm.start() + if cnm_result['statusCode'] != 200: + errors.extend(features_chunk) + except Exception as e1: + LOGGER.exception(f'failed to queue CNM process.') + errors.extend(features_chunk) + except Exception as e: + LOGGER.exception('failed to ingest to CNM') + raise ValueError(f'failed to ingest to CNM: {e}') + if len(errors) > 0: + raise RuntimeError(f'failures during CNM ingestion: {errors}') + return + + def start(self, event): + LOGGER.debug(f'event: {event}') + sns_msg = AwsMessageTransformers().sqs_sns(event) + s3_details = AwsMessageTransformers().get_s3_from_sns(sns_msg) + s3_url = f's3://{s3_details["bucket"]}/{s3_details["key"]}' + self.load_successful_features_s3(s3_url) + self.validate_granules() + self.extract_collection_id() + self.create_collection() + self.send_cnm_msg() + return diff --git a/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py b/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py index 626431a1..b5f7cbaf 100644 --- a/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py +++ b/cumulus_lambda_functions/granules_cnm_ingester/lambda_function.py @@ -1,4 +1,6 @@ import json + +from cumulus_lambda_functions.granules_cnm_ingester.granules_cnm_ingester_logic import GranulesCnmIngesterLogic from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator @@ -7,7 +9,8 @@ def lambda_handler(event, context): :param event: :param context: :return: + {'Records': [{'messageId': '6ff7c6fd-4053-4ab4-bc12-c042be1ed275', 'receiptHandle': 'AQEBYASiFPjQT5JBI2KKCTF/uQhHfJt/tHhgucslQQdvkNVxcXCNi2E5Ux4U9N0eu7RfvlnvtycjUh0gdL7jIeoyH+VRKSF61uAJuT4p31BsNe0GYu49N9A6+kxjP/RrykR7ZofmQRdHToX1ugRc76SMRic4H/ZZ89YAHA2QeomJFMrYywIxlk8OAzYaBf2dQI7WexjY5u1CW00XNMbTGyTo4foVPxcSn6bdFpfgxW/L7yJMX/0YQvrA9ruiuQ+lrui+6fWYh5zEk3f5v1bYtUQ6DtyyfbtMHZQJTJpUlWAFRzzN+3melilH7FySyOGDXhPb0BOSzmdKq9wBbfLW/YPb7l99ejq4GfRfj8LyI4EtB96vTeUw4LCgUqbZcBrxbGBLUXMacweh+gCjHav9ylqr2SeOiqG3vWPq9pwFYQIDqNE=', 'body': '{\n "Type" : "Notification",\n "MessageId" : "33e1075a-435c-5217-a33d-59fae85e19b2",\n "TopicArn" : "arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester",\n "Subject" : "Amazon S3 Notification",\n "Message" : "{\\"Service\\":\\"Amazon S3\\",\\"Event\\":\\"s3:TestEvent\\",\\"Time\\":\\"2024-04-22T18:13:22.416Z\\",\\"Bucket\\":\\"uds-sbx-cumulus-staging\\",\\"RequestId\\":\\"DQ4T0GRVFPSX45C9\\",\\"HostId\\":\\"gHBFnYNmfnGDZBmqoQwA3RScjtjBk5lr426moGxu8IDpe5UhWAqNTxHqilWBoPN1njzIrzNrf8c=\\"}",\n "Timestamp" : "2024-04-22T18:13:22.434Z",\n "SignatureVersion" : "1",\n "Signature" : "RvSxqpU7J7CCJXbin9cXqTxzjMjgAUFtk/n454mTMcOe5x3Ay1w4AHfzyeYQCFBdLHNBa8n3OdMDoDlJqyVQMb8k+nERaiZWN2oqFVDRqT9pqSr89b+4FwlhPv6TYy2pBa/bgjZ4cOSYsey1uSQ3hjl0idfssvuV5cCRxQScbA+yu8Gcv9K7Oqgy01mC0sDHiuPIifhFXxupG5ygbjqoHIB+1gdMEbBwyixoY5GOpHM/O2uHNF+dJDjax1WMxQ2FzVjiFeCa+tNcjovF059+tx2v1YmDq/kEAFrN6DAtP6R4zKag62P9jkvjU/wHYJ2jjXmZAqoG+nuzAo24HiZPSw==",\n "SigningCertURL" : "https://sns.us-west-2.amazonaws.com/SimpleNotificationService-60eadc530605d63b8e62a523676ef735.pem",\n "UnsubscribeURL" : "https://sns.us-west-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester:76cbefa1-addf-45c2-97e1-ae16986b195b"\n}', 'attributes': {'ApproximateReceiveCount': '1', 'SentTimestamp': '1713809602474', 'SenderId': 'AIDAIYLAVTDLUXBIEIX46', 'ApproximateFirstReceiveTimestamp': '1713809602483'}, 'messageAttributes': {}, 'md5OfBody': 'c6d06d1b742ad5bd2cfe5f542640aad2', 'eventSource': 'aws:sqs', 'eventSourceARN': 'arn:aws:sqs:us-west-2:237868187491:uds-sbx-cumulus-granules_cnm_ingester', 'awsRegion': 'us-west-2'}]} """ LambdaLoggerGenerator.remove_default_handlers() - print(f'event: {event}') - raise NotImplementedError('Require implementation later') + GranulesCnmIngesterLogic().start(event) + return diff --git a/cumulus_lambda_functions/lib/aws/aws_message_transformers.py b/cumulus_lambda_functions/lib/aws/aws_message_transformers.py index 0fe119aa..101b383b 100644 --- a/cumulus_lambda_functions/lib/aws/aws_message_transformers.py +++ b/cumulus_lambda_functions/lib/aws/aws_message_transformers.py @@ -1,4 +1,5 @@ import json +from urllib.parse import unquote from cumulus_lambda_functions.lib.json_validator import JsonValidator @@ -29,8 +30,15 @@ class AwsMessageTransformers: "Type": {"type": "string"}, "MessageId": {"type": "string"}, "TopicArn": {"type": "string"}, + "Subject": {"type": "string"}, + "Timestamp": {"type": "string"}, + "SignatureVersion": {"type": "string"}, + "Signature": {"type": "string"}, + "SigningCertURL": {"type": "string"}, + "UnsubscribeURL": {"type": "string"}, "Message": {"type": "string"}, - } + }, + "required": ["Message"] } S3_RECORD_SCHEMA = { @@ -41,22 +49,25 @@ class AwsMessageTransformers: 'maxItems': 1, 'items': { 'type': 'object', - 'properties': {'s3': { - 'type': 'object', - 'properties': { - 'bucket': { - 'type': 'object', - 'properties': {'name': {'type': 'string', 'minLength': 1}}, - 'required': ['name'] - }, - 'object': { - 'type': 'object', - 'properties': {'key': {'type': 'string', 'minLength': 1}}, - 'required': ['key'] - }}, - 'required': ['bucket', 'object'] - }}, - 'required': ['s3'] + 'properties': { + 'eventName': {'type': 'string'}, + 's3': { + 'type': 'object', + 'properties': { + 'bucket': { + 'type': 'object', + 'properties': {'name': {'type': 'string', 'minLength': 1}}, + 'required': ['name'] + }, + 'object': { + 'type': 'object', + 'properties': {'key': {'type': 'string', 'minLength': 1}}, + 'required': ['key'] + }}, + 'required': ['bucket', 'object'] + } + }, + 'required': ['eventName', 's3'] } }}, 'required': ['Records'] @@ -74,3 +85,14 @@ def sqs_sns(self, raw_msg: json): sns_msg_body = sqs_msg_body['Message'] sns_msg_body = json.loads(sns_msg_body) return sns_msg_body + + def get_s3_from_sns(self, sns_msg_body): + result = JsonValidator(self.S3_RECORD_SCHEMA).validate(sns_msg_body) + if result is not None: + raise ValueError(f'sqs_msg did not pass SQS_MSG_SCHEMA: {result}') + s3_summary = { + 'eventName': sns_msg_body['Records'][0]['eventName'], + 'bucket': sns_msg_body['Records'][0]['s3']['bucket']['name'], + 'key': unquote(sns_msg_body['Records'][0]['s3']['object']['key'].replace('+', ' ')), + } + return s3_summary \ No newline at end of file diff --git a/cumulus_lambda_functions/lib/aws/aws_s3.py b/cumulus_lambda_functions/lib/aws/aws_s3.py index 4467c931..f4cbc4ae 100644 --- a/cumulus_lambda_functions/lib/aws/aws_s3.py +++ b/cumulus_lambda_functions/lib/aws/aws_s3.py @@ -58,6 +58,13 @@ def __upload_to_s3(self, bucket, prefix, file_path, delete_files=False, add_size raise e return f's3://{bucket}/{s3_key}' + def exists(self, base_path: str, relative_path: str): + try: + response = self.__s3_client.head_object(Bucket=base_path, Key=relative_path) + except: + return False + return True + def upload(self, file_path: str, base_path: str, relative_parent_path: str, delete_files: bool, s3_name: Union[str, None] = None, obj_tags: dict = {}, overwrite: bool = False): s3_url = self.__upload_to_s3(base_path, relative_parent_path, file_path, delete_files, True, obj_tags, s3_name) diff --git a/cumulus_lambda_functions/lib/uds_db/uds_collections.py b/cumulus_lambda_functions/lib/uds_db/uds_collections.py index d37f625c..17cf06c4 100644 --- a/cumulus_lambda_functions/lib/uds_db/uds_collections.py +++ b/cumulus_lambda_functions/lib/uds_db/uds_collections.py @@ -70,6 +70,25 @@ def add_collection(self, collection_id: str, start_time: int, end_time: int, bbo self.__es.index_one(indexing_dict, collection_id, DBConstants.collections_index) return self + def get_collection(self, collection_id: str): + authorized_collection_ids_dsl = { + 'size': 20, + 'query': { + 'bool': { + 'must': [ + {'term': {DBConstants.collection_id: {'value': collection_id}}} + ] + } + }, + 'sort': [ + {DBConstants.collection_id: {'order': 'asc'}} + ] + } + LOGGER.debug(f'authorized_collection_ids_dsl: {authorized_collection_ids_dsl}') + authorized_collection_ids = self.__es.query(authorized_collection_ids_dsl, DBConstants.collections_index) + authorized_collection_ids = [k['_source'] for k in authorized_collection_ids['hits']['hits']] + return authorized_collection_ids + def get_collections(self, collection_regex: list): # temp_dsl = { # 'query': {'match_all': {}}, diff --git a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py index ed1691f0..53811749 100644 --- a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py +++ b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py @@ -19,7 +19,6 @@ class CatalogGranulesUnity(CatalogGranulesAbstract): DELAY_SECOND = 'DELAY_SECOND' REPEAT_TIMES = 'REPEAT_TIMES' CHUNK_SIZE = 'CHUNK_SIZE' - DEFAULT_CHUNK_SIZE = 5 def __init__(self) -> None: super().__init__() @@ -27,15 +26,15 @@ def __init__(self) -> None: self.__verify_ssl = True self.__delaying_second = 30 self.__repeating_times = 0 - self.__chunk_size = self.DEFAULT_CHUNK_SIZE + self.__chunk_size = StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE def __set_props_from_env(self): missing_keys = [k for k in [self.UPLOADED_FILES_JSON, self.PROVIDER_ID_KEY] if k not in os.environ] if len(missing_keys) > 0: raise ValueError(f'missing environment keys: {missing_keys}') self._retrieve_stac_json() - self.__chunk_size = int(os.environ.get(self.CHUNK_SIZE, self.DEFAULT_CHUNK_SIZE)) - self.__chunk_size = self.__chunk_size if self.__chunk_size > 0 else self.DEFAULT_CHUNK_SIZE + self.__chunk_size = int(os.environ.get(self.CHUNK_SIZE, StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE)) + self.__chunk_size = self.__chunk_size if self.__chunk_size > 0 else StageInOutUtils.CATALOG_DEFAULT_CHUNK_SIZE self.__provider_id = os.environ.get(self.PROVIDER_ID_KEY) self.__verify_ssl = os.environ.get(self.VERIFY_SSL_KEY, 'TRUE').strip().upper() == 'TRUE' self.__delaying_second = int(os.environ.get(self.DELAY_SECOND, '30')) diff --git a/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py index 70d1b6ea..3de73a1c 100644 --- a/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py +++ b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py @@ -10,6 +10,7 @@ class StageInOutUtils: OUTPUT_FILE = 'OUTPUT_FILE' + CATALOG_DEFAULT_CHUNK_SIZE = 5 @staticmethod def write_output_to_file(output_json: Union[dict, str, list]): diff --git a/docker/Dockerfile_download_granules.jpl b/docker/Dockerfile_download_granules.jpl index 0d1b56f4..895a375f 100644 --- a/docker/Dockerfile_download_granules.jpl +++ b/docker/Dockerfile_download_granules.jpl @@ -5,8 +5,8 @@ RUN apt-get update -y && apt-get install vim -y ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/unity" RUN python -m pip install boto3 RUN mkdir -p /usr/src/app/unity -COPY setup.py ./setup.py -RUN ["python", "setup.py", "install"] +COPY requirements.txt ./requirements.txt +RUN python3 -m pip install -r requirements.txt COPY setup.py /usr/src/app/unity/setup.py COPY cumulus_lambda_functions /usr/src/app/unity/cumulus_lambda_functions diff --git a/docker/Dockerfile_download_granules.public b/docker/Dockerfile_download_granules.public index c74f9e26..16dd71a4 100644 --- a/docker/Dockerfile_download_granules.public +++ b/docker/Dockerfile_download_granules.public @@ -5,8 +5,8 @@ RUN apt-get update -y && apt-get install vim -y ENV PYTHONPATH "${PYTHONPATH}:/usr/src/app/unity" RUN python -m pip install boto3 RUN mkdir -p /usr/src/app/unity -COPY setup.py ./setup.py -RUN ["python", "setup.py", "install"] +COPY requirements.txt ./requirements.txt +RUN python3 -m pip install -r requirements.txt COPY setup.py /usr/src/app/unity/setup.py COPY cumulus_lambda_functions /usr/src/app/unity/cumulus_lambda_functions diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..291699f8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,39 @@ +annotated-types==0.6.0 +anyio==4.3.0 +attrs==23.2.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +dateparser==1.2.0 +elasticsearch==7.13.4 +exceptiongroup==1.2.1 +fastapi==0.110.3 +fastjsonschema==2.19.1 +h11==0.14.0 +idna==3.7 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==0.12.0 +mangum==0.17.0 +pydantic==2.7.1 +pydantic_core==2.18.2 +pygeofilter==0.2.1 +pygeoif==1.4.0 +pystac==1.9.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +referencing==0.35.0 +regex==2024.4.28 +requests==2.31.0 +requests-aws4auth==1.2.3 +rpds-py==0.18.0 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +tenacity==8.2.3 +typing_extensions==4.11.0 +tzlocal==5.2 +urllib3==1.26.18 +uvicorn==0.29.0 +xmltodict==0.13.0 \ No newline at end of file diff --git a/tests/cumulus_lambda_functions/granules_cnm_ingester/__init__.py b/tests/cumulus_lambda_functions/granules_cnm_ingester/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py new file mode 100644 index 00000000..7849c00e --- /dev/null +++ b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py @@ -0,0 +1,65 @@ +import json +import os +from unittest import TestCase + +from cumulus_lambda_functions.lib.time_utils import TimeUtils +from pystac import ItemCollection + +from cumulus_lambda_functions.granules_cnm_ingester.granules_cnm_ingester_logic import GranulesCnmIngesterLogic + + +class TestGranulesCnmIngesterLogic(TestCase): + + def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + os.environ['SNS_TOPIC_ARN'] = 'arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-cnm-submission-sns' + os.environ['COLLECTION_CREATION_LAMBDA_NAME'] = 'NA' + os.environ['UNITY_DEFAULT_PROVIDER'] = 'unity' + os.environ['CUMULUS_WORKFLOW_NAME'] = 'CatalogGranule' + os.environ['REPORT_TO_EMS'] = 'FALSE' + os.environ['CUMULUS_LAMBDA_PREFIX'] = 'uds-sbx-cumulus' + os.environ['CUMULUS_WORKFLOW_SQS_URL'] = 'https://sqs.us-west-2.amazonaws.com/237868187491/uds-sbx-cumulus-cnm-submission-queue' + os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' + os.environ['ES_PORT'] = '9200' + + def test_load_successful_features_s3(self): + s3_url = 's3://uds-sbx-cumulus-staging/integration_test/stag_eout/successful_features_2024-04-11T21:21:13.019769.json' + a = GranulesCnmIngesterLogic().load_successful_features_s3(s3_url) + return + + def test_validate_granules(self): + result = {"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file01", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file01.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file01.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file01.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file02", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file02.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file02.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file02.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file03", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file03.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file03.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file03.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file04", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file04.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file04.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file04.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file05", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file05.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file06", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file06.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file06.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file06.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file08", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file08.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file08.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file08.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file07", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file07.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file07.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file07.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file10", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file10.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file10.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file10.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file09", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file09.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file09.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file09.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}]} + result = ItemCollection.from_dict(result) + a = GranulesCnmIngesterLogic() + a.successful_features = result + a.validate_granules() + return + + def test_extract_collection_id(self): + result = {"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file01", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file01.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file01.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file01.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file02", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file02.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file02.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file02.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file02/test_file02.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file03", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file03.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file03.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file03.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file03/test_file03.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file04", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file04.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file04.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file04.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file04/test_file04.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file05", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file05.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file05/test_file05.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file06", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file06.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file06.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file06.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file06/test_file06.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file08", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file08.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file08.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file08.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file08/test_file08.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file07", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file07.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file07.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file07.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file07/test_file07.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file10", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file10.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file10.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file10.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file10/test_file10.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}, {"type": "Feature", "stac_version": "1.0.0", "id": "NEW_COLLECTION_EXAMPLE_L1B___9:test_file09", "properties": {"start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"test_file09.nc": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc", "title": "test_file01.nc", "roles": ["data"]}, "test_file09.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.cas", "title": "test_file01.nc.cas", "roles": ["metadata"]}, "test_file09.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file09/test_file09.nc.stac.json", "title": "test_file01.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "NEW_COLLECTION_EXAMPLE_L1B___9"}]} + result = ItemCollection.from_dict(result) + a = GranulesCnmIngesterLogic() + a.successful_features = result + a.extract_collection_id() + self.assertEqual(a.collection_id, 'NEW_COLLECTION_EXAMPLE_L1B___9') + return + + def test_create_collection(self): + a = GranulesCnmIngesterLogic() + self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'UDS_COLLECTION_CNM_INGESTION' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.04.24.06.00'.replace('.', '') # '2402011200' + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + a.collection_id = temp_collection_id + a.create_collection() + return + + def test_send_cnm_msg(self): + result = '''{"type": "FeatureCollection", "features": [{"type": "Feature", "stac_version": "1.0.0", "id": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05", "properties": {"tag": "#sample", "c_data1": [1, 10, 100, 1000], "c_data2": [false, true, true, false, true], "c_data3": ["Bellman Ford"], "soil10": {"0_0": 0, "0_1": 1, "0_2": 0}, "start_datetime": "2016-01-31T18:00:00.009057Z", "end_datetime": "2016-01-31T19:59:59.991043Z", "created": "2016-02-01T02:45:59.639000Z", "updated": "2022-03-23T15:48:21.578000Z", "datetime": "1970-01-01T00:00:00Z"}, "geometry": {"type": "Point", "coordinates": [0.0, 0.0]}, "links": [], "assets": {"abcd.1234.efgh.test_file05.data.stac.json": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.data.stac.json", "title": "abcd.1234.efgh.test_file05.data.stac.json", "roles": ["data"]}, "abcd.1234.efgh.test_file05.nc.cas": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.nc.cas", "title": "abcd.1234.efgh.test_file05.nc.cas", "roles": ["metadata"]}, "abcd.1234.efgh.test_file05.nc.stac.json": {"href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700:abcd.1234.efgh.test_file05/abcd.1234.efgh.test_file05.nc.stac.json", "title": "abcd.1234.efgh.test_file05.nc.stac.json", "roles": ["metadata"]}}, "bbox": [0.0, 0.0, 0.0, 0.0], "stac_extensions": [], "collection": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2404240700"}]} + ''' + result = json.loads(result) + a = GranulesCnmIngesterLogic() + a.successful_features_json = result + a.send_cnm_msg() + return diff --git a/tests/integration_tests/test_custom_metadata_automated_ingestion.py b/tests/integration_tests/test_custom_metadata_automated_ingestion.py new file mode 100644 index 00000000..2342edd7 --- /dev/null +++ b/tests/integration_tests/test_custom_metadata_automated_ingestion.py @@ -0,0 +1,528 @@ +import base64 +import json +import os +import tempfile +from datetime import datetime +from sys import argv +from time import sleep +from unittest import TestCase + +import pystac +import requests +from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3 + +from cumulus_lambda_functions.lib.time_utils import TimeUtils +from pystac import Link, Catalog, Asset, Item, ItemCollection + +from cumulus_lambda_functions.docker_entrypoint.__main__ import choose_process + +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils + +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac + +from cumulus_lambda_functions.lib.cognito_login.cognito_login import CognitoLogin +from dotenv import load_dotenv + + +class TestCustomMetadataEndToEnd(TestCase): + # 1. setup admin for the test venue + # 2. create a custom metadata for the venue + # 3. create a collection + # 4. push granules to cnm w/ custom metadata + # 5. get granules w/ custom metadata + + def setUp(self) -> None: + super().setUp() + load_dotenv() + self.cognito_login = CognitoLogin() \ + .with_client_id(os.environ.get('CLIENT_ID', '')) \ + .with_cognito_url(os.environ.get('COGNITO_URL', '')) \ + .with_verify_ssl(False) \ + .start(base64.standard_b64decode(os.environ.get('USERNAME')).decode(), + base64.standard_b64decode(os.environ.get('PASSWORD')).decode()) + self._url_prefix = f'{os.environ.get("UNITY_URL")}/{os.environ.get("UNITY_STAGE", "sbx-uds-dapa")}' + self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'UDS_COLLECTION' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.04.25.11.00'.replace('.', '') # '2402011200' + + self.custom_metadata_body = { + 'tag': {'type': 'keyword'}, + 'c_data1': {'type': 'long'}, + 'c_data2': {'type': 'boolean'}, + 'c_data3': {'type': 'keyword'}, + 'soil10': { + "properties": { + "0_0": {"type": "integer"}, + "0_1": {"type": "integer"}, + "0_2": {"type": "integer"}, + } + } + } + self.granule_id = 'abcd.1234.efgh.test_file05' + return + + def test_04_upload_sample_granule(self): + custom_metadata = { + 'tag': '#sample', + 'c_data1': [1, 10, 10**2, 10**3], + 'c_data2': [False, True, True, False, True], + 'c_data3': ['Bellman Ford'], + 'soil10': { + "0_0": 0, + "0_1": 1, + "0_2": 0, + } + } + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + os.environ['VERIFY_SSL'] = 'FALSE' + + os.environ['COLLECTION_ID'] = temp_collection_id + os.environ['STAGING_BUCKET'] = 'uds-sbx-cumulus-staging' + + os.environ['GRANULES_SEARCH_DOMAIN'] = 'UNITY' + # os.environ['GRANULES_UPLOAD_TYPE'] = 'UPLOAD_S3_BY_STAC_CATALOG' + # defaulted to this value + + if len(argv) > 1: + argv.pop(-1) + argv.append('UPLOAD') + + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') + os.environ['UPLOAD_DIR'] = '' # not needed + os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json') + os.environ['OUTPUT_DIRECTORY'] = os.path.join(tmp_dir_name, 'output_dir') + FileUtils.mk_dir_p(os.environ.get('OUTPUT_DIRECTORY')) + granules_dir = os.path.join(tmp_dir_name, 'some_granules') + FileUtils.mk_dir_p(granules_dir) + with open(os.path.join(granules_dir, f'{self.granule_id}.data.stac.json'), 'w') as ff: + ff.write('sample_file') + with open(os.path.join(granules_dir, f'{self.granule_id}.nc.cas'), 'w') as ff: + ff.write(''' + + + AggregateDir + snppatmsl1a + + + AutomaticQualityFlag + Passed + + + BuildId + v01.43.00 + + + CollectionLabel + L1AMw_nominal2 + + + DataGroup + sndr + + + EndDateTime + 2016-01-14T10:06:00.000Z + + + EndTAI93 + 726919569.000 + + + FileFormat + nc4 + + + FileLocation + /pge/out + + + Filename + SNDR.SNPP.ATMS.L1A.nominal2.02.nc + + + GranuleNumber + 101 + + + JobId + f163835c-9945-472f-bee2-2bc12673569f + + + ModelId + urn:npp:SnppAtmsL1a + + + NominalDate + 2016-01-14 + + + ProductName + SNDR.SNPP.ATMS.20160114T1000.m06.g101.L1A.L1AMw_nominal2.v03_15_00.D.201214135000.nc + + + ProductType + SNDR_SNPP_ATMS_L1A + + + ProductionDateTime + 2020-12-14T13:50:00.000Z + + + ProductionLocation + Sounder SIPS: JPL/Caltech (Dev) + + + ProductionLocationCode + D + + + RequestId + 1215 + + + StartDateTime + 2016-01-14T10:00:00.000Z + + + StartTAI93 + 726919209.000 + + + TaskId + 8c3ae101-8f7c-46c8-b5c6-63e7b6d3c8cd + + ''') + stac_item = Item(id=self.granule_id, + geometry={ + "type": "Point", + "coordinates": [0.0, 0.0] + }, + bbox=[0.0, 0.0, 0.0, 0.0], + datetime=TimeUtils().parse_from_unix(0, True).get_datetime_obj(), + properties={ + **custom_metadata, + "start_datetime": "2016-01-31T18:00:00.009057Z", + "end_datetime": "2016-01-31T19:59:59.991043Z", + "created": "2016-02-01T02:45:59.639000Z", + "updated": "2022-03-23T15:48:21.578000Z", + "datetime": "2022-03-23T15:48:19.079000Z" + }, + href=os.path.join('some_granules', f'{self.granule_id}.nc.stac.json'), + collection=temp_collection_id, + assets={ + f'{self.granule_id}.data.stac.json': Asset(os.path.join('.', f'{self.granule_id}.data.stac.json'), title=f'{self.granule_id}.data.stac.json', roles=['data']), + f'{self.granule_id}.nc.cas': Asset(os.path.join('.', f'{self.granule_id}.nc.cas'), title=f'{self.granule_id}.nc.cas', roles=['metadata']), + f'{self.granule_id}.nc.stac.json': Asset(os.path.join('.', f'{self.granule_id}.nc.stac.json'), title=f'{self.granule_id}.nc.stac.json', roles=['metadata']), + }) + with open(os.path.join(granules_dir, f'{self.granule_id}.nc.stac.json'), 'w') as ff: + ff.write(json.dumps(stac_item.to_dict(False, False))) + catalog = Catalog( + id='NA', + description='NA') + catalog.set_self_href(os.environ['CATALOG_FILE']) + catalog.add_link( + Link('item', os.path.join('some_granules', f'{self.granule_id}.nc.stac.json'), 'application/json')) + print(json.dumps(catalog.to_dict(False, False))) + with open(os.environ['CATALOG_FILE'], 'w') as ff: + ff.write(json.dumps(catalog.to_dict(False, False))) + + upload_result_str = choose_process() + upload_result = json.loads(upload_result_str) + print(upload_result) + self.assertTrue('type' in upload_result, 'missing type') + self.assertEqual(upload_result['type'], 'Catalog', 'missing type') + upload_result = Catalog.from_dict(upload_result) + child_links = [k.href for k in upload_result.get_links(rel='item')] + self.assertEqual(len(child_links), 2, f'wrong length: {child_links}') + self.assertTrue(FileUtils.file_exist(child_links[0]), f'missing file: {child_links[0]}') + successful_feature_collection = ItemCollection.from_dict(FileUtils.read_json(child_links[0])) + successful_feature_collection = list(successful_feature_collection.items) + self.assertEqual(len(successful_feature_collection), 1, + f'wrong length: {successful_feature_collection}') + + self.assertTrue(FileUtils.file_exist(child_links[1]), f'missing file: {child_links[1]}') + failed_feature_collection = ItemCollection.from_dict(FileUtils.read_json(child_links[1])) + failed_feature_collection = list(failed_feature_collection.items) + self.assertEqual(len(failed_feature_collection), 0, f'wrong length: {failed_feature_collection}') + + upload_result = successful_feature_collection[0].to_dict(False, False) + print(f'example feature: {upload_result}') + self.assertTrue('assets' in upload_result, 'missing assets') + self.assertTrue(f'{self.granule_id}.nc.cas' in upload_result['assets'], 'missing assets#metadata__cas') + self.assertTrue('href' in upload_result['assets'][f'{self.granule_id}.nc.cas'], 'missing assets#metadata__cas#href') + self.assertTrue(upload_result['assets'][f'{self.granule_id}.nc.cas']['href'].startswith( + f's3://{os.environ["STAGING_BUCKET"]}/{os.environ["COLLECTION_ID"]}/')) + self.assertTrue(f'{self.granule_id}.data.stac.json' in upload_result['assets'], 'missing assets#data') + self.assertTrue('href' in upload_result['assets'][f'{self.granule_id}.data.stac.json'], 'missing assets#data#href') + self.assertTrue(upload_result['assets'][f'{self.granule_id}.data.stac.json']['href'].startswith( + f's3://{os.environ["STAGING_BUCKET"]}/{os.environ["COLLECTION_ID"]}/')) + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') + return + + def test_04_upload_many_granules(self): + custom_metadata = { + 'tag': '#sample', + 'c_data1': [1, 10, 10**2, 10**3], + 'c_data2': [False, True, True, False, True], + 'c_data3': ['Bellman Ford'], + 'soil10': { + "0_0": 0, + "0_1": 1, + "0_2": 0, + } + } + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + + os.environ['VERIFY_SSL'] = 'FALSE' + # os.environ['RESULT_PATH_PREFIX'] = 'integration_test/stag_eout' + os.environ['STAGING_BUCKET'] = 'uds-sbx-cumulus-staging' + os.environ['COLLECTION_ID'] = temp_collection_id + + os.environ['GRANULES_SEARCH_DOMAIN'] = 'UNITY' + # os.environ['GRANULES_UPLOAD_TYPE'] = 'UPLOAD_S3_BY_STAC_CATALOG' + # defaulted to this value + + if len(argv) > 1: + argv.pop(-1) + argv.append('UPLOAD') + + starting_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M') + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') + os.environ['UPLOAD_DIR'] = '' # not needed + os.environ['OUTPUT_DIRECTORY'] = os.path.join(tmp_dir_name, 'output_dir') + FileUtils.mk_dir_p(os.environ.get('OUTPUT_DIRECTORY')) + os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json') + total_files = 20 + # os.environ['PARALLEL_COUNT'] = str(total_files) + granules_dir = os.path.join(tmp_dir_name, 'some_granules') + FileUtils.mk_dir_p(granules_dir) + catalog = Catalog( + id='NA', + description='NA') + catalog.set_self_href(os.environ['CATALOG_FILE']) + + for i in range(1, total_files+1): + filename = f'test_file{i:02d}' + with open(os.path.join(granules_dir, f'{filename}.nc'), 'w') as ff: + ff.write('sample_file') + with open(os.path.join(granules_dir, f'{filename}.nc.cas'), 'w') as ff: + ff.write(''' + + + AggregateDir + snppatmsl1a + + + AutomaticQualityFlag + Passed + + + BuildId + v01.43.00 + + + CollectionLabel + L1AMw_nominal2 + + + DataGroup + sndr + + + EndDateTime + 2016-01-14T10:06:00.000Z + + + EndTAI93 + 726919569.000 + + + FileFormat + nc4 + + + FileLocation + /pge/out + + + Filename + SNDR.SNPP.ATMS.L1A.nominal2.02.nc + + + GranuleNumber + 101 + + + JobId + f163835c-9945-472f-bee2-2bc12673569f + + + ModelId + urn:npp:SnppAtmsL1a + + + NominalDate + 2016-01-14 + + + ProductName + SNDR.SNPP.ATMS.20160114T1000.m06.g101.L1A.L1AMw_nominal2.v03_15_00.D.201214135000.nc + + + ProductType + SNDR_SNPP_ATMS_L1A + + + ProductionDateTime + 2020-12-14T13:50:00.000Z + + + ProductionLocation + Sounder SIPS: JPL/Caltech (Dev) + + + ProductionLocationCode + D + + + RequestId + 1215 + + + StartDateTime + 2016-01-14T10:00:00.000Z + + + StartTAI93 + 726919209.000 + + + TaskId + 8c3ae101-8f7c-46c8-b5c6-63e7b6d3c8cd + + ''') + stac_item = Item(id=filename, + geometry={ + "type": "Point", + "coordinates": [0.0, 0.0] + }, + bbox=[0.0, 0.0, 0.0, 0.0], + datetime=TimeUtils().parse_from_unix(0, True).get_datetime_obj(), + properties={ + **custom_metadata, + "start_datetime": "2016-01-31T18:00:00.009057Z", + "end_datetime": "2016-01-31T19:59:59.991043Z", + "created": "2016-02-01T02:45:59.639000Z", + "updated": "2022-03-23T15:48:21.578000Z", + "datetime": "2022-03-23T15:48:19.079000Z" + }, + href=os.path.join('some_granules', f'{filename}.nc.stac.json'), + collection='NA', + assets={ + f'{filename}.nc': Asset(os.path.join('.', f'{filename}.nc'), title='test_file01.nc', roles=['data']), + f'{filename}.nc.cas': Asset(os.path.join('.', f'{filename}.nc.cas'), title='test_file01.nc.cas', roles=['metadata']), + f'{filename}.nc.stac.json': Asset(os.path.join('.', f'{filename}.nc.stac.json'), title='test_file01.nc.stac.json', roles=['metadata']), + }) + with open(os.path.join(granules_dir, f'{filename}.nc.stac.json'), 'w') as ff: + ff.write(json.dumps(stac_item.to_dict(False, False))) + catalog.add_link(Link('item', os.path.join('some_granules', f'{filename}.nc.stac.json'), 'application/json')) + print(json.dumps(catalog.to_dict(False, False))) + with open(os.environ['CATALOG_FILE'], 'w') as ff: + ff.write(json.dumps(catalog.to_dict(False, False))) + + upload_result_str = choose_process() + upload_result = json.loads(upload_result_str) + print(upload_result) + """ + {'type': 'Catalog', 'id': 'NA', 'stac_version': '1.0.0', 'description': 'NA', 'links': [{'rel': 'root', 'href': '/var/folders/33/xhq97d6s0dq78wg4h2smw23m0000gq/T/tmprew515jo/catalog.json', 'type': 'application/json'}, {'rel': 'item', 'href': '/var/folders/33/xhq97d6s0dq78wg4h2smw23m0000gq/T/tmprew515jo/successful_features.json', 'type': 'application/json'}, {'rel': 'item', 'href': '/var/folders/33/xhq97d6s0dq78wg4h2smw23m0000gq/T/tmprew515jo/failed_features.json', 'type': 'application/json'}]} + """ + self.assertTrue('type' in upload_result, 'missing type') + self.assertEqual(upload_result['type'], 'Catalog', 'missing type') + upload_result = Catalog.from_dict(upload_result) + child_links = [k.href for k in upload_result.get_links(rel='item')] + self.assertEqual(len(child_links), 2, f'wrong length: {child_links}') + self.assertTrue(FileUtils.file_exist(child_links[0]), f'missing file: {child_links[0]}') + successful_feature_collection = ItemCollection.from_dict(FileUtils.read_json(child_links[0])) + successful_feature_collection = list(successful_feature_collection.items) + self.assertEqual(len(successful_feature_collection), total_files, f'wrong length: {successful_feature_collection}') + + self.assertTrue(FileUtils.file_exist(child_links[1]), f'missing file: {child_links[1]}') + failed_feature_collection = ItemCollection.from_dict(FileUtils.read_json(child_links[1])) + failed_feature_collection = list(failed_feature_collection.items) + self.assertEqual(len(failed_feature_collection), 0, f'wrong length: {failed_feature_collection}') + + upload_result = successful_feature_collection[0].to_dict(False, False) + print(f'example feature: {upload_result}') + self.assertTrue('assets' in upload_result, 'missing assets') + result_key = [k for k in upload_result['assets'].keys()][0] + self.assertTrue(result_key.startswith('test_file'), f'worng asset key: {result_key}') + result_key_prefix = result_key.split('.')[0] + self.assertTrue(f'{result_key_prefix}.nc.cas' in upload_result['assets'], f'missing assets#metadata asset: {result_key_prefix}.nc.cas') + self.assertTrue('href' in upload_result['assets'][f'{result_key_prefix}.nc.cas'], 'missing assets#metadata__cas#href') + self.assertTrue(upload_result['assets'][f'{result_key_prefix}.nc.cas']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/{os.environ["COLLECTION_ID"]}/')) + self.assertTrue(f'{result_key_prefix}.nc' in upload_result['assets'], f'missing assets#data: {result_key_prefix}.nc') + self.assertTrue('href' in upload_result['assets'][f'{result_key_prefix}.nc'], 'missing assets#data#href') + self.assertTrue(upload_result['assets'][f'{result_key_prefix}.nc']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/{os.environ["COLLECTION_ID"]}/')) + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') + """ + Example output: + { + 'type': 'FeatureCollection', + 'features': [{ + 'type': 'Feature', + 'stac_version': '1.0.0', + 'id': 'NEW_COLLECTION_EXAMPLE_L1B___9:test_file01', + 'properties': {'start_datetime': '2016-01-31T18:00:00.009057Z', + 'end_datetime': '2016-01-31T19:59:59.991043Z', 'created': '2016-02-01T02:45:59.639000Z', + 'updated': '2022-03-23T15:48:21.578000Z', 'datetime': '1970-01-01T00:00:00Z'}, + 'geometry': {'type': 'Point', 'coordinates': [0.0, 0.0]}, 'links': [], + 'assets': {'data': { + 'href': 's3://uds-test-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc', + 'title': 'main data'}, 'metadata__cas': { + 'href': 's3://uds-test-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.cas', + 'title': 'metadata cas'}, 'metadata__stac': { + 'href': 's3://uds-test-cumulus-staging/NEW_COLLECTION_EXAMPLE_L1B___9/NEW_COLLECTION_EXAMPLE_L1B___9:test_file01/test_file01.nc.stac.json', + 'title': 'metadata stac'}}, + 'bbox': [0.0, 0.0, 0.0, 0.0], + 'stac_extensions': [], + 'collection': 'NEW_COLLECTION_EXAMPLE_L1B___9'}]} + """ + s3 = AwsS3() + s3_keys = [k for k in s3.get_child_s3_files(os.environ['STAGING_BUCKET'], + f"stage_out/successful_features_{starting_time}", + )] + s3_keys = sorted(s3_keys) + print(f's3_keys: {s3_keys}') + self.assertTrue(len(s3_keys) > 0, f'empty files in S3') + local_file = s3.set_s3_url(f's3://{os.environ["STAGING_BUCKET"]}/{s3_keys[-1][0]}').download(tmp_dir_name) + successful_feature_collection = ItemCollection.from_dict(FileUtils.read_json(local_file)) + successful_feature_collection = list(successful_feature_collection.items) + self.assertEqual(len(successful_feature_collection), total_files, f'wrong length: {successful_feature_collection}') + return + + + def test_06_retrieve_granule(self): + temp_collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + post_url = f'{self._url_prefix}/collections/{temp_collection_id}/items?limit=2' + # post_url = f'{self._url_prefix}/collections/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2312041030/items?limit=2&offset=URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:UDS_COLLECTION___2312041030:test_file02' + print(post_url) + headers = { + 'Authorization': f'Bearer {self.cognito_login.token}', + 'Content-Type': 'application/json', + } + query_result = requests.get(url=post_url, + headers=headers) + self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}') + response_json = json.loads(query_result.content.decode()) + print(response_json) + self.assertTrue(len(response_json['features']) > 0, f'empty granules. Need collections to compare') + for each_feature in response_json['features']: + stac_item = pystac.Item.from_dict(each_feature) + validation_result = stac_item.validate() + self.assertTrue(isinstance(validation_result, list), + f'wrong validation for : {json.dumps(each_feature, indent=4)}. details: {validation_result}') + self.assertTrue('c_data3' in stac_item.properties, f'missing custom_metadata: {each_feature}') + return diff --git a/tests/integration_tests/test_custom_metadata_end_to_end.py b/tests/integration_tests/test_custom_metadata_end_to_end.py index c6c14cc2..b548df66 100644 --- a/tests/integration_tests/test_custom_metadata_end_to_end.py +++ b/tests/integration_tests/test_custom_metadata_end_to_end.py @@ -41,7 +41,7 @@ def setUp(self) -> None: self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' self.tenant_venue = 'DEV' # 'DEV1' # 'dev' self.collection_name = 'UDS_COLLECTION' # 'uds_collection' # 'sbx_collection' - self.collection_version = '24.03.29.10.00'.replace('.', '') # '2402011200' + self.collection_version = '24.04.25.09.00'.replace('.', '') # '2402011200' self.custom_metadata_body = { 'tag': {'type': 'keyword'}, diff --git a/tf-module/unity-cumulus/granules_cnm_ingester.tf b/tf-module/unity-cumulus/granules_cnm_ingester.tf index 1d45a863..49a435e2 100644 --- a/tf-module/unity-cumulus/granules_cnm_ingester.tf +++ b/tf-module/unity-cumulus/granules_cnm_ingester.tf @@ -11,6 +11,14 @@ resource "aws_lambda_function" "granules_cnm_ingester" { variables = { LOG_LEVEL = var.log_level SNS_TOPIC_ARN = var.cnm_sns_topic_arn + ES_URL = aws_elasticsearch_domain.uds-es.endpoint + ES_PORT = 443 + CUMULUS_WORKFLOW_SQS_URL = var.workflow_sqs_url + CUMULUS_LAMBDA_PREFIX = var.prefix + REPORT_TO_EMS = var.report_to_ems + CUMULUS_WORKFLOW_NAME = "CatalogGranule" + UNITY_DEFAULT_PROVIDER = var.unity_default_provider + COLLECTION_CREATION_LAMBDA_NAME = "NA" } } diff --git a/tf-module/unity-cumulus/main.tf b/tf-module/unity-cumulus/main.tf index 2a29591e..bff1222a 100644 --- a/tf-module/unity-cumulus/main.tf +++ b/tf-module/unity-cumulus/main.tf @@ -148,7 +148,6 @@ resource "aws_lambda_function" "uds_api_1" { UNITY_DEFAULT_PROVIDER = var.unity_default_provider COLLECTION_CREATION_LAMBDA_NAME = "arn:aws:lambda:${var.aws_region}:${local.account_id}:function:${var.prefix}-uds_api_1" SNS_TOPIC_ARN = var.cnm_sns_topic_arn - UNITY_DEFAULT_PROVIDER = var.unity_default_provider DAPA_API_PREIFX_KEY = var.dapa_api_prefix CORS_ORIGINS = var.cors_origins UDS_BASE_URL = var.uds_base_url