unity-sds · ngachung · Jan 23, 2023 · Dec 19, 2022 · Jan 5, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.10.0] - 2022-12-19
+### Added
+- [#104](https://github.com/unity-sds/unity-data-services/pull/104) added the `updated` time to the collection & item STAC dictionaries
+### Changed
+- [#104](https://github.com/unity-sds/unity-data-services/pull/104) use pystac library objects to create collection and item STAC dictionaries
+
+## [1.9.3] - 2022-12-19
+### Added
+- [#103](https://github.com/unity-sds/unity-data-services/pull/103) return a dictionary including HREFs instead of the string `REGISTERED`
+
 ## [1.9.2] - 2022-11-16
 ### Fixed
 - [#100](https://github.com/unity-sds/unity-data-services/pull/100) status=completed is only for granules, not for collections

diff --git a/ci.cd/Makefile b/ci.cd/Makefile
@@ -42,4 +42,6 @@ update_lambda_function_mcp_dev_4:
 update_lambda_function_mcp_dev_5:
 	aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-cumulus_collections_creation_dapa_facade --publish &>/dev/null
 
+mcp_dev: upload_lambda_mcp_dev update_lambda_function_mcp_dev_1 update_lambda_function_mcp_dev_2 update_lambda_function_mcp_dev_4 update_lambda_function_mcp_dev_5
+
 
diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py
@@ -3,8 +3,10 @@
 from urllib.parse import quote_plus, urlparse, unquote_plus
 
 import pystac
+from pystac.utils import datetime_to_str
+
 from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
-from pystac import Link
+from pystac import Link, Collection, Extent, SpatialExtent, TemporalExtent, Summaries
 
 from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract
 from cumulus_lambda_functions.lib.time_utils import TimeUtils
@@ -287,6 +289,7 @@
 
 class CollectionTransformer(StacTransformerAbstract):
     def __init__(self, report_to_ems:bool = True, include_date_range=False):
+        super().__init__()
         self.__stac_collection_schema = json.loads(STAC_COLLECTION_SCHEMA)
         self.__cumulus_collection_schema = {}
         self.__report_to_ems = report_to_ems
@@ -307,7 +310,7 @@ def generate_target_link_url(self, regex: str = None, bucket: str = None):
             href_link[0] = bucket
         return f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}"
 
-    def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'item'):
+    def __convert_to_stac_link_obj(self, collection_file_obj: dict, rel_type: str = 'item'):
         """
         expected output
         {
@@ -329,23 +332,17 @@ def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'it
         :param collection_file_obj:
         :return: dict
         """
-        if collection_file_obj is None:
-            return {}
-        stac_link = {
-            'rel': rel_type,
-        }
+        temp_link = Link(target=self.generate_target_link_url(
+                collection_file_obj['regex'] if 'regex' in collection_file_obj else None,
+                collection_file_obj['bucket'] if 'bucket' in collection_file_obj else None,
+            ),
+            rel=rel_type
+            )
         if 'type' in collection_file_obj:
-            stac_link['type'] = collection_file_obj['type']
+            temp_link.media_type = collection_file_obj['type']
         if 'sampleFileName' in collection_file_obj:
-            stac_link['title'] = collection_file_obj['sampleFileName']
-        stac_link['href'] = self.generate_target_link_url(
-            collection_file_obj['regex'] if 'regex' in collection_file_obj else None,
-            collection_file_obj['bucket'] if 'bucket' in collection_file_obj else None,
-        )
-        return stac_link
-
-    # def to_pystac_link_obj(self, input_dict: dict):
-    #     return
+            temp_link.title = collection_file_obj['sampleFileName']
+        return temp_link
 
     def to_stac(self, source: dict) -> dict:
         source_sample = {
@@ -392,60 +389,39 @@ def to_stac(self, source: dict) -> dict:
             "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
             "timestamp": 1647992849273
         }
-        # TemporalIntervals([
-        #     datetime.strptime(source['dateFrom'])
-        # ])
-        # stac_collection = pystac.Collection(
-        #     id=f"{source['name']}___{source['version']}",
-        #     description='TODO',
-        #     extent=Extent(
-        #         SpatialExtent([[0, 0, 0, 0]]),
-        #         TemporalExtent([[source['dateFrom'] if 'dateFrom' in source else None,
-        #                          source['dateTo'] if 'dateTo' in source else None]])
-        #     ),
-        #     summaries=Summaries({
-        #         "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
-        #         "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
-        #         "process": [source['process'] if 'process' in source else ''],
-        #         "totalGranules": [source['total_size'] if 'total_size' in source else -1],
-        #     }),
-        # )
-        # stac_collection.get_root_link().target = './collection.json'
-        # stac_collection.add_links([Link.from_dict(k) for k in [self.__convert_to_stac_links(k) for k in source['files']]])
-        stac_collection = {
-            "type": "Collection",
-            "stac_version": "1.0.0",
-            # "stac_extensions": [],
-            "id": f"{source['name']}___{source['version']}",
-            "description": "TODO",
-            "license": "proprietary",
-            # "keywords": [],
-            "providers": [],
-            "extent": {
-                "spatial": {
-                    "bbox": [[0, 0, 0, 0]]
-                },
-                "temporal": {
-                    "interval": [[source['dateFrom'] if 'dateFrom' in source else None,
-                                 source['dateTo'] if 'dateTo' in source else None
-                                  ]]
-                }
-            },
-            "assets": {},
-            "summaries": {
+        temporal_extent = []
+        if 'dateFrom' in source:
+            temporal_extent.append(self.get_time_obj(source['dateFrom']))
+        if 'dateTo' in source:
+            temporal_extent.append(self.get_time_obj(source['dateTo']))
+        stac_collection = Collection(
+            id=f"{source['name']}___{source['version']}",
+            # href=f"https://ideas-api-to-be-hosted/slcp/collections/{input_collection['ShortName']}::{input_collection['VersionId']}",
+            description="TODO",
+            extent=Extent(
+                SpatialExtent([[0.0, 0.0, 0.0, 0.0]]),
+                TemporalExtent([temporal_extent])
+            ),
+            license="proprietary",
+            providers=[],
+            # title=input_collection['LongName'],
+            # keywords=[input_collection['SpatialKeywords']['Keyword']],
+            summaries=Summaries({
+                "updated": [datetime_to_str(TimeUtils().parse_from_unix(source['updatedAt'], True).get_datetime_obj())],
                 "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
                 "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
                 "process": [source['process'] if 'process' in source else ''],
                 "totalGranules": [source['total_size'] if 'total_size' in source else -1],
-            },
-            "links": [self.__convert_to_stac_links({
-                "regex": source['url_path'] if 'url_path' in source else './collection.json',
-                "sampleFileName": source['sampleFileName'],
-                "type": "application/json",
+            }),
+            # assets={}
+        )
+        stac_collection.links = [self.__convert_to_stac_link_obj({
+            "regex": source['url_path'] if 'url_path' in source else './collection.json',
+            "sampleFileName": source['sampleFileName'],
+            "type": "application/json",
 
-            }, 'root')] + [self.__convert_to_stac_links(k) for k in source['files']],
-        }
-        return stac_collection
+        }, 'root')] + [self.__convert_to_stac_link_obj(k) for k in source['files']]
+        return stac_collection.to_dict(include_self_link=False, transform_hrefs=False)
 
     def get_href(self, input_href: str):
         parse_result = urlparse(input_href)

diff --git a/cumulus_lambda_functions/cumulus_stac/item_transformer.py b/cumulus_lambda_functions/cumulus_stac/item_transformer.py
@@ -1,5 +1,8 @@
 import json
 
+from pystac import Item, Asset, Link
+from pystac.utils import datetime_to_str
+
 from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract
 from cumulus_lambda_functions.lib.json_validator import JsonValidator
 from cumulus_lambda_functions.lib.time_utils import TimeUtils
@@ -292,6 +295,7 @@
 
 class ItemTransformer(StacTransformerAbstract):
     def __init__(self):
+        super().__init__()
         self.__stac_item_schema = json.loads(STAC_ITEM_SCHEMA)
         self.__cumulus_granule_schema = {}
 
@@ -307,33 +311,13 @@ def __get_asset_name(self, input_dict):
             return 'metadata__data'
         return input_dict['type']
 
-    def __get_assets(self, input_dict):
-        """
-        Sample:
-        {
-            "bucket": "am-uds-dev-cumulus-internal",
-            "key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS",
-            "size": 760,
-            "fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
-            "source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518500.PDS",
-            "type": "data"
-        }
-        :param input_dict:
-        :return:
-        """
-        asset_dict = {
-            'href': f"s3://{input_dict['bucket']}/{input_dict['key']}",
-            'title': input_dict['fileName'],
-            'description': input_dict['fileName'],
-            # 'type': '',
-            # 'roles': '',
-        }
-        return asset_dict
-
-    def __get_datetime_from_source(self, source: dict, datetime_key: str):
-        if datetime_key not in source:
-            return '1970-01-01T00:00:00Z'
-        return f"{source[datetime_key]}{'' if source[datetime_key].endswith('Z') else 'Z'}"
+    def __get_asset_obj(self, input_dict):
+        asset = Asset(
+            href=f"s3://{input_dict['bucket']}/{input_dict['key']}",
+            title=input_dict['fileName'],
+            description=input_dict['fileName'],
+        )
+        return asset
 
     def to_stac(self, source: dict) -> dict:
         """
@@ -402,35 +386,28 @@ def to_stac(self, source: dict) -> dict:
 
         cumulus_file_validator = JsonValidator(CUMULUS_FILE_SCHEMA)
         validated_files = [k for k in source['files'] if cumulus_file_validator.validate(k) is None]
-        minimum_stac_item = {
-            "stac_version": "1.0.0",
-            "stac_extensions": [],
-            "type": "Feature",
-            "id": source['granuleId'],
-            "bbox": [0, 0, 0, 0, ],
-            "geometry": {
-                "type": "Point",
-                "coordinates": [0, 0]
+        stac_item = Item(
+            id=source['granuleId'],
+            bbox=[0.0, 0.0, 0.0, 0.0],
+            properties={
+                # "datetime": f"{TimeUtils.decode_datetime(source['createdAt'], False)}Z",
+                "start_datetime": datetime_to_str(self.get_time_obj(source['beginningDateTime'])),
+                "end_datetime": datetime_to_str(self.get_time_obj(source['endingDateTime'])),
+                "created": datetime_to_str(self.get_time_obj(source['productionDateTime'])),
+                "updated": datetime_to_str(TimeUtils().parse_from_unix(source['updatedAt'], True).get_datetime_obj()),
             },
-            "properties": {
-                "datetime": f"{TimeUtils.decode_datetime(source['createdAt'], False)}Z",
-                "start_datetime": self.__get_datetime_from_source(source, 'beginningDateTime'),
-                "end_datetime": self.__get_datetime_from_source(source, 'endingDateTime'),
-                "created": self.__get_datetime_from_source(source, 'productionDateTime'),
-                # "created": source['processingEndDateTime'],  # TODO
+            collection=source['collectionId'],
+            assets={self.__get_asset_name(k): self.__get_asset_obj(k) for k in validated_files},
+            geometry={
+                "type": "Point",
+                "coordinates": [0.0, 0.0]
             },
-            "collection": source['collectionId'],
-            "links": [
-                {
-                    "rel": "collection",
-                    "href": ".",
-                    # "type": "application/json",
-                    # "title": "Simple Example Collection"
-                }
-            ],
-            "assets": {self.__get_asset_name(k): self.__get_assets(k) for k in validated_files}
-        }
-        return minimum_stac_item
+            datetime=TimeUtils().parse_from_unix(source['createdAt'], True).get_datetime_obj(),
+        )
+        stac_item.links = [
+            Link(rel='collection', target='.')
+        ]
+        return stac_item.to_dict(include_self_link=False, transform_hrefs=False)
 
     def from_stac(self, source: dict) -> dict:
         return {}
diff --git a/cumulus_lambda_functions/cumulus_stac/stac_transformer_abstract.py b/cumulus_lambda_functions/cumulus_stac/stac_transformer_abstract.py
@@ -1,11 +1,37 @@
+import logging
 from abc import ABC, abstractmethod
 
+from cumulus_lambda_functions.lib.time_utils import TimeUtils
+
+LOGGER = logging.getLogger(__name__)
+
 
 class StacTransformerAbstract(ABC):
+    def __init__(self) -> None:
+        super().__init__()
+        self._dt_formats = [
+            '%y-%m-%dT%H:%M:%S.%f%z',
+            '%y-%m-%dT%H:%M:%S%z',
+            '%Y-%m-%dT%H:%M:%S.%f%z',
+            '%Y-%m-%dT%H:%M:%S%z',
+            '%Y-%m-%dT%H:%M:%S.%f',
+        ]
+
+    def get_time_obj(self, datetime_str: str):
+        if datetime_str is None:
+            return None
+        for each_fmt in self._dt_formats:
+            try:
+                dt_utils = TimeUtils().parse_from_str(datetime_str, each_fmt)
+                return dt_utils.get_datetime_obj()
+            except ValueError as ve1:
+                LOGGER.debug(f'format and value do not match: {each_fmt} v. {datetime_str}')
+        raise ValueError(f'unknown format: {datetime_str}')
+
     @abstractmethod
     def to_stac(self, source: dict) -> dict:
-        return
+        return {}
 
     @abstractmethod
     def from_stac(self, source: dict) -> dict:
-        return
+        return {}
diff --git a/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py b/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py
@@ -124,4 +124,4 @@ def start(self):
         }
         LOGGER.debug(f'dapa_body_granules: {dapa_body}')
         dapa_ingest_result = dapa_client.ingest_granules_w_cnm(dapa_body)
-        return dapa_ingest_result
+        return dapa_body
diff --git a/cumulus_lambda_functions/lib/time_utils.py b/cumulus_lambda_functions/lib/time_utils.py
@@ -1,7 +1,7 @@
 import logging
 import calendar
 import time
-from datetime import datetime
+from datetime import datetime, timezone, timedelta
 from time import mktime
 import re
 
@@ -17,6 +17,27 @@ class TimeUtils:
     MMDD_FORMAT = '%Y-%m-%dT%H:%M:%S'
     GB_1 = 1000000000
     YR_IN_SECOND = 31536000
+
+    def __init__(self):
+        self.__time_obj = datetime.utcnow()
+
+    def parse_from_str(self, timestamp_str: str, fmt='%Y-%m-%dT%H:%M:%S%z', in_ms=False):
+        self.__time_obj = datetime.strptime(timestamp_str, fmt)
+        return self
+
+    def parse_from_unix(self, unix_timestamp, in_ms=False):
+        converting_timestamp = unix_timestamp / 1000 if in_ms is True else unix_timestamp
+        self.__time_obj = datetime.fromtimestamp(converting_timestamp, timezone(timedelta(0, 0, 0, 0)))
+        return self
+
+    def get_datetime_obj(self):
+        return self.__time_obj
+
+    def get_datetime_unix(self, in_ms=False):
+        return int(self.__time_obj.timestamp()) if not in_ms else int(self.__time_obj.timestamp() * 1000)
+
+    def get_datetime_str(self, fmt='%Y-%m-%dT%H:%M:%S %z', in_ms=True):
+        return self.__time_obj.strftime(fmt).replace('0000', '00:00')
     @staticmethod
     def get_current_year():
         return datetime.utcnow().year

diff --git a/setup.py b/setup.py
@@ -17,7 +17,7 @@
 
 setup(
     name="cumulus_lambda_functions",
-    version="1.9.2",
+    version="1.10.0",
     packages=find_packages(),
     install_requires=install_requires,
     tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

diff --git a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py
@@ -7,7 +7,7 @@
 from cumulus_lambda_functions.lib.json_validator import JsonValidator
 
 
-class TestItemTransformer(TestCase):
+class TestCollectionTransformer(TestCase):
     def test_01(self):
         stac_validator = JsonValidator(json.loads(STAC_COLLECTION_SCHEMA))
         source = {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -42,4 +42,6 @@ update_lambda_function_mcp_dev_4:
		update_lambda_function_mcp_dev_5:
		aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-cumulus_collections_creation_dapa_facade --publish &>/dev/null

		mcp_dev: upload_lambda_mcp_dev update_lambda_function_mcp_dev_1 update_lambda_function_mcp_dev_2 update_lambda_function_mcp_dev_4 update_lambda_function_mcp_dev_5