Skip to content

Commit fa897c4

Browse files
authored
Merge pull request #584 from unity-sds/develop
release/9.12.0
2 parents 74c0c73 + 01d41b4 commit fa897c4

File tree

12 files changed

+121
-9
lines changed

12 files changed

+121
-9
lines changed

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [9.12.0] - 2025-05-24
9+
### Changed
10+
- [#585](https://github.com/unity-sds/unity-data-services/pull/585) feat: add ram size in lambdas
11+
12+
## [9.11.9] - 2025-05-23
13+
### Fixed
14+
- [#582](https://github.com/unity-sds/unity-data-services/pull/582) fix: use correct schema
15+
16+
## [9.11.8] - 2025-05-21
17+
### Fixed
18+
- [#580](https://github.com/unity-sds/unity-data-services/pull/580) fix: update-archival-index-mapping
19+
20+
## [9.11.7] - 2025-05-21
21+
### Fixed
22+
- [#578](https://github.com/unity-sds/unity-data-services/pull/578) fix: sending sns to daac
23+
24+
## [9.11.6] - 2025-05-14
25+
### Fixed
26+
- [#575](https://github.com/unity-sds/unity-data-services/pull/575) fix: lib version bump
27+
828
## [9.11.5] - 2025-04-24
929
### Fixed
1030
- [#568](https://github.com/unity-sds/unity-data-services/pull/568) fix: case insensitive

cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,37 @@ def get_cnm_response_json_file(self, potential_file, granule_id):
4343
return None
4444
if len(cnm_response_keys) > 1:
4545
LOGGER.warning(f'more than 1 cnm response file: {cnm_response_keys}')
46-
cnm_response_keys = cnm_response_keys[0]
46+
# assuming the names are the same, and it has processing date in the filename, it is easier to reverse it
47+
cnm_response_keys = sorted(cnm_response_keys)[-1] # sort and get the last one which is supposed to be the most recent one.
4748
LOGGER.debug(f'cnm_response_keys: {cnm_response_keys}')
4849
local_file = self.__s3.set_s3_url(f's3://{self.__s3.target_bucket}/{cnm_response_keys}').download('/tmp')
4950
cnm_response_json = FileUtils.read_json(local_file)
5051
FileUtils.remove_if_exists(local_file)
5152
return cnm_response_json
5253

54+
@staticmethod
55+
def revert_to_s3_url(input_url):
56+
if input_url.startswith("s3://"):
57+
return input_url
58+
if input_url.startswith("http://") or input_url.startswith("https://"):
59+
parts = input_url.split('/', 3)
60+
if len(parts) < 4:
61+
ValueError(f'invalid url: {input_url}')
62+
path_parts = parts[3].split('/', 1)
63+
if len(path_parts) != 2:
64+
ValueError(f'invalid url: {input_url}')
65+
bucket, key = path_parts
66+
return f"s3://{bucket}/{key}"
67+
raise ValueError(f'unknown schema: {input_url}')
68+
5369
def __extract_files(self, uds_cnm_json: dict, daac_config: dict):
5470
granule_files = uds_cnm_json['product']['files']
5571
if 'archiving_types' not in daac_config or len(daac_config['archiving_types']) < 1:
5672
return granule_files # TODO remove missing md5?
5773
archiving_types = {k['data_type']: [] if 'file_extension' not in k else k['file_extension'] for k in daac_config['archiving_types']}
5874
result_files = []
5975
for each_file in granule_files:
76+
LOGGER.debug(f'each_file: {each_file}')
6077
"""
6178
{
6279
"type": "data",
@@ -71,6 +88,7 @@ def __extract_files(self, uds_cnm_json: dict, daac_config: dict):
7188
if each_file['type'] not in archiving_types:
7289
continue
7390
file_extensions = archiving_types[each_file['type']]
91+
each_file['uri'] = self.revert_to_s3_url(each_file['uri'])
7492
if len(file_extensions) < 1:
7593
result_files.append(each_file) # TODO remove missing md5?
7694
temp_filename = each_file['name'].upper().strip()
@@ -79,28 +97,36 @@ def __extract_files(self, uds_cnm_json: dict, daac_config: dict):
7997
return result_files
8098

8199
def send_to_daac_internal(self, uds_cnm_json: dict):
100+
LOGGER.debug(f'uds_cnm_json: {uds_cnm_json}')
82101
granule_identifier = UdsCollections.decode_identifier(uds_cnm_json['identifier']) # This is normally meant to be for collection. Since our granule ID also has collection id prefix. we can use this.
83102
self.__archive_index_logic.set_tenant_venue(granule_identifier.tenant, granule_identifier.venue)
84103
daac_config = self.__archive_index_logic.percolate_document(uds_cnm_json['identifier'])
85104
if daac_config is None or len(daac_config) < 1:
86105
LOGGER.debug(f'uds_cnm_json is not configured for archival. uds_cnm_json: {uds_cnm_json}')
87106
return
88107
daac_config = daac_config[0] # TODO This is currently not supporting more than 1 daac.
108+
result = JsonValidator(UdsArchiveConfigIndex.db_record_schema).validate(daac_config)
109+
if result is not None:
110+
raise ValueError(f'daac_config does not have valid schema. Pls re-add the daac config: {result} for {daac_config}')
89111
try:
90112
self.__sns.set_topic_arn(daac_config['daac_sns_topic_arn'])
91113
daac_cnm_message = {
92-
"collection": daac_config['daac_collection_name'],
114+
"collection": {
115+
'name': daac_config['daac_collection_name'],
116+
'version': daac_config['daac_data_version'],
117+
},
93118
"identifier": uds_cnm_json['identifier'],
94119
"submissionTime": f'{TimeUtils.get_current_time()}Z',
95120
"provider": granule_identifier.tenant,
96121
"version": "1.6.0", # TODO this is hardcoded?
97122
"product": {
98123
"name": granule_identifier.id,
99-
"dataVersion": daac_config['daac_data_version'],
124+
# "dataVersion": daac_config['daac_data_version'],
100125
'files': self.__extract_files(uds_cnm_json, daac_config),
101126
}
102127
}
103-
self.__sns.publish_message(json.dumps(daac_cnm_message))
128+
LOGGER.debug(f'daac_cnm_message: {daac_cnm_message}')
129+
self.__sns.set_external_role(daac_config['daac_role_arn'], daac_config['daac_role_session_name']).publish_message(json.dumps(daac_cnm_message), True)
104130
self.__granules_index.update_entry(granule_identifier.tenant, granule_identifier.venue, {
105131
'archive_status': 'cnm_s_success',
106132
'archive_error_message': '',

cumulus_lambda_functions/granules_to_es/granules_index_mapping.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ class GranulesIndexMapping:
99
"daac_data_version": {
1010
"type": "keyword"
1111
},
12+
"daac_role_arn": {
13+
"type": "keyword"
14+
},
15+
"daac_role_session_name": {
16+
"type": "keyword"
17+
},
1218
"archiving_types": {
1319
"type": "object",
1420
"properties": {

cumulus_lambda_functions/granules_to_es/granules_indexer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,12 @@ def start(self):
7878
return
7979
self.__cumulus_record = incoming_msg['record']
8080
if len(self.__cumulus_record['files']) < 1:
81+
LOGGER.debug(f'No files in cumulus record. Not inserting to ES')
8182
# TODO ingest updating stage?
8283
return
84+
if 'status' not in self.__cumulus_record or self.__cumulus_record['status'].upper() != 'COMPLETED':
85+
LOGGER.debug(f'missing status or it is NOT COMPLETED status. Not inserting to ES')
86+
return
8387
stac_input_meta = None
8488
potential_files = self.__get_potential_files()
8589
LOGGER.debug(f'potential_files: {potential_files}')

cumulus_lambda_functions/lib/uds_db/archive_index.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,31 @@ class UdsArchiveConfigIndex:
1515
basic_schema = {
1616
'type': 'object',
1717
"additionalProperties": False,
18-
'required': ['daac_collection_id', 'daac_sns_topic_arn', 'daac_data_version', 'collection', 'ss_username', 'archiving_types'],
18+
'required': ['daac_collection_id', 'daac_sns_topic_arn', 'daac_data_version', 'daac_role_arn', 'daac_role_session_name',
19+
'collection', 'ss_username', 'archiving_types'],
1920
'properties': {
2021
'daac_collection_id': {'type': 'string'},
2122
'daac_sns_topic_arn': {'type': 'string'},
2223
'daac_data_version': {'type': 'string'},
24+
'daac_role_arn': {'type': 'string'},
25+
'daac_role_session_name': {'type': 'string'},
26+
'collection': {'type': 'string'},
27+
'ss_username': {'type': 'string'},
28+
'archiving_types': {'type': 'array', 'items': {'type': 'object'}},
29+
}
30+
}
31+
32+
db_record_schema = {
33+
'type': 'object',
34+
'required': ['daac_collection_name', 'daac_sns_topic_arn', 'daac_data_version', 'daac_role_arn',
35+
'daac_role_session_name',
36+
'collection', 'ss_username', 'archiving_types'],
37+
'properties': {
38+
'daac_collection_name': {'type': 'string'},
39+
'daac_sns_topic_arn': {'type': 'string'},
40+
'daac_data_version': {'type': 'string'},
41+
'daac_role_arn': {'type': 'string'},
42+
'daac_role_session_name': {'type': 'string'},
2343
'collection': {'type': 'string'},
2444
'ss_username': {'type': 'string'},
2545
'archiving_types': {'type': 'array', 'items': {'type': 'object'}},

cumulus_lambda_functions/uds_api/dapa/daac_archive_crud.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,17 @@ class DaacUpdateModel(BaseModel):
1919
daac_collection_id: str
2020
daac_data_version: Optional[str] = None
2121
daac_sns_topic_arn: Optional[str] = None
22+
daac_role_arn: Optional[str] = None
23+
daac_role_session_name: Optional[str] = None
2224
archiving_types: Optional[list[ArchivingTypesModel]] = None
2325

2426

2527
class DaacAddModel(BaseModel):
2628
daac_collection_id: str
2729
daac_data_version: str
2830
daac_sns_topic_arn: str
31+
daac_role_arn: str
32+
daac_role_session_name: str
2933
archiving_types: Optional[list[ArchivingTypesModel]] = []
3034

3135

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jsonschema==4.23.0
1515
jsonschema-specifications==2023.12.1
1616
lark==0.12.0
1717
mangum==0.18.0
18-
mdps-ds-lib==1.1.1.dev701
18+
mdps-ds-lib==1.1.1.dev800
1919
pydantic==2.9.2
2020
pydantic_core==2.23.4
2121
pygeofilter==0.2.4
@@ -37,4 +37,4 @@ typing_extensions==4.12.2
3737
tzlocal==5.2
3838
urllib3==1.26.11
3939
uvicorn==0.30.6
40-
xmltodict==0.13.0
40+
xmltodict==0.13.0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
setup(
1414
name="cumulus_lambda_functions",
15-
version="9.11.5",
15+
version="9.12.0",
1616
packages=find_packages(),
1717
install_requires=install_requires,
1818
package_data={

tf-module/unity-cumulus/daac_archiver.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ resource "aws_lambda_function" "daac_archiver_response" {
3838
handler = "cumulus_lambda_functions.daac_archiver.lambda_function.lambda_handler_response"
3939
runtime = "python3.9"
4040
timeout = 300
41+
memory_size = 256
4142
environment {
4243
variables = {
4344
LOG_LEVEL = var.log_level

tf-module/unity-cumulus/granules_cnm_ingester.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ resource "aws_lambda_function" "granules_cnm_ingester" {
66
handler = "cumulus_lambda_functions.granules_cnm_ingester.lambda_function.lambda_handler"
77
runtime = "python3.9"
88
timeout = 300
9+
memory_size = 512
910
reserved_concurrent_executions = var.granules_cnm_ingester__lambda_concurrency # TODO
1011
environment {
1112
variables = {
@@ -66,6 +67,7 @@ resource "aws_lambda_function" "granules_cnm_response_writer" {
6667
handler = "cumulus_lambda_functions.granules_cnm_response_writer.lambda_function.lambda_handler"
6768
runtime = "python3.9"
6869
timeout = 300
70+
memory_size = 256
6971
reserved_concurrent_executions = var.granules_cnm_response_writer__lambda_concurrency # TODO
7072
environment {
7173
variables = {

tf-module/unity-cumulus/main.tf

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ resource "aws_lambda_function" "metadata_s4pa_generate_cmr" {
4343
handler = "cumulus_lambda_functions.metadata_s4pa_generate_cmr.lambda_function.lambda_handler"
4444
runtime = "python3.9"
4545
timeout = 300
46+
memory_size = 256
4647
environment {
4748
variables = {
4849
LOG_LEVEL = var.log_level
@@ -65,6 +66,7 @@ resource "aws_lambda_function" "metadata_cas_generate_cmr" {
6566
handler = "cumulus_lambda_functions.metadata_cas_generate_cmr.lambda_function.lambda_handler"
6667
runtime = "python3.9"
6768
timeout = 300
69+
memory_size = 256
6870
environment {
6971
variables = {
7072
LOG_LEVEL = var.log_level
@@ -86,6 +88,7 @@ resource "aws_lambda_function" "metadata_stac_generate_cmr" {
8688
handler = "cumulus_lambda_functions.metadata_stac_generate_cmr.lambda_function.lambda_handler"
8789
runtime = "python3.9"
8890
timeout = 300
91+
memory_size = 256
8992
environment {
9093
variables = {
9194
LOG_LEVEL = var.log_level
@@ -112,6 +115,7 @@ resource "aws_lambda_function" "granules_to_es" {
112115
handler = "cumulus_lambda_functions.granules_to_es.lambda_function.lambda_handler"
113116
runtime = "python3.9"
114117
timeout = 300
118+
memory_size = 256
115119
environment {
116120
variables = {
117121
LOG_LEVEL = var.log_level
@@ -137,7 +141,7 @@ resource "aws_lambda_function" "uds_api_1" {
137141
handler = "cumulus_lambda_functions.uds_api.web_service.handler"
138142
runtime = "python3.9"
139143
timeout = 300
140-
144+
memory_size = 512
141145
environment {
142146
variables = {
143147
CUMULUS_BASE = var.cumulus_base
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
data "aws_iam_role" "lambda_processing" {
2+
name = split("/", var.lambda_processing_role_arn)[1]
3+
}
4+
5+
resource "aws_iam_policy" "uds_lambda_processing_policy" {
6+
name = "${var.prefix}-uds_lambda_processing_policy"
7+
description = "IAM policy for Lambda to access S3 bucket and publish to SNS topic in another account"
8+
policy = jsonencode({
9+
Version = "2012-10-17",
10+
Statement = [
11+
{
12+
Effect = "Allow",
13+
Action = [
14+
"sts:AssumeRole",
15+
],
16+
"Resource": "arn:aws:iam::*:role/*"
17+
},
18+
]
19+
})
20+
}
21+
22+
resource "aws_iam_role_policy_attachment" "uds_lambda_processing_policy_attachment" {
23+
role = data.aws_iam_role.lambda_processing.name
24+
policy_arn = aws_iam_policy.uds_lambda_processing_policy.arn
25+
}

0 commit comments

Comments
 (0)