Skip to content

Commit 55ddcf5

Browse files
authored
Merge pull request #149 from unity-sds/output-to-file
feat: writing output content to a file if ENV is provided
2 parents 854f8e8 + b582adc commit 55ddcf5

File tree

12 files changed

+99
-18
lines changed

12 files changed

+99
-18
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [3.8.0] - 2023-05-04
9+
### Added
10+
- [#149](https://github.com/unity-sds/unity-data-services/pull/149) feat: writing output content to a file if ENV is provided
11+
812
## [3.7.1] - 2023-05-04
913
### Changed
1014
- [#148](https://github.com/unity-sds/unity-data-services/pull/148) fix: use cas structure to generate metadata for stac

cumulus_lambda_functions/docker_entrypoint/__main__.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,32 @@
44

55
from cumulus_lambda_functions.stage_in_out.catalog_granules_factory import CatalogGranulesFactory
66
from cumulus_lambda_functions.stage_in_out.download_granules_factory import DownloadGranulesFactory
7-
from cumulus_lambda_functions.stage_in_out.download_granules_s3 import DownloadGranulesS3
87
from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory
8+
from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils
99
from cumulus_lambda_functions.stage_in_out.upoad_granules_factory import UploadGranulesFactory
1010

1111

1212
def choose_process():
1313
if argv[1].strip().upper() == 'SEARCH':
1414
logging.info('starting SEARCH script')
15-
return SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search()
15+
result_str = SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search()
16+
StageInOutUtils.write_output_to_file(result_str)
17+
return result_str
1618
if argv[1].strip().upper() == 'DOWNLOAD':
1719
logging.info('starting DOWNLOAD script')
18-
return DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download()
20+
result_str = DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download()
21+
StageInOutUtils.write_output_to_file(result_str)
22+
return result_str
1923
if argv[1].strip().upper() == 'UPLOAD':
2024
logging.info('starting UPLOAD script')
21-
return UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload()
25+
result_str = UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload()
26+
StageInOutUtils.write_output_to_file(result_str)
27+
return result_str
2228
if argv[1].strip().upper() == 'CATALOG':
2329
logging.info('starting CATALOG script')
24-
return CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog()
30+
result_str = CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog()
31+
StageInOutUtils.write_output_to_file(result_str)
32+
return result_str
2533
raise ValueError(f'invalid argument: {argv}')
2634

2735

cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import os
66

7+
78
LOGGER = logging.getLogger(__name__)
89

910

cumulus_lambda_functions/stage_in_out/download_granules_daac.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import os
88

9+
910
LOGGER = logging.getLogger(__name__)
1011

1112

cumulus_lambda_functions/stage_in_out/search_granules_cmr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,5 @@ def search(self, **kwargs) -> str:
100100
if len(temp_results) < page_size:
101101
break
102102
results = self.__get_correct_result_count(results)
103-
return json.dumps(StacUtils.reduce_stac_list_to_data_links(results)) if self.__filter_results else json.dumps(results)
103+
results = StacUtils.reduce_stac_list_to_data_links(results) if self.__filter_results else results
104+
return json.dumps(results)

cumulus_lambda_functions/stage_in_out/search_granules_unity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient
66
from cumulus_lambda_functions.cumulus_stac.stac_utils import StacUtils
7-
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
87
from cumulus_lambda_functions.stage_in_out.search_granules_abstract import SearchGranulesAbstract
98

109
LOGGER = logging.getLogger(__name__)
@@ -51,4 +50,5 @@ def search(self, **kwargs) -> str:
5150
self.__set_props_from_env()
5251
dapa_client = DapaClient().with_verify_ssl(self.__verify_ssl)
5352
granules_result = dapa_client.get_all_granules(self.__collection_id, self.__limit, self.__date_from, self.__date_to)
54-
return json.dumps(StacUtils.reduce_stac_list_to_data_links(granules_result)) if self.__filter_results else json.dumps(granules_result)
53+
granules_result = StacUtils.reduce_stac_list_to_data_links(granules_result) if self.__filter_results else granules_result
54+
return json.dumps(granules_result)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import json
2+
import logging
3+
import os
4+
from typing import Union
5+
6+
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
7+
8+
LOGGER = logging.getLogger(__name__)
9+
10+
11+
class StageInOutUtils:
12+
OUTPUT_FILE = 'OUTPUT_FILE'
13+
14+
@staticmethod
15+
def write_output_to_file(output_json: Union[dict, str, list]):
16+
if StageInOutUtils.OUTPUT_FILE not in os.environ:
17+
LOGGER.debug(f'Not writing output to file due to missing {StageInOutUtils.OUTPUT_FILE} in ENV')
18+
return
19+
output_filepath = os.environ.get(StageInOutUtils.OUTPUT_FILE)
20+
FileUtils.mk_dir_p(os.path.dirname(output_filepath))
21+
output_str = json.dumps(output_json) if not isinstance(output_json, str) else output_json
22+
with open(output_filepath, 'w') as ff:
23+
ff.write(output_str)
24+
return

cumulus_lambda_functions/stage_in_out/upload_granules_s3.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22

33
from cumulus_lambda_functions.stage_in_out.search_collections_factory import SearchCollectionsFactory
4-
from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory
54
from cumulus_lambda_functions.stage_in_out.upload_granules_abstract import UploadGranulesAbstract
65
import logging
76
import os

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
setup(
2020
name="cumulus_lambda_functions",
21-
version="3.7.1",
21+
version="3.8.0",
2222
packages=find_packages(),
2323
install_requires=install_requires,
2424
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

tests/cumulus_lambda_functions/stage_in_out/__init__.py

Whitespace-only changes.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os
2+
import tempfile
3+
from unittest import TestCase
4+
5+
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
6+
from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils
7+
8+
9+
class TestStageInOutUtils(TestCase):
10+
def test_01(self):
11+
with tempfile.TemporaryDirectory() as tmp_dir_name:
12+
os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'SAMPLE', 'output.json')
13+
StageInOutUtils.write_output_to_file({'test1': True})
14+
self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE)))
15+
return
16+
17+
def test_02(self):
18+
with tempfile.TemporaryDirectory() as tmp_dir_name:
19+
os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'output.json')
20+
StageInOutUtils.write_output_to_file({'test1': True})
21+
self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE)))
22+
return

tests/integration_tests/test_docker_entry.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,16 @@ def test_01_search_part_01(self):
3535
if len(argv) > 1:
3636
argv.pop(-1)
3737
argv.append('SEARCH')
38-
search_result = choose_process()
39-
search_result = json.loads(search_result)
40-
self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}')
41-
self.assertEqual(len(search_result), 4000, f'wrong length')
42-
search_result = set([k['id'] for k in search_result])
43-
self.assertEqual(len(search_result),4000, f'wrong length. not unique')
38+
with tempfile.TemporaryDirectory() as tmp_dir_name:
39+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
40+
search_result_str = choose_process()
41+
search_result = json.loads(search_result_str)
42+
self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}')
43+
self.assertEqual(len(search_result), 4000, f'wrong length')
44+
search_result = set([k['id'] for k in search_result])
45+
self.assertEqual(len(search_result),4000, f'wrong length. not unique')
46+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
47+
self.assertEqual(sorted(json.dumps(FileUtils.read_json(os.environ['OUTPUT_FILE']))), sorted(search_result_str), f'not identical result')
4448
return
4549

4650
def test_01_search_part_02(self):
@@ -220,6 +224,7 @@ def test_02_download(self):
220224
argv.pop(-1)
221225
argv.append('DOWNLOAD')
222226
with tempfile.TemporaryDirectory() as tmp_dir_name:
227+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
223228
os.environ['DOWNLOAD_DIR'] = tmp_dir_name
224229
download_result = choose_process()
225230
self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}')
@@ -228,6 +233,7 @@ def test_02_download(self):
228233
for each_granule in zip(granule_json, download_result):
229234
remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
230235
self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}")
236+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
231237
return
232238

233239
def test_02_download__daac(self):
@@ -242,6 +248,7 @@ def test_02_download__daac(self):
242248
argv.pop(-1)
243249
argv.append('DOWNLOAD')
244250
with tempfile.TemporaryDirectory() as tmp_dir_name:
251+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
245252
os.environ['DOWNLOAD_DIR'] = tmp_dir_name
246253
download_result = choose_process()
247254
self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}')
@@ -254,6 +261,7 @@ def test_02_download__daac(self):
254261
remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
255262
self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename),
256263
f"mismatched: {each_granule[0]['assets']['data']['href']}")
264+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
257265
return
258266

259267
def test_02_download__daac__from_file(self):
@@ -268,6 +276,7 @@ def test_02_download__daac__from_file(self):
268276
argv.pop(-1)
269277
argv.append('DOWNLOAD')
270278
with tempfile.TemporaryDirectory() as tmp_dir_name:
279+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
271280
granule_json_file = os.path.join(tmp_dir_name, 'input_file.json')
272281
downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir')
273282
FileUtils.mk_dir_p(downloading_dir)
@@ -285,6 +294,7 @@ def test_02_download__daac__from_file(self):
285294
remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
286295
self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename),
287296
f"mismatched: {each_granule[0]['assets']['data']['href']}")
297+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
288298
return
289299

290300
def test_02_download__daac_error(self):
@@ -299,6 +309,7 @@ def test_02_download__daac_error(self):
299309
argv.pop(-1)
300310
argv.append('DOWNLOAD')
301311
with tempfile.TemporaryDirectory() as tmp_dir_name:
312+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
302313
# TODO this is downloading a login page HTML
303314
os.environ['DOWNLOAD_DIR'] = tmp_dir_name
304315
download_result = choose_process()
@@ -307,6 +318,7 @@ def test_02_download__daac_error(self):
307318
error_file = os.path.join(tmp_dir_name, 'error.log')
308319
if FileUtils.file_exist(error_file):
309320
self.assertTrue(False, f'some downloads failed. error.log exists. {FileUtils.read_json(error_file)}')
321+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
310322
return
311323

312324
def test_02_download__from_file(self):
@@ -316,6 +328,7 @@ def test_02_download__from_file(self):
316328
argv.append('DOWNLOAD')
317329
os.environ['GRANULES_DOWNLOAD_TYPE'] = 'S3'
318330
with tempfile.TemporaryDirectory() as tmp_dir_name:
331+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
319332
granule_json_file = os.path.join(tmp_dir_name, 'input_file.json')
320333
downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir')
321334
FileUtils.mk_dir_p(downloading_dir)
@@ -333,6 +346,7 @@ def test_02_download__from_file(self):
333346
remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
334347
self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename),
335348
f"mismatched: {each_granule[0]['assets']['data']['href']}")
349+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
336350
return
337351

338352
def test_03_upload(self):
@@ -354,6 +368,7 @@ def test_03_upload(self):
354368
argv.append('UPLOAD')
355369

356370
with tempfile.TemporaryDirectory() as tmp_dir_name:
371+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
357372
os.environ['UPLOAD_DIR'] = tmp_dir_name
358373
with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff:
359374
ff.write('sample_file')
@@ -465,6 +480,7 @@ def test_03_upload(self):
465480
self.assertTrue('data' in upload_result['assets'], 'missing assets#data')
466481
self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href')
467482
self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/'))
483+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
468484
return
469485

470486
def test_03_upload_catalog(self):
@@ -487,6 +503,7 @@ def test_03_upload_catalog(self):
487503
argv.append('UPLOAD')
488504

489505
with tempfile.TemporaryDirectory() as tmp_dir_name:
506+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
490507
os.environ['UPLOAD_DIR'] = '' # not needed
491508
os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json')
492509
with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff:
@@ -629,6 +646,7 @@ def test_03_upload_catalog(self):
629646
self.assertTrue('data' in upload_result['assets'], 'missing assets#data')
630647
self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href')
631648
self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/'))
649+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
632650
return
633651

634652
def test_04_catalog(self):
@@ -646,6 +664,9 @@ def test_04_catalog(self):
646664
if len(argv) > 1:
647665
argv.pop(-1)
648666
argv.append('CATALOG')
649-
catalog_result = choose_process()
650-
self.assertEqual('registered', catalog_result, 'wrong status')
667+
with tempfile.TemporaryDirectory() as tmp_dir_name:
668+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
669+
catalog_result = choose_process()
670+
self.assertEqual('registered', catalog_result, 'wrong status')
671+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
651672
return

0 commit comments

Comments (0)