diff --git a/CHANGELOG.md b/CHANGELOG.md index 1648c2b4..904e522b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.8.0] - 2023-05-04 +### Added +- [#149](https://github.com/unity-sds/unity-data-services/pull/149) feat: writing output content to a file if ENV is provided + ## [3.7.1] - 2023-05-04 ### Changed - [#148](https://github.com/unity-sds/unity-data-services/pull/148) fix: use cas structure to generate metadata for stac diff --git a/cumulus_lambda_functions/docker_entrypoint/__main__.py b/cumulus_lambda_functions/docker_entrypoint/__main__.py index 8510cf31..ac683075 100644 --- a/cumulus_lambda_functions/docker_entrypoint/__main__.py +++ b/cumulus_lambda_functions/docker_entrypoint/__main__.py @@ -4,24 +4,32 @@ from cumulus_lambda_functions.stage_in_out.catalog_granules_factory import CatalogGranulesFactory from cumulus_lambda_functions.stage_in_out.download_granules_factory import DownloadGranulesFactory -from cumulus_lambda_functions.stage_in_out.download_granules_s3 import DownloadGranulesS3 from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory +from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils from cumulus_lambda_functions.stage_in_out.upoad_granules_factory import UploadGranulesFactory def choose_process(): if argv[1].strip().upper() == 'SEARCH': logging.info('starting SEARCH script') - return SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search() + result_str = SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search() + StageInOutUtils.write_output_to_file(result_str) + return result_str if argv[1].strip().upper() == 'DOWNLOAD': logging.info('starting DOWNLOAD script') - return DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download() + result_str = DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download() + StageInOutUtils.write_output_to_file(result_str) + return result_str if argv[1].strip().upper() == 'UPLOAD': logging.info('starting UPLOAD script') - return UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload() + result_str = UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload() + StageInOutUtils.write_output_to_file(result_str) + return result_str if argv[1].strip().upper() == 'CATALOG': logging.info('starting CATALOG script') - return CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog() + result_str = CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog() + StageInOutUtils.write_output_to_file(result_str) + return result_str raise ValueError(f'invalid argument: {argv}') diff --git a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py index 9fdf9c20..936244d3 100644 --- a/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py +++ b/cumulus_lambda_functions/stage_in_out/catalog_granules_unity.py @@ -4,6 +4,7 @@ import logging import os + LOGGER = logging.getLogger(__name__) diff --git a/cumulus_lambda_functions/stage_in_out/download_granules_daac.py b/cumulus_lambda_functions/stage_in_out/download_granules_daac.py index 7732bed5..02cac62f 100644 --- a/cumulus_lambda_functions/stage_in_out/download_granules_daac.py +++ b/cumulus_lambda_functions/stage_in_out/download_granules_daac.py @@ -6,6 +6,7 @@ import logging import os + LOGGER = logging.getLogger(__name__) diff --git a/cumulus_lambda_functions/stage_in_out/search_granules_cmr.py b/cumulus_lambda_functions/stage_in_out/search_granules_cmr.py index f0c6914d..089576f0 100644 --- a/cumulus_lambda_functions/stage_in_out/search_granules_cmr.py +++ b/cumulus_lambda_functions/stage_in_out/search_granules_cmr.py @@ -100,4 +100,5 @@ def search(self, **kwargs) -> str: if len(temp_results) < page_size: break results = self.__get_correct_result_count(results) - return json.dumps(StacUtils.reduce_stac_list_to_data_links(results)) if self.__filter_results else json.dumps(results) + results = StacUtils.reduce_stac_list_to_data_links(results) if self.__filter_results else results + return json.dumps(results) diff --git a/cumulus_lambda_functions/stage_in_out/search_granules_unity.py b/cumulus_lambda_functions/stage_in_out/search_granules_unity.py index 459ee846..9532a829 100644 --- a/cumulus_lambda_functions/stage_in_out/search_granules_unity.py +++ b/cumulus_lambda_functions/stage_in_out/search_granules_unity.py @@ -4,7 +4,6 @@ from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient from cumulus_lambda_functions.cumulus_stac.stac_utils import StacUtils -from cumulus_lambda_functions.lib.utils.file_utils import FileUtils from cumulus_lambda_functions.stage_in_out.search_granules_abstract import SearchGranulesAbstract LOGGER = logging.getLogger(__name__) @@ -51,4 +50,5 @@ def search(self, **kwargs) -> str: self.__set_props_from_env() dapa_client = DapaClient().with_verify_ssl(self.__verify_ssl) granules_result = dapa_client.get_all_granules(self.__collection_id, self.__limit, self.__date_from, self.__date_to) - return json.dumps(StacUtils.reduce_stac_list_to_data_links(granules_result)) if self.__filter_results else json.dumps(granules_result) + granules_result = StacUtils.reduce_stac_list_to_data_links(granules_result) if self.__filter_results else granules_result + return json.dumps(granules_result) diff --git a/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py new file mode 100644 index 00000000..acb83159 --- /dev/null +++ b/cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py @@ -0,0 +1,24 @@ +import json +import logging +import os +from typing import Union + +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils + +LOGGER = logging.getLogger(__name__) + + +class StageInOutUtils: + OUTPUT_FILE = 'OUTPUT_FILE' + + @staticmethod + def write_output_to_file(output_json: Union[dict, str, list]): + if StageInOutUtils.OUTPUT_FILE not in os.environ: + LOGGER.debug(f'Not writing output to file due to missing {StageInOutUtils.OUTPUT_FILE} in ENV') + return + output_filepath = os.environ.get(StageInOutUtils.OUTPUT_FILE) + FileUtils.mk_dir_p(os.path.dirname(output_filepath)) + output_str = json.dumps(output_json) if not isinstance(output_json, str) else output_json + with open(output_filepath, 'w') as ff: + ff.write(output_str) + return diff --git a/cumulus_lambda_functions/stage_in_out/upload_granules_s3.py b/cumulus_lambda_functions/stage_in_out/upload_granules_s3.py index 8aba70c2..686e5b99 100644 --- a/cumulus_lambda_functions/stage_in_out/upload_granules_s3.py +++ b/cumulus_lambda_functions/stage_in_out/upload_granules_s3.py @@ -1,7 +1,6 @@ import json from cumulus_lambda_functions.stage_in_out.search_collections_factory import SearchCollectionsFactory -from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory from cumulus_lambda_functions.stage_in_out.upload_granules_abstract import UploadGranulesAbstract import logging import os diff --git a/setup.py b/setup.py index 5735a9ad..64a33f72 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name="cumulus_lambda_functions", - version="3.7.1", + version="3.8.0", packages=find_packages(), install_requires=install_requires, tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'], diff --git a/tests/cumulus_lambda_functions/stage_in_out/__init__.py b/tests/cumulus_lambda_functions/stage_in_out/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/stage_in_out/test_stage_in_out_utils.py b/tests/cumulus_lambda_functions/stage_in_out/test_stage_in_out_utils.py new file mode 100644 index 00000000..91ebfe71 --- /dev/null +++ b/tests/cumulus_lambda_functions/stage_in_out/test_stage_in_out_utils.py @@ -0,0 +1,22 @@ +import os +import tempfile +from unittest import TestCase + +from cumulus_lambda_functions.lib.utils.file_utils import FileUtils +from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils + + +class TestStageInOutUtils(TestCase): + def test_01(self): + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'SAMPLE', 'output.json') + StageInOutUtils.write_output_to_file({'test1': True}) + self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE))) + return + + def test_02(self): + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'output.json') + StageInOutUtils.write_output_to_file({'test1': True}) + self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE))) + return diff --git a/tests/integration_tests/test_docker_entry.py b/tests/integration_tests/test_docker_entry.py index de1e5985..fa5bb6c9 100644 --- a/tests/integration_tests/test_docker_entry.py +++ b/tests/integration_tests/test_docker_entry.py @@ -35,12 +35,16 @@ def test_01_search_part_01(self): if len(argv) > 1: argv.pop(-1) argv.append('SEARCH') - search_result = choose_process() - search_result = json.loads(search_result) - self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}') - self.assertEqual(len(search_result), 4000, f'wrong length') - search_result = set([k['id'] for k in search_result]) - self.assertEqual(len(search_result),4000, f'wrong length. not unique') + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') + search_result_str = choose_process() + search_result = json.loads(search_result_str) + self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}') + self.assertEqual(len(search_result), 4000, f'wrong length') + search_result = set([k['id'] for k in search_result]) + self.assertEqual(len(search_result),4000, f'wrong length. not unique') + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') + self.assertEqual(sorted(json.dumps(FileUtils.read_json(os.environ['OUTPUT_FILE']))), sorted(search_result_str), f'not identical result') return def test_01_search_part_02(self): @@ -220,6 +224,7 @@ def test_02_download(self): argv.pop(-1) argv.append('DOWNLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') os.environ['DOWNLOAD_DIR'] = tmp_dir_name download_result = choose_process() self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}') @@ -228,6 +233,7 @@ def test_02_download(self): for each_granule in zip(granule_json, download_result): remote_filename = os.path.basename(each_granule[0]['assets']['data']['href']) self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}") + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_02_download__daac(self): @@ -242,6 +248,7 @@ def test_02_download__daac(self): argv.pop(-1) argv.append('DOWNLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') os.environ['DOWNLOAD_DIR'] = tmp_dir_name download_result = choose_process() self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}') @@ -254,6 +261,7 @@ def test_02_download__daac(self): remote_filename = os.path.basename(each_granule[0]['assets']['data']['href']) self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}") + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_02_download__daac__from_file(self): @@ -268,6 +276,7 @@ def test_02_download__daac__from_file(self): argv.pop(-1) argv.append('DOWNLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') granule_json_file = os.path.join(tmp_dir_name, 'input_file.json') downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir') FileUtils.mk_dir_p(downloading_dir) @@ -285,6 +294,7 @@ def test_02_download__daac__from_file(self): remote_filename = os.path.basename(each_granule[0]['assets']['data']['href']) self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}") + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_02_download__daac_error(self): @@ -299,6 +309,7 @@ def test_02_download__daac_error(self): argv.pop(-1) argv.append('DOWNLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') # TODO this is downloading a login page HTML os.environ['DOWNLOAD_DIR'] = tmp_dir_name download_result = choose_process() @@ -307,6 +318,7 @@ def test_02_download__daac_error(self): error_file = os.path.join(tmp_dir_name, 'error.log') if FileUtils.file_exist(error_file): self.assertTrue(False, f'some downloads failed. error.log exists. {FileUtils.read_json(error_file)}') + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_02_download__from_file(self): @@ -316,6 +328,7 @@ def test_02_download__from_file(self): argv.append('DOWNLOAD') os.environ['GRANULES_DOWNLOAD_TYPE'] = 'S3' with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') granule_json_file = os.path.join(tmp_dir_name, 'input_file.json') downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir') FileUtils.mk_dir_p(downloading_dir) @@ -333,6 +346,7 @@ def test_02_download__from_file(self): remote_filename = os.path.basename(each_granule[0]['assets']['data']['href']) self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}") + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_03_upload(self): @@ -354,6 +368,7 @@ def test_03_upload(self): argv.append('UPLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') os.environ['UPLOAD_DIR'] = tmp_dir_name with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff: ff.write('sample_file') @@ -465,6 +480,7 @@ def test_03_upload(self): self.assertTrue('data' in upload_result['assets'], 'missing assets#data') self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href') self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/')) + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_03_upload_catalog(self): @@ -487,6 +503,7 @@ def test_03_upload_catalog(self): argv.append('UPLOAD') with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') os.environ['UPLOAD_DIR'] = '' # not needed os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json') with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff: @@ -629,6 +646,7 @@ def test_03_upload_catalog(self): self.assertTrue('data' in upload_result['assets'], 'missing assets#data') self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href') self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/')) + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return def test_04_catalog(self): @@ -646,6 +664,9 @@ def test_04_catalog(self): if len(argv) > 1: argv.pop(-1) argv.append('CATALOG') - catalog_result = choose_process() - self.assertEqual('registered', catalog_result, 'wrong status') + with tempfile.TemporaryDirectory() as tmp_dir_name: + os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json') + catalog_result = choose_process() + self.assertEqual('registered', catalog_result, 'wrong status') + self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file') return