
Commit 55ddcf5
Merge pull request #149 from unity-sds/output-to-file
feat: writing output content to a file if ENV is provided
ngachung authored May 15, 2023
2 parents 854f8e8 + b582adc
Showing 12 changed files with 99 additions and 18 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.8.0] - 2023-05-04
+### Added
+- [#149](https://github.com/unity-sds/unity-data-services/pull/149) feat: writing output content to a file if ENV is provided
+
 ## [3.7.1] - 2023-05-04
 ### Changed
 - [#148](https://github.com/unity-sds/unity-data-services/pull/148) fix: use cas structure to generate metadata for stac
18 changes: 13 additions & 5 deletions cumulus_lambda_functions/docker_entrypoint/__main__.py
@@ -4,24 +4,32 @@
 
 from cumulus_lambda_functions.stage_in_out.catalog_granules_factory import CatalogGranulesFactory
 from cumulus_lambda_functions.stage_in_out.download_granules_factory import DownloadGranulesFactory
-from cumulus_lambda_functions.stage_in_out.download_granules_s3 import DownloadGranulesS3
 from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory
+from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils
 from cumulus_lambda_functions.stage_in_out.upoad_granules_factory import UploadGranulesFactory
 
 
 def choose_process():
     if argv[1].strip().upper() == 'SEARCH':
         logging.info('starting SEARCH script')
-        return SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search()
+        result_str = SearchGranulesFactory().get_class(os.getenv('GRANULES_SEARCH_DOMAIN', 'MISSING_GRANULES_SEARCH_DOMAIN')).search()
+        StageInOutUtils.write_output_to_file(result_str)
+        return result_str
     if argv[1].strip().upper() == 'DOWNLOAD':
         logging.info('starting DOWNLOAD script')
-        return DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download()
+        result_str = DownloadGranulesFactory().get_class(os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')).download()
+        StageInOutUtils.write_output_to_file(result_str)
+        return result_str
     if argv[1].strip().upper() == 'UPLOAD':
         logging.info('starting UPLOAD script')
-        return UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload()
+        result_str = UploadGranulesFactory().get_class(os.getenv('GRANULES_UPLOAD_TYPE', 'MISSING_GRANULES_UPLOAD_TYPE')).upload()
+        StageInOutUtils.write_output_to_file(result_str)
+        return result_str
    if argv[1].strip().upper() == 'CATALOG':
         logging.info('starting CATALOG script')
-        return CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog()
+        result_str = CatalogGranulesFactory().get_class(os.getenv('GRANULES_CATALOG_TYPE', 'MISSING_GRANULES_CATALOG_TYPE')).catalog()
+        StageInOutUtils.write_output_to_file(result_str)
+        return result_str
     raise ValueError(f'invalid argument: {argv}')
 
 
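With this change, every entrypoint action (SEARCH, DOWNLOAD, UPLOAD, CATALOG) still returns its JSON result string, and additionally mirrors it to disk whenever the OUTPUT_FILE environment variable is set. A minimal usage sketch, following the same pattern the integration tests use to invoke choose_process directly; the /tmp path and the SEARCH action are illustrative, and the GRANULES_* settings a real search needs are assumed to be configured separately:

    # Hedged sketch: run one action and verify the mirrored output file.
    import json
    import os
    from sys import argv

    from cumulus_lambda_functions.docker_entrypoint.__main__ import choose_process

    os.environ['OUTPUT_FILE'] = '/tmp/some_output/output.json'  # illustrative path; enables the file write
    argv.append('SEARCH')  # choose_process() dispatches on argv[1]

    result_str = choose_process()  # returns the JSON string exactly as before
    with open(os.environ['OUTPUT_FILE']) as f:
        assert json.load(f) == json.loads(result_str)  # the same content is now on disk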
@@ -4,6 +4,7 @@
 import logging
 import os
 
+
 LOGGER = logging.getLogger(__name__)
 
 
@@ -6,6 +6,7 @@
 import logging
 import os
 
+
 LOGGER = logging.getLogger(__name__)
 
 
3 changes: 2 additions & 1 deletion cumulus_lambda_functions/stage_in_out/search_granules_cmr.py
@@ -100,4 +100,5 @@ def search(self, **kwargs) -> str:
             if len(temp_results) < page_size:
                 break
         results = self.__get_correct_result_count(results)
-        return json.dumps(StacUtils.reduce_stac_list_to_data_links(results)) if self.__filter_results else json.dumps(results)
+        results = StacUtils.reduce_stac_list_to_data_links(results) if self.__filter_results else results
+        return json.dumps(results)
@@ -4,7 +4,6 @@
 
 from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient
 from cumulus_lambda_functions.cumulus_stac.stac_utils import StacUtils
-from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
 from cumulus_lambda_functions.stage_in_out.search_granules_abstract import SearchGranulesAbstract
 
 LOGGER = logging.getLogger(__name__)
@@ -51,4 +50,5 @@ def search(self, **kwargs) -> str:
         self.__set_props_from_env()
         dapa_client = DapaClient().with_verify_ssl(self.__verify_ssl)
         granules_result = dapa_client.get_all_granules(self.__collection_id, self.__limit, self.__date_from, self.__date_to)
-        return json.dumps(StacUtils.reduce_stac_list_to_data_links(granules_result)) if self.__filter_results else json.dumps(granules_result)
+        granules_result = StacUtils.reduce_stac_list_to_data_links(granules_result) if self.__filter_results else granules_result
+        return json.dumps(granules_result)
24 changes: 24 additions & 0 deletions cumulus_lambda_functions/stage_in_out/stage_in_out_utils.py
@@ -0,0 +1,24 @@
+import json
+import logging
+import os
+from typing import Union
+
+from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
+
+LOGGER = logging.getLogger(__name__)
+
+
+class StageInOutUtils:
+    OUTPUT_FILE = 'OUTPUT_FILE'
+
+    @staticmethod
+    def write_output_to_file(output_json: Union[dict, str, list]):
+        if StageInOutUtils.OUTPUT_FILE not in os.environ:
+            LOGGER.debug(f'Not writing output to file due to missing {StageInOutUtils.OUTPUT_FILE} in ENV')
+            return
+        output_filepath = os.environ.get(StageInOutUtils.OUTPUT_FILE)
+        FileUtils.mk_dir_p(os.path.dirname(output_filepath))
+        output_str = json.dumps(output_json) if not isinstance(output_json, str) else output_json
+        with open(output_filepath, 'w') as ff:
+            ff.write(output_str)
+        return
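The new helper is deliberately forgiving: apart from a DEBUG log it is a no-op when OUTPUT_FILE is absent from the environment, it creates any missing parent directories before writing, and it serializes dict/list payloads while passing strings through unchanged. A minimal sketch of both behaviors; the temp-dir path is illustrative:

    # Hedged sketch of StageInOutUtils.write_output_to_file behavior.
    import os
    import tempfile

    from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils

    os.environ.pop(StageInOutUtils.OUTPUT_FILE, None)
    StageInOutUtils.write_output_to_file({'noop': True})  # OUTPUT_FILE unset: logs at DEBUG and returns

    with tempfile.TemporaryDirectory() as tmp_dir:
        os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir, 'nested', 'output.json')
        StageInOutUtils.write_output_to_file({'test1': True})  # creates 'nested/' via FileUtils.mk_dir_p, then writes JSON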
@@ -1,7 +1,6 @@
import json

from cumulus_lambda_functions.stage_in_out.search_collections_factory import SearchCollectionsFactory
from cumulus_lambda_functions.stage_in_out.search_granules_factory import SearchGranulesFactory
from cumulus_lambda_functions.stage_in_out.upload_granules_abstract import UploadGranulesAbstract
import logging
import os
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
 
 setup(
     name="cumulus_lambda_functions",
-    version="3.7.1",
+    version="3.8.0",
     packages=find_packages(),
     install_requires=install_requires,
     tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],
Empty file.
@@ -0,0 +1,22 @@
+import os
+import tempfile
+from unittest import TestCase
+
+from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
+from cumulus_lambda_functions.stage_in_out.stage_in_out_utils import StageInOutUtils
+
+
+class TestStageInOutUtils(TestCase):
+    def test_01(self):
+        with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'SAMPLE', 'output.json')
+            StageInOutUtils.write_output_to_file({'test1': True})
+            self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE)))
+        return
+
+    def test_02(self):
+        with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ[StageInOutUtils.OUTPUT_FILE] = os.path.join(tmp_dir_name, 'output.json')
+            StageInOutUtils.write_output_to_file({'test1': True})
+            self.assertTrue(FileUtils.file_exist(os.environ.get(StageInOutUtils.OUTPUT_FILE)))
+        return
37 changes: 29 additions & 8 deletions tests/integration_tests/test_docker_entry.py
@@ -35,12 +35,16 @@ def test_01_search_part_01(self):
         if len(argv) > 1:
             argv.pop(-1)
         argv.append('SEARCH')
-        search_result = choose_process()
-        search_result = json.loads(search_result)
-        self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}')
-        self.assertEqual(len(search_result), 4000, f'wrong length')
-        search_result = set([k['id'] for k in search_result])
-        self.assertEqual(len(search_result),4000, f'wrong length. not unique')
+        with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
+            search_result_str = choose_process()
+            search_result = json.loads(search_result_str)
+            self.assertTrue(isinstance(search_result, list), f'search_result is not list: {search_result}')
+            self.assertEqual(len(search_result), 4000, f'wrong length')
+            search_result = set([k['id'] for k in search_result])
+            self.assertEqual(len(search_result),4000, f'wrong length. not unique')
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
+            self.assertEqual(sorted(json.dumps(FileUtils.read_json(os.environ['OUTPUT_FILE']))), sorted(search_result_str), f'not identical result')
         return
 
     def test_01_search_part_02(self):
@@ -220,6 +224,7 @@ def test_02_download(self):
             argv.pop(-1)
         argv.append('DOWNLOAD')
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             os.environ['DOWNLOAD_DIR'] = tmp_dir_name
             download_result = choose_process()
             self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}')
@@ -228,6 +233,7 @@ def test_02_download(self):
             for each_granule in zip(granule_json, download_result):
                 remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
                 self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename), f"mismatched: {each_granule[0]['assets']['data']['href']}")
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_02_download__daac(self):
@@ -242,6 +248,7 @@ def test_02_download__daac(self):
             argv.pop(-1)
         argv.append('DOWNLOAD')
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             os.environ['DOWNLOAD_DIR'] = tmp_dir_name
             download_result = choose_process()
             self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}')
@@ -254,6 +261,7 @@ def test_02_download__daac(self):
                 remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
                 self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(tmp_dir_name, remote_filename),
                                  f"mismatched: {each_granule[0]['assets']['data']['href']}")
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_02_download__daac__from_file(self):
@@ -268,6 +276,7 @@ def test_02_download__daac__from_file(self):
             argv.pop(-1)
         argv.append('DOWNLOAD')
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             granule_json_file = os.path.join(tmp_dir_name, 'input_file.json')
             downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir')
             FileUtils.mk_dir_p(downloading_dir)
@@ -285,6 +294,7 @@ def test_02_download__daac__from_file(self):
                 remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
                 self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename),
                                  f"mismatched: {each_granule[0]['assets']['data']['href']}")
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_02_download__daac_error(self):
@@ -299,6 +309,7 @@ def test_02_download__daac_error(self):
             argv.pop(-1)
         argv.append('DOWNLOAD')
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             # TODO this is downloading a login page HTML
             os.environ['DOWNLOAD_DIR'] = tmp_dir_name
             download_result = choose_process()
@@ -307,6 +318,7 @@ def test_02_download__daac_error(self):
             error_file = os.path.join(tmp_dir_name, 'error.log')
             if FileUtils.file_exist(error_file):
                 self.assertTrue(False, f'some downloads failed. error.log exists. {FileUtils.read_json(error_file)}')
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_02_download__from_file(self):
@@ -316,6 +328,7 @@ def test_02_download__from_file(self):
         argv.append('DOWNLOAD')
         os.environ['GRANULES_DOWNLOAD_TYPE'] = 'S3'
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             granule_json_file = os.path.join(tmp_dir_name, 'input_file.json')
             downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir')
             FileUtils.mk_dir_p(downloading_dir)
@@ -333,6 +346,7 @@ def test_02_download__from_file(self):
                 remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
                 self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename),
                                  f"mismatched: {each_granule[0]['assets']['data']['href']}")
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_03_upload(self):
@@ -354,6 +368,7 @@ def test_03_upload(self):
         argv.append('UPLOAD')
 
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             os.environ['UPLOAD_DIR'] = tmp_dir_name
             with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff:
                 ff.write('sample_file')
@@ -465,6 +480,7 @@ def test_03_upload(self):
             self.assertTrue('data' in upload_result['assets'], 'missing assets#data')
             self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href')
             self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/'))
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_03_upload_catalog(self):
@@ -487,6 +503,7 @@ def test_03_upload_catalog(self):
         argv.append('UPLOAD')
 
         with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
             os.environ['UPLOAD_DIR'] = ''  # not needed
             os.environ['CATALOG_FILE'] = os.path.join(tmp_dir_name, 'catalog.json')
             with open(os.path.join(tmp_dir_name, 'test_file01.nc'), 'w') as ff:
@@ -629,6 +646,7 @@ def test_03_upload_catalog(self):
             self.assertTrue('data' in upload_result['assets'], 'missing assets#data')
             self.assertTrue('href' in upload_result['assets']['data'], 'missing assets#data#href')
             self.assertTrue(upload_result['assets']['data']['href'].startswith(f's3://{os.environ["STAGING_BUCKET"]}/'))
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 
     def test_04_catalog(self):
@@ -646,6 +664,9 @@ def test_04_catalog(self):
         if len(argv) > 1:
             argv.pop(-1)
         argv.append('CATALOG')
-        catalog_result = choose_process()
-        self.assertEqual('registered', catalog_result, 'wrong status')
+        with tempfile.TemporaryDirectory() as tmp_dir_name:
+            os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
+            catalog_result = choose_process()
+            self.assertEqual('registered', catalog_result, 'wrong status')
+            self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
         return
 