Skip to content

Commit d952f18

Browse files
committed
feat: add public http download class
1 parent 794b488 commit d952f18

File tree

3 files changed

+171
-0
lines changed

3 files changed

+171
-0
lines changed

cumulus_lambda_functions/stage_in_out/download_granules_factory.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
class DownloadGranulesFactory:
44
S3 = 'S3'
5+
HTTP = 'HTTP'
56
DAAC = 'DAAC'
67

78
def get_class(self, search_type):
@@ -11,4 +12,7 @@ def get_class(self, search_type):
1112
elif search_type == DownloadGranulesFactory.DAAC:
1213
from cumulus_lambda_functions.stage_in_out.download_granules_daac import DownloadGranulesDAAC
1314
return DownloadGranulesDAAC()
15+
elif search_type == DownloadGranulesFactory.HTTP:
16+
from cumulus_lambda_functions.stage_in_out.download_granules_http import DownloadGranulesHttp
17+
return DownloadGranulesHttp()
1418
raise ValueError(f'unknown search_type: {search_type}')
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import shutil
2+
3+
import requests
4+
5+
from cumulus_lambda_functions.stage_in_out.download_granules_abstract import DownloadGranulesAbstract
6+
import json
7+
import logging
8+
import os
9+
10+
LOGGER = logging.getLogger(__name__)
11+
12+
13+
class DownloadGranulesHttp(DownloadGranulesAbstract):
14+
15+
def __init__(self) -> None:
16+
super().__init__()
17+
18+
def __set_props_from_env(self):
19+
missing_keys = [k for k in [self.STAC_JSON, self.DOWNLOAD_DIR_KEY] if k not in os.environ]
20+
if len(missing_keys) > 0:
21+
raise ValueError(f'missing environment keys: {missing_keys}')
22+
self._retrieve_stac_json()
23+
self._setup_download_dir()
24+
return self
25+
26+
def __get_downloading_urls(self, granules_result: list):
27+
if len(granules_result) < 1:
28+
LOGGER.warning(f'cannot find any granules')
29+
return []
30+
downloading_urls = [k['assets'] for k in granules_result]
31+
return downloading_urls
32+
33+
def __download_one_granule(self, assets: dict):
34+
"""
35+
sample assets
36+
{
37+
"data": {
38+
"href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853900.PDS",
39+
"title": "P1570515ATMSSCIENCEAAT16017044853900.PDS",
40+
"description": "P1570515ATMSSCIENCEAAT16017044853900.PDS"
41+
},
42+
"metadata__data": {
43+
"href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853901.PDS",
44+
"title": "P1570515ATMSSCIENCEAAT16017044853901.PDS",
45+
"description": "P1570515ATMSSCIENCEAAT16017044853901.PDS"
46+
},
47+
"metadata__xml": {
48+
"href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853901.PDS.xml",
49+
"title": "P1570515ATMSSCIENCEAAT16017044853901.PDS.xml",
50+
"description": "P1570515ATMSSCIENCEAAT16017044853901.PDS.xml"
51+
},
52+
"metadata__cmr": {
53+
"href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml",
54+
"title": "P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml",
55+
"description": "P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml"
56+
}
57+
}
58+
:param assets:
59+
:return:
60+
"""
61+
error_log = []
62+
local_item = {}
63+
for k, v in assets.items():
64+
local_item[k] = v
65+
try:
66+
LOGGER.debug(f'downloading: {v["href"]}')
67+
downloading_response = requests.get(v['href'])
68+
downloading_response.raise_for_status()
69+
downloading_response.raw.decode_content = True
70+
local_file_path = os.path.join(self._download_dir, os.path.basename(v["href"]))
71+
with open(local_file_path, 'wb') as f:
72+
shutil.copyfileobj(downloading_response.raw, f)
73+
local_item[k]['href'] = local_file_path
74+
except Exception as e:
75+
LOGGER.exception(f'failed to download {v}')
76+
local_item[k]['description'] = f'download failed. {str(e)}'
77+
error_log.append(v)
78+
return local_item, error_log
79+
80+
def download(self, **kwargs) -> list:
81+
self.__set_props_from_env()
82+
downloading_urls = self.__get_downloading_urls(self._granules_json)
83+
error_list = []
84+
local_items = []
85+
for each in downloading_urls:
86+
LOGGER.debug(f'working on {each}')
87+
local_item, current_error_list = self.__download_one_granule(each)
88+
local_items.append({'assets': local_item})
89+
error_list.extend(current_error_list)
90+
if len(error_list) > 0:
91+
with open(f'{self._download_dir}/error.log', 'w') as error_file:
92+
error_file.write(json.dumps(error_list, indent=4))
93+
return local_items

tests/integration_tests/test_docker_entry.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,80 @@ def test_02_download__from_file(self):
349349
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
350350
return
351351

352+
def test_02_download__from_http(self):
353+
granule_json = [
354+
{
355+
"assets": {
356+
"data": {
357+
"href": "https://raw.githubusercontent.com/unity-sds/unity-data-services/develop/README.md",
358+
"title": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc",
359+
"description": "SNDR.SNPP.ATMS.L1A.nominal2.01.nc"
360+
}
361+
}
362+
},
363+
{
364+
"assets": {
365+
"data": {
366+
"href": "https://raw.githubusercontent.com/unity-sds/unity-data-services/develop/CHANGELOG.md",
367+
"title": "SNDR.SNPP.ATMS.L1A.nominal2.08.nc",
368+
"description": "SNDR.SNPP.ATMS.L1A.nominal2.08.nc"
369+
}
370+
}
371+
},
372+
{
373+
"assets": {
374+
"data": {
375+
"href": "https://raw.githubusercontent.com/unity-sds/unity-data-services/develop/CODE_OF_CONDUCT.md",
376+
"title": "SNDR.SNPP.ATMS.L1A.nominal2.06.nc",
377+
"description": "SNDR.SNPP.ATMS.L1A.nominal2.06.nc"
378+
}
379+
}
380+
},
381+
{
382+
"assets": {
383+
"data": {
384+
"href": "https://raw.githubusercontent.com/unity-sds/unity-data-services/develop/CONTRIBUTING.md",
385+
"title": "SNDR.SNPP.ATMS.L1A.nominal2.18.nc",
386+
"description": "SNDR.SNPP.ATMS.L1A.nominal2.18.nc"
387+
}
388+
}
389+
},
390+
{
391+
"assets": {
392+
"data": {
393+
"href": "https://raw.githubusercontent.com/unity-sds/unity-data-services/develop/LICENSE",
394+
"title": "SNDR.SNPP.ATMS.L1A.nominal2.04.nc",
395+
"description": "SNDR.SNPP.ATMS.L1A.nominal2.04.nc"
396+
}
397+
}
398+
}
399+
]
400+
if len(argv) > 1:
401+
argv.pop(-1)
402+
argv.append('DOWNLOAD')
403+
os.environ['GRANULES_DOWNLOAD_TYPE'] = 'HTTP'
404+
with tempfile.TemporaryDirectory() as tmp_dir_name:
405+
os.environ['OUTPUT_FILE'] = os.path.join(tmp_dir_name, 'some_output', 'output.json')
406+
granule_json_file = os.path.join(tmp_dir_name, 'input_file.json')
407+
downloading_dir = os.path.join(tmp_dir_name, 'downloading_dir')
408+
FileUtils.mk_dir_p(downloading_dir)
409+
FileUtils.write_json(granule_json_file, granule_json)
410+
os.environ['STAC_JSON'] = granule_json_file
411+
os.environ['DOWNLOAD_DIR'] = downloading_dir
412+
download_result = choose_process()
413+
self.assertTrue(isinstance(download_result, list), f'download_result is not list: {download_result}')
414+
self.assertEqual(sum([len(k) for k in download_result]), len(glob(os.path.join(downloading_dir, '*'))), f'downloaded file does not match')
415+
error_file = os.path.join(downloading_dir, 'error.log')
416+
if FileUtils.file_exist(error_file):
417+
self.assertTrue(False, f'some downloads failed. error.log exists. {FileUtils.read_json(error_file)}')
418+
self.assertTrue('assets' in download_result[0], f'no assets in download_result: {download_result}')
419+
for each_granule in zip(granule_json, download_result):
420+
remote_filename = os.path.basename(each_granule[0]['assets']['data']['href'])
421+
self.assertEqual(each_granule[1]['assets']['data']['href'], os.path.join(downloading_dir, remote_filename),
422+
f"mismatched: {each_granule[0]['assets']['data']['href']}")
423+
self.assertTrue(FileUtils.file_exist(os.environ['OUTPUT_FILE']), f'missing output file')
424+
return
425+
352426
def test_03_upload(self):
353427
os.environ[Constants.USERNAME] = '/unity/uds/user/wphyo/username'
354428
os.environ[Constants.PASSWORD] = '/unity/uds/user/wphyo/dwssap'

0 commit comments

Comments
 (0)