Skip to content

Commit 08d036b

Browse files
authored
Merge pull request #142 from unity-sds/daac-download-stac-file
feat: Support DAAC download files stac file, not just direct json text
2 parents d88bca7 + d3ef2e1 commit 08d036b

File tree

6 files changed

+81
-46
lines changed

6 files changed

+81
-46
lines changed

CHANGELOG.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [3.6.0] - 2023-04-24
9+
### Added
10+
- [#142](https://github.com/unity-sds/unity-data-services/pull/142) feat: Support DAAC download from a STAC file, not just direct JSON text
11+
812
## [3.5.0] - 2023-04-18
9-
### Changed
13+
### Added
1014
- [#138](https://github.com/unity-sds/unity-data-services/pull/138) feat: Checkout stage with STAC catalog json
1115

1216
## [3.4.0] - 2023-04-17
13-
### Changed
17+
### Added
1418
- [#132](https://github.com/unity-sds/unity-data-services/pull/132) feat: add DAAC download logic
1519

1620
## [3.3.1] - 2023-04-13
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,43 @@
1+
import json
2+
import logging
3+
import os
14
from abc import ABC, abstractmethod
25

6+
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
7+
8+
LOGGER = logging.getLogger(__name__)
9+
310

411
class DownloadGranulesAbstract(ABC):
    """Common base for granule downloaders that are configured through environment variables.

    Subclasses implement :meth:`download`; this class provides the shared
    steps of resolving the download directory and loading the STAC JSON
    describing the granules.
    """
    # Name of the environment variable holding either inline JSON text or a path to a JSON file.
    STAC_JSON = 'STAC_JSON'
    # Name of the environment variable holding the directory where files are written.
    DOWNLOAD_DIR_KEY = 'DOWNLOAD_DIR'

    def __init__(self) -> None:
        super().__init__()
        self._granules_json = []
        self._download_dir = '/tmp'

    def _setup_download_dir(self):
        """Resolve the download directory from the environment and create it.

        Reads the variable named by ``DOWNLOAD_DIR_KEY``. When it is not set,
        the current value of ``self._download_dir`` (``'/tmp'`` after
        ``__init__``) is kept instead of failing on ``None``. A single
        trailing ``/`` is stripped before the directory is created.

        :return: self, to allow call chaining
        """
        download_dir = os.environ.get(self.DOWNLOAD_DIR_KEY, self._download_dir)
        if download_dir.endswith('/'):
            download_dir = download_dir[:-1]
        self._download_dir = download_dir
        LOGGER.debug(f'creating download dir: {self._download_dir}')
        # NOTE(review): mk_dir_p presumably creates the directory including parents - confirm in FileUtils.
        FileUtils.mk_dir_p(self._download_dir)
        return self

    def _retrieve_stac_json(self):
        """Load the granules description into ``self._granules_json``.

        The variable named by ``STAC_JSON`` is first parsed as inline JSON
        text. If that fails, its value is treated as a path to a JSON file.

        :return: self, to allow call chaining
        :raises ValueError: if the variable is not set, if the value is
            neither JSON nor an existing file, or if reading the file yields
            ``None``
        """
        raw_stac_json = os.environ.get(self.STAC_JSON)
        if raw_stac_json is None:
            # Fail early with a clear message rather than passing None to the JSON / file helpers.
            raise ValueError(f'missing environment keys: {[self.STAC_JSON]}')
        try:
            self._granules_json = json.loads(raw_stac_json)
        except json.JSONDecodeError:
            # Only a parse failure means "not inline JSON"; other errors should propagate.
            LOGGER.debug(f'raw_stac_json is not STAC_JSON: {raw_stac_json}. trying to see if file exists')
        else:
            return self
        if not FileUtils.file_exist(raw_stac_json):
            raise ValueError(f'missing file or not JSON: {raw_stac_json}')
        self._granules_json = FileUtils.read_json(raw_stac_json)
        # NOTE(review): read_json presumably returns None when the file content is not JSON - confirm in FileUtils.
        if self._granules_json is None:
            raise ValueError(f'{raw_stac_json} is not JSON')
        return self

    @abstractmethod
    def download(self, **kwargs) -> list:
        """Download the granules; concrete subclasses must implement this.

        :param kwargs: implementation-specific options
        :return: a list (contents are defined by the implementing subclass)
        """
        raise NotImplementedError()

cumulus_lambda_functions/stage_in_out/download_granules_daac.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,21 @@
66
import logging
77
import os
88

9-
from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3
10-
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
11-
129
LOGGER = logging.getLogger(__name__)
1310

1411

1512
class DownloadGranulesDAAC(DownloadGranulesAbstract):
16-
DOWNLOAD_DIR_KEY = 'DOWNLOAD_DIR'
17-
STAC_JSON = 'STAC_JSON'
1813

1914
def __init__(self) -> None:
2015
super().__init__()
21-
self.__download_dir = '/tmp'
22-
self.__s3 = AwsS3()
23-
self.__granules_json = []
2416
self.__edl_token = None
2517

2618
def __set_props_from_env(self):
2719
missing_keys = [k for k in [self.STAC_JSON, self.DOWNLOAD_DIR_KEY] if k not in os.environ]
2820
if len(missing_keys) > 0:
2921
raise ValueError(f'missing environment keys: {missing_keys}')
30-
self.__granules_json = json.loads(os.environ.get(self.STAC_JSON))
31-
self.__download_dir = os.environ.get(self.DOWNLOAD_DIR_KEY)
32-
self.__download_dir = self.__download_dir[:-1] if self.__download_dir.endswith('/') else self.__download_dir
22+
self._retrieve_stac_json()
23+
self._setup_download_dir()
3324
self.__edl_token = URSTokenRetriever().start()
3425
return self
3526

@@ -79,7 +70,7 @@ def __download_one_granule(self, assets: dict):
7970
if r.status_code >= 400:
8071
raise RuntimeError(f'wrong response status: {r.status_code}. details: {r.content}')
8172
# TODO. how to correctly check redirecting to login page
82-
with open(os.path.join(self.__download_dir, os.path.basename(v["href"])), 'wb') as fd:
73+
with open(os.path.join(self._download_dir, os.path.basename(v["href"])), 'wb') as fd:
8374
fd.write(r.content)
8475
except Exception as e:
8576
LOGGER.exception(f'failed to download {v}')
@@ -89,15 +80,14 @@ def __download_one_granule(self, assets: dict):
8980

9081
def download(self, **kwargs) -> list:
9182
self.__set_props_from_env()
92-
LOGGER.debug(f'creating download dir: {self.__download_dir}')
93-
FileUtils.mk_dir_p(self.__download_dir)
94-
downloading_urls = self.__get_downloading_urls(self.__granules_json)
83+
LOGGER.debug(f'creating download dir: {self._download_dir}')
84+
downloading_urls = self.__get_downloading_urls(self._granules_json)
9585
error_list = []
9686
for each in downloading_urls:
9787
LOGGER.debug(f'working on {each}')
9888
current_error_list = self.__download_one_granule(each)
9989
error_list.extend(current_error_list)
10090
if len(error_list) > 0:
101-
with open(f'{self.__download_dir}/error.log', 'w') as error_file:
91+
with open(f'{self._download_dir}/error.log', 'w') as error_file:
10292
error_file.write(json.dumps(error_list, indent=4))
10393
return downloading_urls

cumulus_lambda_functions/stage_in_out/download_granules_s3.py

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,22 @@
44
import os
55

66
from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3
7-
from cumulus_lambda_functions.lib.utils.file_utils import FileUtils
87

98
LOGGER = logging.getLogger(__name__)
109

1110

1211
class DownloadGranulesS3(DownloadGranulesAbstract):
13-
DOWNLOAD_DIR_KEY = 'DOWNLOAD_DIR'
14-
STAC_JSON = 'STAC_JSON'
1512

1613
def __init__(self) -> None:
1714
super().__init__()
18-
self.__download_dir = '/tmp'
1915
self.__s3 = AwsS3()
20-
self.__granules_json = []
21-
22-
def __retrieve_stac_json(self):
23-
raw_stac_json = os.environ.get(self.STAC_JSON)
24-
try:
25-
self.__granules_json = json.loads(raw_stac_json)
26-
return self
27-
except:
28-
LOGGER.debug(f'raw_stac_json is not STAC_JSON: {raw_stac_json}. trying to see if file exists')
29-
if not FileUtils.file_exist(raw_stac_json):
30-
raise ValueError(f'missing file or not JSON: {raw_stac_json}')
31-
self.__granules_json = FileUtils.read_json(raw_stac_json)
32-
if self.__granules_json is None:
33-
raise ValueError(f'{raw_stac_json} is not JSON')
34-
return self
3516

3617
def __set_props_from_env(self):
3718
missing_keys = [k for k in [self.STAC_JSON, self.DOWNLOAD_DIR_KEY] if k not in os.environ]
3819
if len(missing_keys) > 0:
3920
raise ValueError(f'missing environment keys: {missing_keys}')
40-
self.__retrieve_stac_json()
41-
self.__download_dir = os.environ.get(self.DOWNLOAD_DIR_KEY)
42-
self.__download_dir = self.__download_dir[:-1] if self.__download_dir.endswith('/') else self.__download_dir
21+
self._retrieve_stac_json()
22+
self._setup_download_dir()
4323
return self
4424

4525
def __get_downloading_urls(self, granules_result: list):
@@ -81,7 +61,7 @@ def __download_one_granule(self, assets: dict):
8161
for k, v in assets.items():
8262
try:
8363
LOGGER.debug(f'downloading: {v["href"]}')
84-
self.__s3.set_s3_url(v['href']).download(self.__download_dir)
64+
self.__s3.set_s3_url(v['href']).download(self._download_dir)
8565
except Exception as e:
8666
LOGGER.exception(f'failed to download {v}')
8767
v['cause'] = str(e)
@@ -90,15 +70,13 @@ def __download_one_granule(self, assets: dict):
9070

9171
def download(self, **kwargs) -> list:
9272
self.__set_props_from_env()
93-
LOGGER.debug(f'creating download dir: {self.__download_dir}')
94-
FileUtils.mk_dir_p(self.__download_dir)
95-
downloading_urls = self.__get_downloading_urls(self.__granules_json)
73+
downloading_urls = self.__get_downloading_urls(self._granules_json)
9674
error_list = []
9775
for each in downloading_urls:
9876
LOGGER.debug(f'working on {each}')
9977
current_error_list = self.__download_one_granule(each)
10078
error_list.extend(current_error_list)
10179
if len(error_list) > 0:
102-
with open(f'{self.__download_dir}/error.log', 'w') as error_file:
80+
with open(f'{self._download_dir}/error.log', 'w') as error_file:
10381
error_file.write(json.dumps(error_list, indent=4))
10482
return downloading_urls

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
setup(
2020
name="cumulus_lambda_functions",
21-
version="3.5.0",
21+
version="3.6.0",
2222
packages=find_packages(),
2323
install_requires=install_requires,
2424
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

0 commit comments

Comments
 (0)