1
1
import requests
2
+ from pystac import ItemCollection , Item , Asset
2
3
3
4
from cumulus_lambda_functions .lib .earthdata_login .urs_token_retriever import URSTokenRetriever
5
+ from cumulus_lambda_functions .lib .utils .file_utils import FileUtils
4
6
from cumulus_lambda_functions .stage_in_out .download_granules_abstract import DownloadGranulesAbstract
5
7
import json
6
8
import logging
@@ -25,76 +27,50 @@ def __set_props_from_env(self):
25
27
self .__edl_token = URSTokenRetriever ().start ()
26
28
return self
27
29
28
def __get_downloading_urls(self, granules_result: list):
    """
    Collect the ``assets`` entry of every granule.

    :param granules_result: list of granule dictionaries, each one holding an ``assets`` key
    :return: list with one ``assets`` value per granule; empty list (plus a warning) when there are no granules
    """
    if len(granules_result) >= 1:
        return [each_granule['assets'] for each_granule in granules_result]
    LOGGER.warning('cannot find any granules')
    return []
34
-
35
- def __download_one_granule (self , assets : dict ):
36
- """
37
- sample assets
38
- {
39
- "data": {
40
- "href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853900.PDS",
41
- "title": "P1570515ATMSSCIENCEAAT16017044853900.PDS",
42
- "description": "P1570515ATMSSCIENCEAAT16017044853900.PDS"
43
- },
44
- "metadata__data": {
45
- "href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853901.PDS",
46
- "title": "P1570515ATMSSCIENCEAAT16017044853901.PDS",
47
- "description": "P1570515ATMSSCIENCEAAT16017044853901.PDS"
48
- },
49
- "metadata__xml": {
50
- "href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853901.PDS.xml",
51
- "title": "P1570515ATMSSCIENCEAAT16017044853901.PDS.xml",
52
- "description": "P1570515ATMSSCIENCEAAT16017044853901.PDS.xml"
53
- },
54
- "metadata__cmr": {
55
- "href": "s3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml",
56
- "title": "P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml",
57
- "description": "P1570515ATMSSCIENCEAAT16017044853900.PDS.cmr.xml"
58
- }
59
- }
60
- :param assets:
61
- :return:
62
- """
63
- error_log = []
30
def __download_one_granule_item(self, granule_item: Item):
    """
    Download the selected assets of one granule and point them at the local copies.

    Only assets whose key is in ``self._downloading_keys`` are fetched; every
    other asset is left out of the returned item. Each fetched asset is stored
    in ``self._download_dir`` under the base name of its URL and its ``href``
    is rewritten to ``./<base name>``.

    The first failing asset aborts the whole granule by raising; the caller
    decides what to do with the failed granule.

    :param granule_item: STAC item whose assets hold the remote URLs. It is modified in place.
    :return: the same ``granule_item`` with ``assets`` replaced by the downloaded subset
    :raises requests.HTTPError: when the server answers with a 4xx / 5xx status
    :raises requests.RequestException: on connection errors or timeouts
    :raises OSError: when the local file cannot be written
    """
    headers = {
        'Authorization': f'Bearer {self.__edl_token}'
    }
    # (connect, read) timeouts in seconds. Without a timeout a stalled server
    # would block the download forever. The read timeout is the maximum gap
    # between two chunks, not a limit on the total transfer time.
    request_timeout = (30, 300)
    chunk_size = 1024 * 1024  # stream in 1 MiB pieces so large granules are never held fully in memory
    asset: Asset  # type hint for the loop variable below
    new_asset_dict = {}
    for name, asset in granule_item.assets.items():
        if name not in self._downloading_keys:
            LOGGER.debug(f'skipping {name}. Not in downloading keys')
            continue
        downloading_url = asset.href
        file_name = os.path.basename(downloading_url)
        local_file_path = os.path.join(self._download_dir, file_name)
        LOGGER.debug(f'downloading: {downloading_url}')
        # the context manager releases the connection even when an error is raised
        with requests.get(downloading_url, headers=headers, stream=True, timeout=request_timeout) as r:
            r.raise_for_status()
            try:
                with open(local_file_path, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        fd.write(chunk)
            except Exception:
                # a transfer interrupted half-way must not leave a truncated file behind
                if os.path.exists(local_file_path):
                    os.remove(local_file_path)
                raise
        # relative href: the asset now lives in the download directory
        asset.href = os.path.join('.', file_name)
        new_asset_dict[name] = asset
    granule_item.assets = new_asset_dict
    return granule_item
85
51
86
def download(self, **kwargs) -> dict:
    """
    Download every granule in ``self._granules_json`` into ``self._download_dir``.

    Granules are processed one by one. A granule that fails is logged, recorded
    in the error list and left out of the result; the remaining granules are
    still processed. After the loop two files may be written to the download
    directory:

    - ``downloaded_feature_collection.json``: the collection reduced to the
      successfully downloaded granules (always written when there was at
      least one granule to process)
    - ``error.log``: JSON list of ``{'error': ..., 'id': ...}`` entries, written
      only when at least one granule failed

    When the collection has no items, a warning is logged and the collection
    is returned as a dictionary without writing any file.

    NOTE(review): ``self._granules_json`` is used like a pystac ``ItemCollection``
    (``items`` / ``to_dict``) - it is populated outside this method.

    :param kwargs: not used by this implementation
    :return: dictionary form of the collection holding the downloaded granules
    """
    self.__set_props_from_env()
    LOGGER.debug(f'creating download dir: {self._download_dir}')
    if not self._granules_json.items:
        LOGGER.warning('cannot find any granules')
        return self._granules_json.to_dict(False)
    # The message above only announces the directory. Make sure it really exists
    # before anything is written into it; harmless when it was already created.
    os.makedirs(self._download_dir, exist_ok=True)
    local_items = []
    error_list = []
    for each_item in self._granules_json.items:
        try:
            local_item = self.__download_one_granule_item(each_item)
        except Exception as e:
            # best effort: one broken granule must not stop the others
            LOGGER.exception(f'error downloading granule: {each_item.id}')
            error_list.append({'error': str(e), 'id': each_item.id})
        else:
            local_items.append(local_item)
    LOGGER.debug('finished downloading all granules')
    # keep only the granules which were downloaded successfully
    self._granules_json.items = local_items
    LOGGER.debug('writing features collection json to downloading directory')
    granules_json_dict = self._granules_json.to_dict(False)
    FileUtils.write_json(os.path.join(self._download_dir, 'downloaded_feature_collection.json'), granules_json_dict, overwrite=True, prettify=True)
    LOGGER.debug('writing errors if any')
    if error_list:
        with open(os.path.join(self._download_dir, 'error.log'), 'w') as error_file:
            error_file.write(json.dumps(error_list, indent=4))
    return granules_json_dict
0 commit comments