Skip to content

Commit 6e03fe4

Browse files
authored
fix: download error (#182)
* fix: retry 5 times + wait 45 sec for DAAC
* feat: make retry configurable
* fix: retry on "Temporary failure in name resolution"
* chore: update version
* fix: add new test case for error
1 parent 5883643 commit 6e03fe4

File tree

8 files changed

+1013
-13
lines changed

8 files changed

+1013
-13
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [5.2.1] - 2023-07-10
9+
### Fixed
10+
- [#182](https://github.com/unity-sds/unity-data-services/pull/182) fix: Retry if Download Error in DAAC
11+
812
## [5.2.0] - 2023-07-05
913
### Added
1014
- [#169](https://github.com/unity-sds/unity-data-services/pull/169) feat: parallelize download

cumulus_lambda_functions/lib/earthdata_login/urs_token.py

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,42 @@
11
import json
22
import logging
3+
import time
34
from http.cookiejar import CookieJar
45
from typing import Dict
56
from urllib import request
67

78
import requests
89
from requests.auth import HTTPBasicAuth
910
from tenacity import retry, retry_if_result, stop_after_attempt, wait_random_exponential
10-
LOGGER = logging.getLogger(__name__)
1111

12+
LOGGER = logging.getLogger(__name__)
1213

1314
class URSToken(object):
14-
def __init__(self, username: str, dwssap: str, edl_base_url: str = None) -> None:
15+
DNS_ERROR_TXT = 'Name-Resolution-Error'
16+
"""
17+
Traceback (most recent call last):
18+
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 174, in _new_conn
19+
conn = connection.create_connection(
20+
File "/usr/local/lib/python3.9/site-packages/urllib3/util/connection.py", line 72, in create_connection
21+
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
22+
File "/usr/local/lib/python3.9/socket.py", line 954, in getaddrinfo
23+
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
24+
socket.gaierror: [Errno -3] Temporary failure in name resolution
25+
During handling of the above exception, another exception occurred:
26+
Traceback (most recent call last):
27+
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 714, in urlopen
28+
httplib_response = self._make_request(
29+
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 403, in _make_request
30+
self._validate_conn(conn)
31+
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1053, in _validate_conn
32+
conn.connect()
33+
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 363, in connect
34+
self.sock = conn = self._new_conn()
35+
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 186, in _new_conn
36+
raise NewConnectionError(
37+
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f111cbc54f0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution
38+
"""
39+
def __init__(self, username: str, dwssap: str, edl_base_url: str = None, wait_time = 30, retry_times = 5) -> None:
1540
super().__init__()
1641
self.__default_edl_base_url = 'https://urs.earthdata.nasa.gov/'
1742
self.__username = username
@@ -22,6 +47,9 @@ def __init__(self, username: str, dwssap: str, edl_base_url: str = None) -> None
2247
if not self.__edl_base_url.startswith('http'):
2348
self.__edl_base_url = f'https://{self.__edl_base_url}'
2449
self.__token = None
50+
self.__wait_time = wait_time
51+
self.__retry_times = retry_times
52+
2553

2654
@retry(wait=wait_random_exponential(multiplier=1, max=60),
2755
stop=stop_after_attempt(3),
@@ -47,10 +75,11 @@ def create_token(self, url: str) -> str:
4775

4876
# Add better error handling there
4977
# Max tokens
50-
# Wrong Username/Passsword
78+
# Wrong Username/Password
5179
# Other
52-
except: # noqa E722
80+
except Exception as e: # noqa E722
5381
LOGGER.warning("Error getting the token - check user name and password", exc_info=True)
82+
raise RuntimeError(str(e))
5483
return token
5584

5685
def list_tokens(self, url: str):
@@ -66,7 +95,7 @@ def list_tokens(self, url: str):
6695
for x in response_content:
6796
tokens.append(x['access_token'])
6897

69-
except: # noqa E722
98+
except Exception as e: # noqa E722
7099
LOGGER.warning("Error getting the token - check user name and password", exc_info=True)
71100
return tokens
72101

@@ -107,10 +136,25 @@ def delete_token(self, token: str) -> bool:
107136
LOGGER.warning("Error deleting the token", exc_info=True)
108137
return False
109138

139+
def __get_token_once(self):
140+
try:
141+
token_url = f'{self.__edl_base_url}api/users'
142+
tokens = self.list_tokens(token_url)
143+
if len(tokens) == 0:
144+
return self.create_token(token_url)
145+
except Exception as conn_err:
146+
if 'Temporary failure in name resolution' in str(conn_err):
147+
return self.DNS_ERROR_TXT
148+
raise conn_err
149+
return tokens[0]
150+
110151
def get_token(self) -> str:
111-
token_url = f'{self.__edl_base_url}api/users'
112-
tokens = self.list_tokens(token_url)
113-
if len(tokens) == 0:
114-
return self.create_token(token_url)
115-
else:
116-
return tokens[0]
152+
token = self.__get_token_once()
153+
retry_count = 0
154+
while token == self.DNS_ERROR_TXT and retry_count < self.__retry_times:
155+
LOGGER.error(f'{self.DNS_ERROR_TXT} for URS Token. attempt: {retry_count}')
156+
time.sleep(self.__wait_time)
157+
token = self.__get_token_once()
158+
retry_count += 1
159+
return token
160+

cumulus_lambda_functions/stage_in_out/download_granules_abstract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,5 +131,5 @@ def download(self, **kwargs) -> str:
131131
LOGGER.debug(f'writing errors if any')
132132
if len(error_list_list) > 0:
133133
with open(f'{self._download_dir}/error.log', 'w') as error_file:
134-
error_file.write(json.dumps(error_list, indent=4))
134+
error_file.write(json.dumps(error_list_list, indent=4))
135135
return json.dumps(granules_json_dict)

cumulus_lambda_functions/stage_in_out/download_granules_daac.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import time
2+
13
import requests
24

35
from cumulus_lambda_functions.lib.earthdata_login.urs_token_retriever import URSTokenRetriever
@@ -15,6 +17,8 @@ class DownloadGranulesDAAC(DownloadGranulesAbstract):
1517
def __init__(self) -> None:
1618
super().__init__()
1719
self.__edl_token = None
20+
self.__retry_wait_time_sec = int(os.environ.get('DOWNLOAD_RETRY_WAIT_TIME', '30'))
21+
self.__retry_times = int(os.environ.get('DOWNLOAD_RETRY_TIMES', '5'))
1822

1923
def _set_props_from_env(self):
2024
missing_keys = [k for k in [self.STAC_JSON, self.DOWNLOAD_DIR_KEY] if k not in os.environ]
@@ -30,6 +34,12 @@ def _download_one_item(self, downloading_url):
3034
'Authorization': f'Bearer {self.__edl_token}'
3135
}
3236
r = requests.get(downloading_url, headers=headers)
37+
download_count = 1
38+
while r.status_code in [502, 504] and download_count < self.__retry_times:
39+
LOGGER.error(f'502 or 504 while downloading {downloading_url}. attempt: {download_count}')
40+
time.sleep(self.__retry_wait_time_sec)
41+
r = requests.get(downloading_url, headers=headers)
42+
download_count += 1
3343
r.raise_for_status()
3444
local_file_path = os.path.join(self._download_dir, os.path.basename(downloading_url))
3545
with open(local_file_path, 'wb') as fd:

docker/stage-in-stage-out/dc-002-download.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ services:
2020

2121
PARALLEL_COUNT: 'int. Ex: 10. can be defaulted to -1 which will create the same number of parallelism as CPU count'
2222

23+
DOWNLOAD_RETRY_WAIT_TIME: '30'
24+
DOWNLOAD_RETRY_TIMES: '5'
25+
2326
EDL_USERNAME: '/unity/uds/user/wphyo/edl_username'
2427
EDL_PASSWORD: '/unity/uds/user/wphyo/edl_dwssap'
2528
EDL_PASSWORD_TYPE: 'PARAM_STORE'

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
setup(
2020
name="cumulus_lambda_functions",
21-
version="5.2.0",
21+
version="5.2.1",
2222
packages=find_packages(),
2323
install_requires=install_requires,
2424
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

tests/cumulus_lambda_functions/lib/earthdata_login/test_urs_token_retriever.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,14 @@ def test_01(self):
1616
result = URSTokenRetriever().start()
1717
self.assertTrue(len(result) > 0, 'empty token')
1818
return
19+
20+
def test_02(self):
21+
os.environ[Constants.EDL_USERNAME] = '/unity/uds/user/wphyo/edl_username'
22+
os.environ[Constants.EDL_PASSWORD] = '/unity/uds/user/wphyo/edl_dwssap'
23+
# os.environ[Constants.USERNAME] = 'usps_username'
24+
# os.environ[Constants.PASSWORD] = 'usps_password'
25+
os.environ[Constants.EDL_PASSWORD_TYPE] = Constants.PARAM_STORE
26+
os.environ[Constants.EDL_BASE_URL] = 'urs1.earthdata.nasa.gov'
27+
with self.assertRaises(RuntimeError) as cm:
28+
result = URSTokenRetriever().start()
29+
return

0 commit comments

Comments
 (0)