Skip to content

release/5.2.1 #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [5.2.1] - 2023-07-10
### Added
- [#182](https://github.com/unity-sds/unity-data-services/pull/182) fix: Retry if Download Error in DAAC

## [5.2.0] - 2023-07-05
### Added
- [#169](https://github.com/unity-sds/unity-data-services/pull/169) feat: parallelize download
Expand Down
66 changes: 55 additions & 11 deletions cumulus_lambda_functions/lib/earthdata_login/urs_token.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
import json
import logging
import time
from http.cookiejar import CookieJar
from typing import Dict
from urllib import request

import requests
from requests.auth import HTTPBasicAuth
from tenacity import retry, retry_if_result, stop_after_attempt, wait_random_exponential
LOGGER = logging.getLogger(__name__)

LOGGER = logging.getLogger(__name__)

class URSToken(object):
def __init__(self, username: str, dwssap: str, edl_base_url: str = None) -> None:
DNS_ERROR_TXT = 'Name-Resolution-Error'
"""
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 174, in _new_conn
conn = connection.create_connection(
File "/usr/local/lib/python3.9/site-packages/urllib3/util/connection.py", line 72, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "/usr/local/lib/python3.9/socket.py", line 954, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 714, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 403, in _make_request
self._validate_conn(conn)
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1053, in _validate_conn
conn.connect()
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 363, in connect
self.sock = conn = self._new_conn()
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 186, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f111cbc54f0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution
"""
def __init__(self, username: str, dwssap: str, edl_base_url: str = None, wait_time = 30, retry_times = 5) -> None:
super().__init__()
self.__default_edl_base_url = 'https://urs.earthdata.nasa.gov/'
self.__username = username
Expand All @@ -22,6 +47,9 @@ def __init__(self, username: str, dwssap: str, edl_base_url: str = None) -> None
if not self.__edl_base_url.startswith('http'):
self.__edl_base_url = f'https://{self.__edl_base_url}'
self.__token = None
self.__wait_time = wait_time
self.__retry_times = retry_times


@retry(wait=wait_random_exponential(multiplier=1, max=60),
stop=stop_after_attempt(3),
Expand All @@ -47,10 +75,11 @@ def create_token(self, url: str) -> str:

# Add better error handling there
# Max tokens
# Wrong Username/Passsword
# Wrong Username/Password
# Other
except: # noqa E722
except Exception as e: # noqa E722
LOGGER.warning("Error getting the token - check user name and password", exc_info=True)
raise RuntimeError(str(e))
return token

def list_tokens(self, url: str):
Expand All @@ -66,7 +95,7 @@ def list_tokens(self, url: str):
for x in response_content:
tokens.append(x['access_token'])

except: # noqa E722
except Exception as e: # noqa E722
LOGGER.warning("Error getting the token - check user name and password", exc_info=True)
return tokens

Expand Down Expand Up @@ -107,10 +136,25 @@ def delete_token(self, token: str) -> bool:
LOGGER.warning("Error deleting the token", exc_info=True)
return False

def __get_token_once(self):
    """
    Make a single attempt to obtain an EDL token.

    Existing tokens are listed from ``<edl_base_url>api/users``; the first one
    is reused when the list is not empty, otherwise a new token is created
    against the same URL.

    :return: the first listed token, the newly created token, or
        ``DNS_ERROR_TXT`` when the attempt failed with an exception whose text
        contains 'Temporary failure in name resolution'
    :raises Exception: any other exception from listing / creating tokens is
        re-raised unchanged
    """
    try:
        users_endpoint = f'{self.__edl_base_url}api/users'
        existing = self.list_tokens(users_endpoint)
        if len(existing) > 0:
            return existing[0]
        return self.create_token(users_endpoint)
    except Exception as conn_err:
        # Only a DNS hiccup is translated into the sentinel so the caller can
        # retry; everything else is a real failure and must propagate.
        if 'Temporary failure in name resolution' not in str(conn_err):
            raise conn_err
        return self.DNS_ERROR_TXT

def get_token(self) -> str:
    """
    Return an EDL token, retrying while name resolution keeps failing.

    One attempt is made through ``__get_token_once``. While that helper
    returns the ``DNS_ERROR_TXT`` sentinel, the attempt is repeated up to
    ``retry_times`` more times, sleeping ``wait_time`` seconds before each
    retry.

    :return: the token produced by ``__get_token_once``
    :raises RuntimeError: when the last attempt still ended in the
        ``DNS_ERROR_TXT`` sentinel, so the sentinel is never handed back as if
        it were a usable token
    :raises Exception: anything ``__get_token_once`` re-raises
    """
    token = self.__get_token_once()
    retry_count = 0
    while token == self.DNS_ERROR_TXT and retry_count < self.__retry_times:
        LOGGER.error(f'{self.DNS_ERROR_TXT} for URS Token. attempt: {retry_count}')
        time.sleep(self.__wait_time)
        token = self.__get_token_once()
        retry_count += 1
    if token == self.DNS_ERROR_TXT:
        # Retries are exhausted: fail loudly instead of returning the sentinel,
        # which a caller would otherwise send as a bearer token.
        raise RuntimeError(f'{self.DNS_ERROR_TXT} for URS Token after {retry_count} retries')
    return token

Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,5 @@ def download(self, **kwargs) -> str:
LOGGER.debug(f'writing errors if any')
if len(error_list_list) > 0:
with open(f'{self._download_dir}/error.log', 'w') as error_file:
error_file.write(json.dumps(error_list, indent=4))
error_file.write(json.dumps(error_list_list, indent=4))
return json.dumps(granules_json_dict)
10 changes: 10 additions & 0 deletions cumulus_lambda_functions/stage_in_out/download_granules_daac.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import time

import requests

from cumulus_lambda_functions.lib.earthdata_login.urs_token_retriever import URSTokenRetriever
Expand All @@ -15,6 +17,8 @@ class DownloadGranulesDAAC(DownloadGranulesAbstract):
def __init__(self) -> None:
    """
    Initialise the DAAC downloader with no EDL token and its retry settings.

    Retry settings are read from the environment and parsed as integers:
    ``DOWNLOAD_RETRY_WAIT_TIME`` (seconds between retries, default '30') and
    ``DOWNLOAD_RETRY_TIMES`` (default '5').
    """
    super().__init__()
    self.__edl_token = None
    wait_time_raw = os.getenv('DOWNLOAD_RETRY_WAIT_TIME', '30')
    retry_times_raw = os.getenv('DOWNLOAD_RETRY_TIMES', '5')
    self.__retry_wait_time_sec = int(wait_time_raw)
    self.__retry_times = int(retry_times_raw)

def _set_props_from_env(self):
missing_keys = [k for k in [self.STAC_JSON, self.DOWNLOAD_DIR_KEY] if k not in os.environ]
Expand All @@ -30,6 +34,12 @@ def _download_one_item(self, downloading_url):
'Authorization': f'Bearer {self.__edl_token}'
}
r = requests.get(downloading_url, headers=headers)
download_count = 1
while r.status_code in [502, 504] and download_count < self.__retry_times:
LOGGER.error(f'502 or 504 while downloading {downloading_url}. attempt: {download_count}')
time.sleep(self.__retry_wait_time_sec)
r = requests.get(downloading_url, headers=headers)
download_count += 1
r.raise_for_status()
local_file_path = os.path.join(self._download_dir, os.path.basename(downloading_url))
with open(local_file_path, 'wb') as fd:
Expand Down
3 changes: 3 additions & 0 deletions docker/stage-in-stage-out/dc-002-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ services:

PARALLEL_COUNT: 'int. Ex: 10. can be defaulted to -1 which will create the same number of parallelism as CPU count'

DOWNLOAD_RETRY_WAIT_TIME: '30'
DOWNLOAD_RETRY_TIMES: '5'

EDL_USERNAME: '/unity/uds/user/wphyo/edl_username'
EDL_PASSWORD: '/unity/uds/user/wphyo/edl_dwssap'
EDL_PASSWORD_TYPE: 'PARAM_STORE'
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

setup(
name="cumulus_lambda_functions",
version="5.2.0",
version="5.2.1",
packages=find_packages(),
install_requires=install_requires,
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,14 @@ def test_01(self):
result = URSTokenRetriever().start()
self.assertTrue(len(result) > 0, 'empty token')
return

def test_02(self):
    """
    Expect ``URSTokenRetriever().start()`` to raise ``RuntimeError`` when
    ``EDL_BASE_URL`` is set to 'urs1.earthdata.nasa.gov'.
    """
    os.environ[Constants.EDL_USERNAME] = '/unity/uds/user/wphyo/edl_username'
    os.environ[Constants.EDL_PASSWORD] = '/unity/uds/user/wphyo/edl_dwssap'
    # os.environ[Constants.USERNAME] = 'usps_username'
    # os.environ[Constants.PASSWORD] = 'usps_password'
    os.environ[Constants.EDL_PASSWORD_TYPE] = Constants.PARAM_STORE
    os.environ[Constants.EDL_BASE_URL] = 'urs1.earthdata.nasa.gov'
    # Only the raised exception type matters; neither the context manager
    # nor the return value is inspected afterwards.
    with self.assertRaises(RuntimeError):
        URSTokenRetriever().start()
Loading