Fix PEP 8 violations (#103)

WordPress · Jun 17, 2021 · 0c7a507 · 0c7a507
1 parent 85affa3
commit 0c7a507
Show file tree

Hide file tree

Showing 17 changed files with 114 additions and 138 deletions.
diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/brooklyn_museum.py b/src/cc_catalog_airflow/dags/provider_api_scripts/brooklyn_museum.py
@@ -25,7 +25,7 @@
     "api_key": API_KEY
 }
 
-DEFAULT_QUERY_PARAM = {
+DEFAULT_QUERY_PARAMS = {
     "has_images": 1,
     "rights_type_permissive": 1,
     "limit": LIMIT,
@@ -55,19 +55,24 @@ def main():
 
 def _get_query_param(
         offset=0,
-        default_query_param=DEFAULT_QUERY_PARAM
+        default_query_param=None
         ):
+    if default_query_param is None:
+        default_query_param = DEFAULT_QUERY_PARAMS
     query_param = default_query_param.copy()
     query_param.update(offset=offset)
     return query_param
 
 
 def _get_object_json(
-        headers=HEADERS,
+        headers=None,
         endpoint=ENDPOINT,
         retries=RETRIES,
         query_param=None
         ):
+    if headers is None:
+        headers = HEADERS.copy()
+    data = None
     for tries in range(retries):
         response = delay_request.get(
                     endpoint,
@@ -80,12 +85,8 @@ def _get_object_json(
                     response_json.get("message", "").lower() == "success."):
                 data = response_json.get("data")
                 break
-            else:
-                data = None
         except Exception as e:
             logger.error(f"Error due to {e}")
-            data = None
-
     return data
 
 

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/cleveland_museum_of_art.py b/src/cc_catalog_airflow/dags/provider_api_scripts/cleveland_museum_of_art.py
@@ -62,7 +62,7 @@ def _get_response(
                 endpoint=ENDPOINT,
                 retries=RETRIES
                 ):
-    response_json, total_images = None, 0
+    response_json, total_images, tries = None, 0, 0
     for tries in range(retries):
         response = delay_request.get(
                     endpoint,
@@ -89,9 +89,8 @@ def _get_response(
         return response_json, total_images
 
 
-def _handle_response(
-                    batch
-                    ):
+def _handle_response(batch):
+    total_images = 0
     for data in batch:
         license_ = data.get('share_license_status', '').lower()
         if license_ != 'cc0':
@@ -135,39 +134,32 @@ def _handle_response(
     return total_images
 
 
-def _get_image_type(
-                    image_data
-                    ):
+def _get_image_type(image_data):
+    key, image_url = None, None
     if image_data.get('web'):
         key = 'web'
         image_url = image_data.get('web').get('url', None)
     elif image_data.get('print'):
         key = 'print'
         image_url = image_data.get('print').get('url', None)
-
     elif image_data.get('full'):
         key = 'full'
         image_url = image_data.get('full').get('url', None)
-    else:
-        image_url = None
-
-    if image_url is None:
-        key = None
     return image_url, key
 
 
 def _get_metadata(data):
-    metadata = {}
-
-    metadata['accession_number'] = data.get('accession_number', '')
-    metadata['technique'] = data.get('technique', '')
-    metadata['date'] = data.get('creation_date', '')
-    metadata['credit_line'] = data.get('creditline', '')
-    metadata['classification'] = data.get('type', '')
-    metadata['tombstone'] = data.get('tombstone', '')
-    metadata['culture'] = ','.join(
-        [i for i in data.get('culture', []) if i is not None]
-    )
+    metadata = {
+        'accession_number': data.get('accession_number', ''),
+        'technique': data.get('technique', ''),
+        'date': data.get('creation_date', ''),
+        'credit_line': data.get('creditline', ''),
+        'classification': data.get('type', ''),
+        'tombstone': data.get('tombstone', ''),
+        'culture': ','.join(
+            [i for i in data.get('culture', []) if i is not None]
+        )}
+    metadata = {k: v for k, v in metadata.items() if v is not None}
     return metadata
 
 

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/europeana.py b/src/cc_catalog_airflow/dags/provider_api_scripts/europeana.py
@@ -63,8 +63,6 @@ def main(date):
 
 def _get_pagewise(start_timestamp, end_timestamp):
     cursor = '*'
-    total_number_of_images = 0
-    images_stored = 0
 
     while cursor is not None:
         image_list, next_cursor, total_number_of_images = _get_image_list(
@@ -94,6 +92,10 @@ def _get_image_list(
         endpoint=ENDPOINT,
         max_tries=6  # one original try, plus 5 retries
 ):
+    try_number = 0
+    image_list, next_cursor, total_number_of_images = (
+        None, None, None
+    )
     for try_number in range(max_tries):
 
         query_param_dict = _build_query_param_dict(
@@ -123,9 +125,7 @@ def _get_image_list(
             and (image_list is None or next_cursor is None)
     ):
         logger.warning('No more tries remaining. Returning None types.')
-        return None, None, None
-    else:
-        return image_list, next_cursor, total_number_of_images
+    return image_list, next_cursor, total_number_of_images
 
 
 def _extract_response_json(response):
@@ -157,6 +157,7 @@ def _extract_image_list_from_json(response_json):
 
 def _process_image_list(image_list):
     prev_total = 0
+    total_images = 0
     for image_data in image_list:
         total_images = _process_image_data(image_data)
         if total_images is None:
@@ -202,9 +203,9 @@ def _process_image_data(image_data, sub_providers=SUB_PROVIDERS,
 def _get_license_url(license_field):
     if len(license_field) > 1:
         logger.warning('More than one license field found')
-    for license in license_field:
-        if 'creativecommons' in license:
-            return license
+    for license_ in license_field:
+        if 'creativecommons' in license_:
+            return license_
     return None
 
 

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/finnish_museums.py b/src/cc_catalog_airflow/dags/provider_api_scripts/finnish_museums.py
@@ -100,9 +100,9 @@ def _process_object_list(object_list):
 
 def _process_object(obj, sub_providers=SUB_PROVIDERS, provider=PROVIDER):
     total_images = 0
-    license = obj.get("imageRights")
-    if license is not None:
-        license_url = license.get("link")
+    license_url = obj.get("imageRights", {}).get("link")
+    if license_url is None:
+        return None
     foreign_identifier = obj.get("id")
     title = obj.get("title")
     building = obj.get("buildings")[0].get("value")
@@ -137,9 +137,9 @@ def _get_raw_tags(obj):
 
 def _get_landing(obj, landing_url=LANDING_URL):
     l_url = None
-    id = obj.get("id")
-    if id:
-        l_url = landing_url + id
+    id_ = obj.get("id")
+    if id_:
+        l_url = landing_url + id_
     return l_url
 
 

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/flickr.py b/src/cc_catalog_airflow/dags/provider_api_scripts/flickr.py
@@ -79,7 +79,7 @@ def main(date):
     date_type = DATE_TYPE
 
     for start_timestamp, end_timestamp in timestamp_pairs:
-        total_images = _process_interval(
+        _process_interval(
             start_timestamp,
             end_timestamp,
             date_type
@@ -159,6 +159,8 @@ def _get_image_list(
         endpoint=ENDPOINT,
         max_tries=6  # one original try, plus 5 retries
 ):
+    image_list, total_pages = None, None
+    try_number = 0
     for try_number in range(max_tries):
         query_param_dict = _build_query_param_dict(
             start_timestamp,
@@ -181,9 +183,8 @@ def _get_image_list(
     if try_number == max_tries - 1 and (
             (image_list is None) or (total_pages is None)):
         logger.warning('No more tries remaining. Returning Nonetypes.')
-        return None, None
-    else:
-        return image_list, total_pages
+
+    return image_list, total_pages
 
 
 def _extract_response_json(response):

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/metropolitan_museum_of_art.py b/src/cc_catalog_airflow/dags/provider_api_scripts/metropolitan_museum_of_art.py
@@ -133,15 +133,15 @@ def _build_foreign_id(object_id, image_url):
 
 
 def _create_meta_data(object_json):
-    meta_data = {}
-
-    meta_data['accession_number'] = object_json.get('accessionNumber', None)
-    meta_data['classification'] = object_json.get('classification', None)
-    meta_data['culture'] = object_json.get('culture', None)
-    meta_data['date'] = object_json.get('objectDate', None)
-    meta_data['medium'] = object_json.get('medium', None)
-    meta_data['credit_line'] = object_json.get('creditLine', None)
-
+    meta_data = {
+        'accession_number': object_json.get('accessionNumber'),
+        'classification': object_json.get('classification'),
+        'culture': object_json.get('culture'),
+        'date': object_json.get('objectDate'),
+        'medium': object_json.get('medium'),
+        'credit_line': object_json.get('creditLine')
+    }
+    meta_data = {k: v for k, v in meta_data.items() if v is not None}
     return meta_data
 
 

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/museum_victoria.py b/src/cc_catalog_airflow/dags/provider_api_scripts/museum_victoria.py
@@ -55,7 +55,7 @@ def main():
 
             if type(results) == list:
                 if len(results) > 0:
-                    image_count = _handle_batch_objects(results)
+                    _handle_batch_objects(results)
                     page += 1
                 else:
                     condition = False
@@ -77,6 +77,7 @@ def _get_batch_objects(
         endpoint=ENDPOINT, params=None,
         headers=HEADERS, retries=RETRIES
 ):
+    data = None
     for retry in range(retries):
         response = delay_request.get(
             endpoint,
@@ -88,8 +89,6 @@ def _get_batch_objects(
             if type(response_json) == list:
                 data = response_json
                 break
-            else:
-                data = None
         except Exception:
             data = None
     return data

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/nypl.py b/src/cc_catalog_airflow/dags/provider_api_scripts/nypl.py
@@ -193,7 +193,6 @@ def _get_images(
         image_url_dimensions=IMAGE_URL_DIMENSIONS,
         thumbnail_dimensions=THUMBNAIL_DIMENSIONS
         ):
-    image_url, thumbnail_url = None, None
     image_type = {
         parse_qs(urlparse(img.get("$")).query)['t'][0]: img.get("$")
         for img in images

diff --git a/src/cc_catalog_airflow/dags/provider_api_scripts/phylopic.py b/src/cc_catalog_airflow/dags/provider_api_scripts/phylopic.py
@@ -43,7 +43,6 @@ def main(date='all'):
            which running the script will pull data.
     """
 
-    param = None
     offset = 0
 
     logger.info('Begin: PhyloPic API requests')
@@ -114,8 +113,6 @@ def _get_total_images():
 
 def _create_endpoint_for_IDs(**args):
     limit = LIMIT
-    offset = 0
-    endpoint = ''
 
     if args.get('date'):
         # Get a list of objects uploaded/updated on a given date.
@@ -154,12 +151,6 @@ def _get_meta_data(_uuid):
     logger.info(f'Processing UUID: {_uuid}')
 
     base_url = 'http://phylopic.org'
-    img_url = ''
-    thumbnail = ''
-    width = ''
-    height = ''
-    foreign_id = ''
-    foreign_url = ''
     meta_data = {}
     endpoint = f"http://phylopic.org/api/a/image/{_uuid}?options=credit+" \
         "licenseURL+pngFiles+submitted+submitter+taxa+canonicalName" \
@@ -196,17 +187,17 @@ def _get_meta_data(_uuid):
 def _get_creator_details(result):
     credit_line = None
     pub_date = None
-    creator = ''
-
+    creator = None
     first_name = result.get('submitter', {}).get('firstName')
     last_name = result.get('submitter', {}).get('lastName')
-    creator = f'{first_name} {last_name}'.strip()
+    if first_name and last_name:
+        creator = f'{first_name} {last_name}'.strip()
 
     if result.get('credit'):
         credit_line = result.get('credit').strip()
         pub_date = result.get('submitted').strip()
 
-    return (creator, credit_line, pub_date)
+    return creator, credit_line, pub_date
 
 
 def _get_taxa_details(result):
@@ -215,15 +206,14 @@ def _get_taxa_details(result):
     taxa_list = None
     title = ''
     if taxa:
-        taxa = list(filter(
-            lambda x: x.get('canonicalName') is not None, taxa))
-        taxa_list = list(
-            map(lambda x: x.get('canonicalName', {}).get('string', ''), taxa))
+        taxa = [_.get('canonicalName') for _ in taxa
+                if _.get('canonicalName') is not None]
+        taxa_list = [_.get('string', '') for _ in taxa]
 
     if taxa_list:
         title = taxa_list[0]
 
-    return (taxa_list, title)
+    return taxa_list, title
 
 
 def _get_image_info(result, _uuid):
@@ -234,6 +224,8 @@ def _get_image_info(result, _uuid):
     height = ''
 
     image_info = result.get('pngFiles')
+    img = []
+    thb = []
     if image_info:
         img = list(filter(lambda x: (
             int(str(x.get('width', '0'))) >= 257), image_info))
@@ -257,7 +249,7 @@ def _get_image_info(result, _uuid):
             f'Image not detected in url: {base_url}/image/{_uuid}')
         return None, None, None, None
     else:
-        return (img_url, width, height, thumbnail)
+        return img_url, width, height, thumbnail
 
 
 if __name__ == '__main__':