Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/pump/_bitstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self, bitstream_file_str: str, bundle2bitstream_file_str: str):
"col_logo": 0,
}

if len(self._bs) == 0:
if not self._bs:
_logger.info(f"Empty input: [{bitstream_file_str}].")
return

Expand All @@ -58,7 +58,7 @@ def __init__(self, bitstream_file_str: str, bundle2bitstream_file_str: str):
self._done = []

def __len__(self):
return len(self._bs)
return len(self._bs) if self._bs is not None else 0

def uuid(self, b_id: int):
return self._id2uuid.get(str(b_id), None)
Expand Down Expand Up @@ -103,7 +103,7 @@ def _logo2col_import_to(self, dspace, collections):
_logger.info("There are no logos for collections.")
return

expected = len(collections.logos.items())
expected = len(collections.logos.items()) if collections.logos is not None else 0
log_key = "collection logos"
log_before_import(log_key, expected)

Expand Down Expand Up @@ -134,7 +134,7 @@ def _logo2com_import_to(self, dspace, communities):
_logger.info("There are no logos for communities.")
return

expected = len(communities.logos.items())
expected = len(communities.logos.items()) if communities.logos is not None else 0
log_key = "communities logos"
log_before_import(log_key, expected)

Expand Down
2 changes: 1 addition & 1 deletion src/pump/_bitstreamformatregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(self, bfr_file_str: str, fe_file_str: str):
return

def __len__(self):
return len(self._reg)
return len(self._reg) if self._reg is not None else 0

def uuid(self, f_id: int):
assert isinstance(list(self._id2uuid.keys() or [""])[0], str)
Expand Down
4 changes: 2 additions & 2 deletions src/pump/_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, bundle_file_str: str, item2bundle_file_str: str):
}
self._id2uuid = {}

if len(self._bundles) == 0:
if not self._bundles:
_logger.info(f"Empty input: [{bundle_file_str}].")
return

Expand All @@ -38,7 +38,7 @@ def __init__(self, bundle_file_str: str, item2bundle_file_str: str):
self._primary[primary_id] = b['bundle_id']

def __len__(self):
return len(self._bundles)
return len(self._bundles) if self._bundles is not None else 0

def uuid(self, b_id: int):
assert isinstance(list(self._id2uuid.keys() or [""])[0], str)
Expand Down
6 changes: 3 additions & 3 deletions src/pump/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def __init__(self, col_file_str: str, com2col_file_str: str, metadata_file_str:
self._groups_id2uuid = {}
self._groups_uuid2type = {}

if len(self._col) == 0:
if not self._col:
_logger.info(f"Empty input collections: [{col_file_str}].")
return

if len(self._com2col) == 0:
if not self._com2col:
_logger.info(f"Empty input community2collection: [{com2col_file_str}].")
return

Expand All @@ -58,7 +58,7 @@ def __init__(self, col_file_str: str, com2col_file_str: str, metadata_file_str:
self._col2group[int(m.group(1))] = meta['resource_id']

def __len__(self):
return len(self._col)
return len(self._col) if self._col is not None else 0

def uuid(self, com_id: int):
assert isinstance(list(self._id2uuid.keys() or [""])[0], str)
Expand Down
6 changes: 3 additions & 3 deletions src/pump/_community.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self, com_file_str: str, com2com_file_str: str):
self._groups = {}

def __len__(self):
return len(self._com)
return len(self._com) if self._com is not None else 0

@property
def logos(self):
Expand Down Expand Up @@ -79,15 +79,15 @@ def import_to(self, dspace, handles, metadata):
childs.setdefault(child_id, []).append(parent_id)

for arr in childs.values():
if len(arr) != 1:
if arr is None or len(arr) != 1:
_logger.critical(f"Strange child array: [{arr}]")

coms = self._com.copy()

iter = 0

i = 0
while len(coms) > 0:
while coms and len(coms) > 0:
iter += 1

if iter > 200:
Expand Down
43 changes: 27 additions & 16 deletions src/pump/_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def fetch_one(self, sql: str):

def exe_sql(self, sql_text: str):
with self._conn as cursor:
sql_lines = [x.strip() for x in sql_text.splitlines() if len(x.strip()) > 0]
sql_lines = [x.strip()
for x in (sql_text or "").splitlines() if len(x.strip()) > 0]
for sql in sql_lines:
cursor.execute(sql)
return
Expand Down Expand Up @@ -184,7 +185,11 @@ def __init__(self, raw_db_dspace_5, raw_db_utilities_5, raw_db_7, repo=None):

@staticmethod
def get_list_val(part: list, pos: int):
return part[pos] if len(part) > pos else None
if part is None:
return None
if 0 <= pos < len(part):
return part[pos]
return None

@staticmethod
def log_error(msg: str, test_n: str, part_type: str = None) -> list:
Expand Down Expand Up @@ -314,9 +319,9 @@ def _cmp_values(self, table_name: str, vals5, only_in_5, vals7, only_in_7, do_no
too_many_5 = ""
too_many_7 = ""
LIMIT = 5
if len(only_in_5) > LIMIT:
if only_in_5 and len(only_in_5) > LIMIT:
too_many_5 = f"!!! TOO MANY [{len(only_in_5)}] "
if len(only_in_7) > LIMIT:
if only_in_7 and len(only_in_7) > LIMIT:
too_many_7 = f"!!! TOO MANY [{len(only_in_7)}] "

do_not_show = do_not_show or "CI" in os.environ or "GITHUB_ACTION" in os.environ
Expand All @@ -325,9 +330,12 @@ def _cmp_values(self, table_name: str, vals5, only_in_5, vals7, only_in_7, do_no
only_in_5 = [x if "@" not in x else "....." for x in only_in_5]
only_in_7 = [x if "@" not in x else "....." for x in only_in_7]

_logger.info(f"Table [{table_name}]: v5:[{len(vals5)}], v7:[{len(vals7)}]\n"
f" {too_many_5}only in v5:[{only_in_5[:LIMIT]}]\n"
f" {too_many_7}only in v7:[{only_in_7[:LIMIT]}]")
_logger.info(
f"Table [{table_name}]: v5:[{len(vals5) if vals5 is not None else 0}], "
f"v7:[{len(vals7) if vals7 is not None else 0}]\n"
f" {too_many_5 or ''}only in v5:[{(only_in_5[:LIMIT] if only_in_5 else [])}]\n"
f" {too_many_7 or ''}only in v7:[{(only_in_7[:LIMIT] if only_in_7 else [])}]"
)

def diff_table_cmp_cols(self, db5, table_name: str, compare_arr: list, gdpr: bool = True):
cols5, vals5, cols7, vals7 = self._fetch_all_vals(db5, table_name)
Expand All @@ -340,7 +348,7 @@ def diff_table_cmp_cols(self, db5, table_name: str, compare_arr: list, gdpr: boo

only_in_5 = list(set(vals5_cmp).difference(vals7_cmp))
only_in_7 = list(set(vals7_cmp).difference(vals5_cmp))
if len(only_in_5) + len(only_in_7) == 0:
if (only_in_5 and len(only_in_5) or 0) + (only_in_7 and len(only_in_7) or 0) == 0:
_logger.info(f"Table [{table_name: >20}] is THE SAME in v5 and v7!")
return
self._cmp_values(table_name, vals5, only_in_5, vals7, only_in_7, do_not_show)
Expand All @@ -351,19 +359,22 @@ def diff_table_cmp_len(self, db5, table_name: str, nonnull: list = None, gdpr: b
cols5, vals5, cols7, vals7 = self._fetch_all_vals(db5, table_name)
do_not_show = gdpr and "email" in nonnull

if len(vals5) != len(vals7) and sql is not None:
len_vals5 = len(vals5) if vals5 is not None else 0
len_vals7 = len(vals7) if vals7 is not None else 0

if len_vals5 != len_vals7 and sql is not None:
cols5, vals5, cols7, vals7 = self._fetch_all_vals(db5, table_name, sql)
sql_info = True

msg = " OK " if len(vals5) == len(vals7) else " !!! WARN !!! "
msg = " OK " if len_vals5 == len_vals7 else " !!! WARN !!! "
_logger.info(
f"Table [{table_name: >20}] {msg} compared by len only v5:[{len(vals5)}], v7:[{len(vals7)}]")
f"Table [{table_name: >20}] {msg} compared by len only v5:[{len_vals5}], v7:[{len_vals7}]")

for col_name in nonnull:
vals5_cmp = [x for x in self._filter_vals(
vals5, cols5, [col_name]) if x[0] is not None]
vals7_cmp = [x for x in self._filter_vals(
vals7, cols7, [col_name]) if x[0] is not None]
vals5_cmp = [x for x in self._filter_vals(vals5 or [], cols5 or [],
[col_name]) if x[0] is not None]
vals7_cmp = [x for x in self._filter_vals(vals7 or [], cols7 or [],
[col_name]) if x[0] is not None]

msg = " OK " if len(vals5_cmp) == len(vals7_cmp) else " !!! WARN !!! "
_logger.info(
Expand Down Expand Up @@ -418,7 +429,7 @@ def validate(self, to_validate):
self.diff_table_cmp_len(db5, table_name, cmp)

# compare only len
if len(defin) == 0:
if not defin:
self.diff_table_cmp_len(db5, table_name)

cmp = defin.get("len", None)
Expand Down
8 changes: 4 additions & 4 deletions src/pump/_eperson.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, eperson_file_str: str):
self._email2id = {}
self._id2uuid = {}

if len(self._epersons) == 0:
if not self._epersons:
_logger.info(f"Empty input: [{eperson_file_str}].")
return

Expand All @@ -66,7 +66,7 @@ def __init__(self, eperson_file_str: str):
self._email2id[email] = e['eperson_id']

def __len__(self):
return len(self._epersons)
return len(self._epersons) if self._epersons is not None else 0

def by_email(self, email: str):
return self._email2id.get(email, None)
Expand Down Expand Up @@ -160,12 +160,12 @@ def __init__(self, egroups_file_str: str):

self._id2uuid = {}

if len(self._groups) == 0:
if not self._groups:
_logger.info(f"Empty input: [{egroups_file_str}].")
return

def __len__(self):
return len(self._groups)
return len(self._groups) if self._groups is not None else 0

@property
def imported(self):
Expand Down
8 changes: 4 additions & 4 deletions src/pump/_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ def __init__(self, eperson_file_str: str, g2g_file_str: str):
# all imported group
self._id2uuid = {}

if len(self._eperson) == 0:
if not self._eperson:
_logger.info(f"Empty input collections: [{eperson_file_str}].")

if len(self._g2g) == 0:
if not self._g2g:
_logger.info(f"Empty input collections: [{g2g_file_str}].")

@property
Expand Down Expand Up @@ -149,7 +149,7 @@ def _import_eperson(self, dspace, metadatas):
Import data into database.
Mapped tables: epersongroup
"""
expected = len(self._eperson)
expected = len(self._eperson) if self._eperson is not None else 0
log_key = "epersongroup"
log_before_import(log_key, expected)

Expand Down Expand Up @@ -201,7 +201,7 @@ def _import_group2group(self, dspace):
Import data into database.
Mapped tables: group2group
"""
expected = len(self._g2g)
expected = len(self._g2g) if self._g2g is not None else 0
log_key = "epersons g2g (could have children)"
log_before_import(log_key, expected)

Expand Down
8 changes: 4 additions & 4 deletions src/pump/_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, file_str: str):
arr.append(h)

def __len__(self):
return len(self._handles)
return len(self._handles) if self._handles is not None else 0

@property
def imported(self):
Expand Down Expand Up @@ -60,7 +60,7 @@ def get_handles_by_type(self, type_id: int = None, res_id: int = None):
@time_method
def import_to(self, dspace):
# external
arr = self.get_handles_by_type(None, None)
arr = self.get_handles_by_type(None, None) or []
expected = len(arr)
log_key = "external handles"
log_before_import(log_key, expected)
Expand All @@ -69,7 +69,7 @@ def import_to(self, dspace):
self._imported += cnt

# no object
arr = self.get_handles_by_type(items.TYPE, None)
arr = self.get_handles_by_type(items.TYPE, None) or []
expected = len(arr)
log_key = "handles"
log_before_import(log_key, expected)
Expand All @@ -83,7 +83,7 @@ def get(self, type_id: int, obj_id: int):
"""
Get handle based on object type and its id.
"""
arr = self.get_handles_by_type(type_id, obj_id)
arr = self.get_handles_by_type(type_id, obj_id) or []
if len(arr) == 0:
return None
return arr[0]['handle']
Loading