Skip to content

Switch verifyarchives to use our serializer helpers to fix python3 use #266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: edge
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions mig/server/verifyarchives.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# verifyarchives - Search for missing files in user Archives
# Copyright (C) 2021-2024 The MiG Project lead by Brian Vinter
# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
Expand All @@ -20,21 +20,20 @@
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Check warning on line 23 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
#
# -- END_HEADER ---
#

"""Verify Archive integrity by comparing archive cache with actual contents."""

Check warning on line 28 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

First line should end with a period (not 's')

from __future__ import print_function
from __future__ import absolute_import

import datetime

Check failure on line 33 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused import 'datetime' (90% confidence)
import fnmatch
import getopt
import os
import pickle
import sys
import time

Expand All @@ -43,10 +42,11 @@
public_archive_index, public_archive_files, public_archive_doi, \
keyword_pending, keyword_final
from mig.shared.freezefunctions import sorted_hash_algos, checksum_file
from mig.shared.serial import load


def fuzzy_match(i, j, offset=2.0):
"""Compare the float values i and j and return true if j is within offset

Check warning on line 49 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

First line should end with a period (not 't')

Check warning on line 49 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

1 blank line required between summary line and description (found 0)
value of i.
Useful for comparing e.g. file timestamps with minor fluctuations due to
I/O times and rounding.
Expand All @@ -55,7 +55,7 @@


def check_archive_integrity(configuration, user_id, freeze_path, verbose=False):
"""Inspect Archives in freeze_path and compare contents to pickled cache.

Check warning on line 58 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

1 blank line required between summary line and description (found 0)
The cache is a list with one dictionary per file using the format:
{'sha512sum': '...', 'name': 'relpath/to/file.ext',
'timestamp': 1624273389.482884, 'md5sum': '...', 'sha256sum': '...',
Expand All @@ -73,7 +73,7 @@
# NOTE: if archive has no actual files it has no cache file either
if not os.path.exists(cache_path):
archive_list = os.listdir(freeze_path)
if [i for i in archive_list if not i in ignore_files]:

Check warning on line 76 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

test for membership should be 'not in'
print("Archive %s has data content but no file cache in %s" %
(freeze_path, cache_path))
return False
Expand All @@ -81,12 +81,8 @@
return True

try:
cache_fd = open(cache_path)
cache = pickle.load(cache_fd)
cache_fd.close()
meta_fd = open(meta_path)
meta = pickle.load(meta_fd)
meta_fd.close()
cache = load(cache_path)
meta = load(meta_path)
except Exception as exc:
print("Could not open archive helpers %s and %s for verification: %s" %
(cache_path, meta_path, exc))
Expand All @@ -112,9 +108,9 @@
# NOTE: we allow a minor time offset to accept various fs hiccups
elif not fuzzy_match(entry['timestamp'], archived_created) and \
not fuzzy_match(entry['timestamp'], archived_modified) and \
not fuzzy_match(entry.get('file_mtime', -1), archived_modified):

Check warning on line 111 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (84 > 80 characters)
if meta_state == keyword_final:
print("Archive entry %s has wrong timestamp %f / %f (expected %f, %s)" %

Check warning on line 113 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (92 > 80 characters)
(archive_path, archived_created, archived_modified,
entry['timestamp'], archived_stat))
chksum_verified = False
Expand Down Expand Up @@ -157,14 +153,14 @@
-B CREATED_BEFORE Limit to Archives created before CREATED_BEFORE (epoch)
-c CONF_FILE Use CONF_FILE as server configuration
-h Show this help
-I CERT_DN Filter to Archives of user ID (distinguished name pattern)

Check warning on line 156 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
-n ARCHIVE_NAME Filter to specific Archive name(s) (pattern)
-v Verbose output
""" % {'name': name})


if '__main__' == __name__:
conf_path = None

Check failure on line 163 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused variable 'conf_path' (60% confidence)
verbose = False
opt_args = 'A:B:c:hI:n:v'
now = int(time.time())
Expand All @@ -172,7 +168,7 @@
distinguished_name = '*'
archive_name = '*'
try:
(opts, args) = getopt.getopt(sys.argv[1:], opt_args)

Check failure on line 171 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused variable 'args' (60% confidence)
except getopt.GetoptError as err:
print('Error: ', err.msg)
usage()
Expand All @@ -198,7 +194,7 @@
before = int(val)
created_before = before
elif opt == '-c':
conf_path = val

Check failure on line 197 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused variable 'conf_path' (60% confidence)
elif opt == '-h':
usage()
sys.exit(0)
Expand All @@ -213,7 +209,7 @@
usage()
sys.exit(0)

archive_hits = {}

Check failure on line 212 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

Need type annotation for "archive_hits" (hint: "archive_hits: Dict[<type>, <type>] = ...") [var-annotated]
archive_fails = 0
from mig.shared.conf import get_configuration_object
configuration = get_configuration_object()
Expand All @@ -225,7 +221,7 @@
if not os.path.isdir(base_path) or user_dir.find('+') == -1:
continue
user_id = client_dir_id(user_dir)
user_dict = distinguished_name_to_user(user_id)

Check failure on line 224 in mig/server/verifyarchives.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused variable 'user_dict' (60% confidence)
if not fnmatch.fnmatch(user_id, distinguished_name):
if verbose:
print("skip Archives for %s not matching owner pattern %s" %
Expand Down