various: better UTC times
Co-Authored-by: Peter Weber <peter.weber@rero.ch>
rerowep committed Aug 24, 2022
1 parent 00dc17b commit 5e12db9
Showing 6 changed files with 66 additions and 54 deletions.
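The substance of the commit is replacing pytz-based UTC localization with the standard-library timezone. A minimal sketch (not part of the commit) contrasting the two forms; note that the old form stamps the naive local wall-clock time as UTC, which is only correct on machines whose clock runs in UTC:

from datetime import datetime, timezone

import pytz

# Old form: datetime.now() is naive local time, and localize() merely attaches
# UTC to it without converting.
old_style = pytz.utc.localize(datetime.now()).isoformat()

# New form used throughout this commit: an aware datetime taken directly in UTC.
new_style = datetime.now(timezone.utc).isoformat()

print(old_style)  # e.g. 2022-08-24T14:30:00.123456+00:00 (local wall-clock)
print(new_style)  # e.g. 2022-08-24T12:30:00.123456+00:00 (actual UTC)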
16 changes: 16 additions & 0 deletions rero_mef/agents/mef/api.py
@@ -17,6 +17,8 @@

"""API for manipulating MEF records."""

from datetime import datetime, timezone

import click
from elasticsearch_dsl import Q
from flask import current_app
@@ -166,6 +168,20 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
"""
return self, Action.ERROR, None, False

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
"""Create a deleted record for an record.
:param record: Record to create.
:param dbcommit: Commit changes to DB.
:param reindex: Reindex record.
:returns: Created record.
"""
data = {record.name: {
'$ref': build_ref_string(record.pid, record.name)}}
data['deleted'] = datetime.now(timezone.utc).isoformat()
return cls.create(data=data, dbcommit=dbcommit, reindex=reindex)


class AgentMefIndexer(ReroIndexer):
"""AgentMefIndexer."""
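For illustration only, a standalone sketch of the payload the new create_deleted() classmethod assembles before handing it to create(); the helper name and the $ref URL layout are assumptions, not the repository's build_ref_string():

from datetime import datetime, timezone

def build_deleted_data(pid, name, base_url='https://mef.example.org/api'):
    """Hypothetical helper mirroring the data dict built by create_deleted()."""
    # Reference back to the source record, keyed by its agent name.
    data = {name: {'$ref': f'{base_url}/{name}/{pid}'}}
    # Deletion timestamp as an aware UTC ISO 8601 string.
    data['deleted'] = datetime.now(timezone.utc).isoformat()
    return data

print(build_deleted_data('123456', 'idref'))
# {'idref': {'$ref': 'https://mef.example.org/api/idref/123456'},
#  'deleted': '2022-08-24T12:30:00.123456+00:00'}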
19 changes: 2 additions & 17 deletions rero_mef/api_mef.py
@@ -17,10 +17,9 @@

"""API for manipulating MEF records."""

from datetime import datetime
from datetime import datetime, timezone

import click
import pytz
from elasticsearch_dsl import Q
from flask import current_app
from invenio_search import current_search
@@ -212,24 +211,10 @@ def mark_as_deleted(self, dbcommit=False, reindex=False):
:param reindex: Reindex record.
:returns: Modified record.
"""
self['deleted'] = pytz.utc.localize(datetime.now()).isoformat()
self['deleted'] = datetime.now(timezone.utc).isoformat()
self.update(data=self, dbcommit=dbcommit, reindex=reindex)
return self

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
"""Create a deleted record for an record.
:param record: Record to create.
:param dbcommit: Commit changes to DB.
:param reindex: Reindex record.
:returns: Created record.
"""
data = {record.name: {
'$ref': build_ref_string(record.pid, record.name)}}
data['deleted'] = pytz.utc.localize(datetime.now()).isoformat()
return cls.create(data=data, dbcommit=dbcommit, reindex=reindex)

@classmethod
def flush_indexes(cls):
"""Update indexes."""
16 changes: 16 additions & 0 deletions rero_mef/concepts/mef/api.py
@@ -17,6 +17,8 @@

"""API for manipulating MEF records."""

from datetime import datetime, timezone

from flask import current_app
from invenio_search import current_search
from invenio_search.api import RecordsSearch
@@ -112,6 +114,20 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
"""
return self, Action.ERROR, None, False

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
"""Create a deleted record for an record.
:param record: Record to create.
:param dbcommit: Commit changes to DB.
:param reindex: Reindex record.
:returns: Created record.
"""
data = {record.name: {
'$ref': build_ref_string(record.pid, record.name)}}
data['deleted'] = datetime.now(timezone.utc).isoformat()
return cls.create(data=data, dbcommit=dbcommit, reindex=reindex)


class ConceptMefIndexer(ReroIndexer):
"""MefIndexer."""
8 changes: 2 additions & 6 deletions rero_mef/marctojson/do_gnd_agent.py
@@ -19,9 +19,7 @@
# https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/GND_Aenderungsdienst/gndAenderungsdienst_node.html

import re
from datetime import datetime

import pytz
from datetime import datetime, timezone

from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \
LANGUAGES, build_string_list_from_fields
@@ -76,9 +74,7 @@ def trans_gnd_deleted(self):
if self.logger and self.verbose:
self.logger.info('Call Function', 'trans_gnd_deleted')
if self.marc.leader[5] in ['c', 'd', 'x']:
self.json_dict['deleted'] = pytz.utc.localize(
datetime.now()
).isoformat()
self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat()

def trans_gnd_relation_pid(self):
"""Transformation relation pids 682 $0.
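The deletion check above keys off position 5 of the MARC leader (the record status byte). A self-contained sketch, independent of the transformer classes, of how the same flag could be derived; the status sets are taken from the two transformers touched by this commit ('c', 'd', 'x' for GND, only 'd' for IdRef):

from datetime import datetime, timezone

GND_DELETED_STATUSES = {'c', 'd', 'x'}   # statuses the GND transformer treats as deletions
IDREF_DELETED_STATUSES = {'d'}           # the IdRef transformer only checks 'd'

def deleted_timestamp(leader, statuses):
    """Return an aware UTC ISO timestamp if leader position 5 marks a deletion."""
    if len(leader) > 5 and leader[5] in statuses:
        return datetime.now(timezone.utc).isoformat()
    return None

print(deleted_timestamp('00000dz  a2200000oc 4500', GND_DELETED_STATUSES))    # timestamp
print(deleted_timestamp('00000nz  a2200000oc 4500', IDREF_DELETED_STATUSES))  # None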
8 changes: 2 additions & 6 deletions rero_mef/marctojson/do_idref_agent.py
@@ -17,9 +17,7 @@

"""Marctojsons transformer for IDREF records."""

from datetime import datetime

import pytz
from datetime import datetime, timezone

from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \
LANGUAGES, build_string_list_from_fields, remove_trailing_punctuation
@@ -149,9 +147,7 @@ def trans_idref_deleted(self):
if self.logger and self.verbose:
self.logger.info('Call Function', 'trans_idref_deleted')
if self.marc.leader[5] == 'd':
self.json_dict['deleted'] = pytz.utc.localize(
datetime.now()
).isoformat()
self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat()

def trans_idref_relation_pid(self):
"""Transformation old pids 035 $a $9 = sudoc."""
53 changes: 28 additions & 25 deletions rero_mef/utils.py
@@ -23,14 +23,13 @@
# under the terms of the MIT License; see LICENSE file for more details.

"""Utilities."""
import datetime
import gc
import hashlib
import json
import os
import traceback
from copy import deepcopy
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from functools import wraps
from io import StringIO
from json import JSONDecodeError, JSONDecoder, dumps
@@ -61,6 +60,9 @@
from sickle.iterator import OAIItemIterator
from sickle.oaiexceptions import NoRecordsMatch

# Hours cannot be retrieved by get_info_by_oai_name
# TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
TIME_FORMAT = '%Y-%m-%d'

def add_oai_source(name, baseurl, metadataprefix='marc21',
setspecs='', comment='', update=False):
@@ -120,7 +122,7 @@ def oai_set_last_run(name, date, verbose=False):
oai_source = get_oaiharvest_object(name)
lastrun_date = date
if isinstance(date, str):
lastrun_date = parser.parse(date)
lastrun_date = parser.isoparse(date).astimezone(timezone.utc)
oai_source.update_lastrun(lastrun_date)
oai_source.save()
db.session.commit()
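A small sketch of what the switch to isoparse() plus astimezone() buys here, assuming dateutil is installed and the last-run strings are ISO 8601; either way the result is timezone-aware and comparable with datetime.now(timezone.utc):

from datetime import timezone

from dateutil import parser

# With an explicit offset the value is simply converted to UTC.
print(parser.isoparse('2022-08-24T10:00:00+02:00').astimezone(timezone.utc))
# 2022-08-24 08:00:00+00:00

# Without an offset, isoparse() returns a naive datetime and astimezone()
# interprets it as local time before converting to UTC.
print(parser.isoparse('2022-08-24').astimezone(timezone.utc).tzinfo)
# UTC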
@@ -227,7 +229,7 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator,
and dates_inital['from'] > dates_inital['until']:
raise WrongDateCombination("'Until' date larger than 'from' date.")

last_run_date = datetime.now()
last_run_date = datetime.now(timezone.utc)

# If we don't have specifications for set searches the setspecs will be
# set to a list with None to go into the retrieval loop without
@@ -249,17 +251,19 @@
if spec:
params['set'] = spec

my_from_date = parser.parse(dates['from'])
my_from_date = parser.isoparse(
dates['from']).astimezone(timezone.utc)
my_until_date = last_run_date
if dates['until']:
my_until_date = parser.parse(dates['until'])
my_until_date = parser.isoparse(
dates['until']).astimezone(timezone.utc)
while my_from_date <= my_until_date:
until_date = my_from_date + timedelta(days=days_span)
if until_date > my_until_date:
until_date = my_until_date
dates = {
'from': my_from_date.strftime("%Y-%m-%d"),
'until': until_date.strftime("%Y-%m-%d")
'from': my_from_date.strftime(TIME_FORMAT),
'until': until_date.strftime(TIME_FORMAT)
}
params.update(dates)
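To make the windowing loop above concrete, a self-contained sketch (names are illustrative, not the repository's) of how a from/until range is split into days_span-sized harvesting windows:

from datetime import datetime, timedelta, timezone

TIME_FORMAT = '%Y-%m-%d'

def date_windows(from_date, until_date, days_span=30):
    """Yield (from, until) string pairs covering the range in days_span chunks."""
    current = from_date
    while current <= until_date:
        window_end = min(current + timedelta(days=days_span), until_date)
        yield current.strftime(TIME_FORMAT), window_end.strftime(TIME_FORMAT)
        # +1 day so consecutive windows do not overlap on the boundary date.
        current += timedelta(days=days_span + 1)

start = datetime(2022, 1, 1, tzinfo=timezone.utc)
end = datetime(2022, 3, 1, tzinfo=timezone.utc)
print(list(date_windows(start, end)))
# [('2022-01-01', '2022-01-31'), ('2022-02-01', '2022-03-01')]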

@@ -327,12 +331,12 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator,
if debug:
traceback.print_exc()
count = -1
my_from_date = my_from_date + timedelta(days=days_span + 1)
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} {spec}: {from_date} .. +{days_span}'
)
my_from_date = my_from_date + timedelta(days=days_span + 1)

if update_last_run:
if verbose:
@@ -371,7 +375,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,
and dates_inital['from'] > dates_inital['until']:
raise WrongDateCombination("'Until' date larger than 'from' date.")

last_run_date = datetime.now()
last_run_date = datetime.now(timezone.utc)

# If we don't have specifications for set searches the setspecs will be
# set to a list with None to go into the retrieval loop without
@@ -387,30 +391,31 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,
}
if access_token:
params['accessToken'] = access_token
params.update(dates)
params |= dates
if spec:
params['set'] = spec

my_from_date = parser.parse(dates['from'])
my_from_date = parser.isoparse(
dates['from']).astimezone(timezone.utc)
my_until_date = last_run_date
if dates['until']:
my_until_date = parser.parse(dates['until'])
my_until_date = parser.isoparse(
dates['until']).astimezone(timezone.utc)
while my_from_date <= my_until_date:
until_date = my_from_date + timedelta(days=days_span)
if until_date > my_until_date:
until_date = my_until_date
dates = {
'from': my_from_date.strftime("%Y-%m-%d"),
'until': until_date.strftime("%Y-%m-%d")
'from': my_from_date.strftime(TIME_FORMAT),
'until': until_date.strftime(TIME_FORMAT)
}
params.update(dates)
params |= dates

try:
for record in request.ListRecords(**params):
count += 1
records = parse_xml_to_array(StringIO(record.raw))
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} spec({spec}): {from_date} '
f'count:{count:>10} = {id}'
@@ -427,7 +432,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,

my_from_date = my_from_date + timedelta(days=days_span + 1)
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} spec({spec}): '
f'{from_date} .. +{days_span}'
@@ -695,7 +700,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer = StringIO()
buffer_uuid = []
index = columns.index('id') if 'id' in columns else -1
start_time = datetime.now()
start_time = datetime.now(timezone.utc)
with open(data, 'r', encoding='utf-8', buffering=1) as input_file:
for line in input_file:
count += 1
@@ -706,7 +711,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer.flush()
buffer.seek(0)
if verbose:
end_time = datetime.now()
end_time = datetime.now(timezone.utc)
diff_time = end_time - start_time
start_time = end_time
click.echo(
@@ -730,7 +735,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer = StringIO()

if verbose:
end_time = datetime.now()
end_time = datetime.now(timezone.utc)
diff_time = end_time - start_time
click.echo(
f'{agent} copy from file: {count} {diff_time.seconds}s',
@@ -1113,9 +1118,7 @@ def get_timestamp(name):
:returns: time of time stamp
"""
time_stamps = current_cache.get('timestamps')
if not time_stamps:
return None
return time_stamps.get(name)
return time_stamps.get(name) if time_stamps else None


def settimestamp(func):
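The get_timestamp()/settimestamp() pair above is a small cache-backed bookkeeping pattern. A minimal sketch of that pattern under the assumption that the cache behaves like a dict (the repository uses Invenio's current_cache instead):

from datetime import datetime, timezone
from functools import wraps

_cache = {}  # stand-in for current_cache

def get_timestamp(name):
    """Return the stored UTC timestamp for name, or None if none was recorded."""
    time_stamps = _cache.get('timestamps')
    return time_stamps.get(name) if time_stamps else None

def settimestamp(func):
    """Record an aware UTC timestamp under the wrapped function's name."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        time_stamps = _cache.get('timestamps') or {}
        time_stamps[func.__name__] = datetime.now(timezone.utc)
        _cache['timestamps'] = time_stamps
        return result
    return wrapper

@settimestamp
def harvest():
    return 'done'

harvest()
print(get_timestamp('harvest'))  # aware UTC datetime of the last run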
