Skip to content

Commit 5f178aa

Browse files
committed
weko#25440 - fixbug harvest permission on OAI-PMH
1 parent 69a1b05 commit 5f178aa

File tree

3 files changed

+83
-38
lines changed

3 files changed

+83
-38
lines changed

modules/invenio-oaiserver/invenio_oaiserver/query.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,19 @@ def add_condition_doi_and_future_date(query):
146146
search = search.filter('range', **{'_updated': time_range})
147147

148148
search = search.query('match', **{'relation_version_is_last': 'true'})
149-
indexes = Indexes.get_unharvested_indexes()
150-
for index in indexes:
151-
search = search.query(
152-
'bool',
153-
**{'must_not': [
154-
{'wildcard': {'path': str(index.id)}}]})
155-
search = search.query(
156-
'bool',
157-
**{'must_not': [
158-
{'wildcard': {'path': '*/' + str(index.id)}}]})
149+
index_paths = Indexes.get_harverted_index_list()
150+
query_filter = [
151+
# script get deleted items.
152+
{"bool": {"must_not": {"exists": {"field": "path"}}}}
153+
]
154+
for index_path in index_paths:
155+
query_filter.append({
156+
"wildcard": {
157+
"path": index_path
158+
}
159+
})
160+
search = search.query(
161+
'bool', **{'must': [{'bool': {'should': query_filter}}]})
159162
add_condition_doi_and_future_date(search)
160163
response = search.execute().to_dict()
161164
else:

modules/invenio-oaiserver/invenio_oaiserver/response.py

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -281,28 +281,7 @@ def is_private_workflow(record):
281281

282282
def is_private_index(record):
283283
"""Check index of workflow is private."""
284-
def is_future_date(public_date):
285-
"""Check public date (Index Tree) is future date."""
286-
cur_date = datetime.now()
287-
return public_date and public_date > cur_date
288-
289-
# Get current and parent indexes of this record.
290-
list_index = record.get("path")
291-
index_lst = []
292-
if list_index:
293-
index_id_lst = []
294-
for index in list_index:
295-
indexes = str(index).split('/')
296-
index_id_lst.append(indexes[-1])
297-
index_lst = index_id_lst
298-
# check private and future date of all(current and parent) indexes.
299-
indexes = Indexes.get_path_list(index_lst)
300-
publish_state = 6
301-
publish_date = 7
302-
for index in indexes:
303-
if not index[publish_state] or is_future_date(index[publish_date]):
304-
return True
305-
return False
284+
return not Indexes.is_public_state(copy.deepcopy(record.get("path")))
306285

307286

308287
def set_identifier(param_record, param_rec):
@@ -351,17 +330,18 @@ def get_error_code_msg():
351330
e_record = SubElement(e_getrecord, etree.QName(NS_OAIPMH, 'record'))
352331
set_identifier(record, record)
353332
# Harvest is private
333+
_is_private_index = is_private_index(record)
354334
if not harvest_public_state or\
355335
(identify and not identify.outPutSetting) or \
356-
(is_private_index(record)
336+
(_is_private_index
357337
and harvest_public_state and is_exists_doi(record)):
358338
return error(get_error_code_msg(), **kwargs)
359339
# Item is deleted
360340
# or Harvest is public & Item is private
361341
# or Harvest is public & Index is private
362342
elif is_deleted_workflow(pid) or (
363343
harvest_public_state and is_private_workflow(record)) or (
364-
harvest_public_state and is_private_index(record)):
344+
harvest_public_state and _is_private_index):
365345
header(
366346
e_record,
367347
identifier=pid.pid_value,
@@ -446,8 +426,6 @@ def append_deleted_record(e_listrecords, pid_object, rec):
446426
pid = oaiid_fetcher(record['id'], record['json']['_source'])
447427
pid_object = OAIIDProvider.get(pid_value=pid.pid_value).pid
448428
rec = WekoRecord.get_record(record['id'])
449-
harvest_public_state, r = \
450-
WekoRecord.get_record_with_hps(pid_object.object_uuid)
451429
set_identifier(record, rec)
452430
# Check output delete, noRecordsMatch
453431
if not is_private_index(rec):
@@ -474,7 +452,7 @@ def append_deleted_record(e_listrecords, pid_object, rec):
474452
e_metadata.append(record_dumper(pid, etree_record))
475453
except Exception:
476454
current_app.logger.error(traceback.print_exc())
477-
current_app.logger.error('Error when exporting item id'
455+
current_app.logger.error('Error when exporting item id '
478456
+ str(record['id']))
479457
# Check <record> tag not exist.
480458
if len(e_listrecords) == 0:

modules/weko-index-tree/weko_index_tree/api.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"""API for weko-index-tree."""
2222

2323
from copy import deepcopy
24-
from datetime import datetime
24+
from datetime import date, datetime
2525
from functools import partial
2626

2727
from flask import current_app, json
@@ -1187,6 +1187,34 @@ def get_harvest_public_state(cls, path):
11871187
current_app.logger.debug(se)
11881188
return False
11891189

1190+
@classmethod
def is_public_state(cls, paths):
    """Check whether at least one of the given index paths is fully public.

    Each entry of ``paths`` is split on '/' into index ids. For one path,
    the SQL aggregate ``every`` is true only when every matched index has
    ``public_state`` set and a ``public_date`` that is NULL or not later
    than today. The per-path results are combined with ``bool_or``, so a
    single fully public path is enough.

    The argument is not modified.

    :param paths: list of '/'-separated index id strings; may be empty
        or None.
    :return: the aggregated state from the database (truthy when some
        path is fully public); False when ``paths`` is empty/None or the
        query fails.
    """
    def _query(index_ids):
        # One row per path: true only if all indexes on the path are
        # public and already past their publication date.
        return db.session.query(
            func.every(db.and_(
                Index.public_state,
                db.or_(
                    Index.public_date.is_(None),
                    Index.public_date <= date.today()
                ))).label('parent_state')
        ).filter(Index.id.in_(index_ids))

    # Nothing to check: report "not public" without touching the database
    # (previously reached only through the exception handler below).
    if not paths:
        return False

    try:
        # Build the per-path queries from a fresh list so the caller's
        # ``paths`` is left untouched.
        queries = [_query(path.split('/')) for path in paths]
        # Same shape as before: the last path is the base query, the
        # remaining ones are appended with UNION ALL.
        smt = queries[-1].union_all(*queries[:-1]).subquery()
        result = db.session.query(
            func.bool_or(
                smt.c.parent_state).label('parent_state')).one()
        return result.parent_state
    except Exception as se:
        # Best effort, consistent with get_harvest_public_state: any
        # failure is logged and treated as "not public".
        current_app.logger.debug(se)
        return False
1217+
11901218
@classmethod
11911219
def set_item_sort_custom(cls, index_id, sort_json={}):
11921220
"""Set custom sort."""
@@ -1496,3 +1524,39 @@ def get_full_path(cls, index_id=0):
14961524
obj = db.session.query(*qlst). \
14971525
order_by(recursive_t.c.pid).first()
14981526
return obj.path if obj else ''
1527+
1528+
@classmethod
def get_harverted_index_list(cls):
    """Collect the id paths of indexes that are open to harvesting.

    A recursive CTE is seeded with the root indexes (``parent == 0``)
    whose ``harvest_public_state`` is true, then repeatedly extended with
    child indexes whose ``harvest_public_state`` is also true. An index
    is therefore listed only when every index from the root down to it
    is harvest-public.

    :return: list of path strings, each made of index ids joined by '/'
        from the root index down to the listed index.
    """
    # Anchor member: harvest-public roots; the path starts as the own id.
    root_query = db.session.query(
        Index.parent.label("pid"),
        Index.id.label("cid"),
        func.cast(Index.id, db.Text).label("path")
    ).filter(
        Index.parent == 0,
        Index.harvest_public_state == True
    )
    recursive_t = root_query.cte(name="recursive_t", recursive=True)

    rec_alias = aliased(recursive_t, name="rec")
    child_alias = aliased(Index, name="t")

    # Recursive member: harvest-public children of rows already found,
    # with the child's id appended to the parent's path.
    child_query = db.session.query(
        child_alias.parent,
        child_alias.id,
        rec_alias.c.path + '/' + func.cast(child_alias.id, db.Text)
    ).filter(
        child_alias.parent == rec_alias.c.cid,
        child_alias.harvest_public_state == True)
    recursive_t = recursive_t.union_all(child_query)

    with db.session.begin_nested():
        rows = db.session.query(recursive_t.c.path). \
            order_by(recursive_t.c.pid).all()
        harvested_paths = [row.path for row in rows]
    return harvested_paths

0 commit comments

Comments
 (0)