Skip to content

Commit

Permalink
WebSearch: improvements for collection admin
Browse files Browse the repository at this point in the history
* Fix collection loop check in webcoll (in get_ancestors and
  get_descendants) that was broken for some cases.

* New fix_collection_scores() function in websearchadminlib.py to
  correct and uniformize all collection scores in collection_collection
  table.

* Improve speed of perform_checkcollectionstatus by correctly exploiting
  restricted_collections cache and by checking only if collections have
  sons without calculating the whole descendants set.
  • Loading branch information
kaplun authored and tiborsimko committed Mar 25, 2011
1 parent 24eca22 commit 9b93e94
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 23 deletions.
6 changes: 3 additions & 3 deletions modules/websearch/doc/admin/websearch-admin-guide.webdoc
Original file line number Diff line number Diff line change
Expand Up @@ -707,9 +707,9 @@ the Apache groups mentioned in this column.
<dd>If no errors were found, <em>OK</em> is displayed for each
collection. If an error was found, then an error number and short
message are shown. The meaning of the error messages is the
following: <em>1:Query</em> means that the collection was defined via
a query but also via subcollections too; <em>2:Query</em> means that
the collection wasn't defined neither via query nor via
following: <em>1:Conflict</em> means that the collection was defined
via a query and also via subcollections; <em>2:Empty</em> means
that the collection was defined neither via a query nor via
subcollections.

</dl>
Expand Down
19 changes: 13 additions & 6 deletions modules/websearch/lib/websearch_webcoll.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from invenio.bibrank_record_sorter import get_bibrank_methods
from invenio.dateutils import convert_datestruct_to_dategui
from invenio.bibformat import format_record
from invenio.intbitset import intbitset
from invenio.websearch_external_collections import \
external_collection_load_states, \
dico_collection_external_searches, \
Expand Down Expand Up @@ -208,6 +209,7 @@ def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolo
def get_ancestors(self):
"Returns list of ancestors of the current collection."
ancestors = []
ancestors_ids = intbitset()
id_son = self.id
while 1:
query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\
Expand All @@ -216,11 +218,12 @@ def get_ancestors(self):
if res:
col_ancestor = get_collection(res[0][1])
# looking for loops
if col_ancestor in ancestors:
if self.id in ancestors_ids:
write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
raise OverflowError
raise OverflowError("Loop found in collection %s" % self.name)
else:
ancestors.append(col_ancestor)
ancestors_ids.add(col_ancestor.id)
id_son = res[0][0]
else:
break
Expand Down Expand Up @@ -250,19 +253,24 @@ def get_sons(self, type='r'):
def get_descendants(self, type='r'):
"Returns list of all descendants of type 'type' for the current collection."
descendants = []
descendant_ids = intbitset()
id_dad = self.id
query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
"WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type)
res = run_sql(query)
for row in res:
col_desc = get_collection(row[1])
# looking for loops
if col_desc in descendants:
if self.id in descendant_ids:
write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
raise OverflowError
raise OverflowError("Loop found in collection %s" % self.name)
else:
descendants.append(col_desc)
descendants += col_desc.get_descendants()
descendant_ids.add(col_desc.id)
tmp_descendants = col_desc.get_descendants()
for descendant in tmp_descendants:
descendant_ids.add(descendant.id)
descendants += tmp_descendants
return descendants

def write_cache_file(self, filename='', filebody=''):
Expand Down Expand Up @@ -447,7 +455,6 @@ def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_S
# apply special filters:
if self.name in ['Videos']:
# select only videos with movies:
from invenio.intbitset import intbitset
recIDs = list(intbitset(recIDs) & \
search_pattern(p='collection:"PUBLVIDEOMOVIE"'))
# sort some CERN collections specially:
Expand Down
41 changes: 27 additions & 14 deletions modules/websearch/lib/websearchadminlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
from invenio.access_control_admin import acc_get_action_id
from invenio.access_control_config import VIEWRESTRCOLL
from invenio.errorlib import register_exception
from invenio.intbitset import intbitset

def getnavtrail(previous = ''):
"""Get the navtrail"""
Expand All @@ -77,6 +78,14 @@ def getnavtrail(previous = ''):
navtrail = navtrail + previous
return navtrail

def fix_collection_scores():
    """
    Re-calculate and re-normalize the scores of the collection relationship.

    For every id_dad found in the collection_collection table, the rows of
    its sons are rewritten with evenly spaced scores (10, 20, 30, ...),
    keeping the sons in their current relative order.
    """
    for id_dad in intbitset(run_sql("SELECT id_dad FROM collection_collection")):
        # The new score grows with the enumeration index, so the sons must be
        # read from the lowest to the highest current score; reading them in
        # descending order would hand the smallest new score to the son that
        # currently has the largest one and thus reverse the ordering.
        sons = run_sql("SELECT id_son FROM collection_collection WHERE id_dad=%s ORDER BY score ASC", (id_dad, ))
        for index, row in enumerate(sons):
            run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s AND id_son=%s", (index * 10 + 10, id_dad, row[0]))

def perform_modifytranslations(colID, ln, sel_type='', trans=[], confirm=-1, callback='yes'):
"""Modify the translations of a collection
sel_type - the nametype to modify
Expand Down Expand Up @@ -2462,7 +2471,7 @@ def perform_modifyrestricted(colID, ln, rest='', callback='yes', confirm=-1):
def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
"""Check the configuration of the collections."""

from invenio.search_engine import collection_restricted_p
from invenio.search_engine import collection_restricted_p, restricted_collection_cache

subtitle = """<a name="11"></a>Collection Status&nbsp;&nbsp;&nbsp;[<a href="%s/help/admin/websearch-admin-guide#6">?</a>]""" % CFG_SITE_URL
output = ""
Expand All @@ -2477,9 +2486,11 @@ def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
rnk_list = get_def_name('', "rnkMETHOD")
actions = []

restricted_collection_cache.recreate_cache_if_needed()

for (id, name, dbquery, nbrecs) in collections:
reg_sons = len(get_col_tree(id, 'r'))
vir_sons = len(get_col_tree(id, 'v'))
reg_sons = col_has_son(id, 'r')
vir_sons = col_has_son(id, 'v')
status = ""
hosted = ""

Expand All @@ -2494,28 +2505,26 @@ def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
i8n += "%s, " % lang
else:
i8n = """<b><span class="info">None</span></b>"""
if (reg_sons > 1 and dbquery) or dbquery=="":
status = """<b><span class="warning">1:Query</span></b>"""
elif dbquery is None and reg_sons == 1:
status = """<b><span class="warning">2:Query</span></b>"""
elif dbquery == "" and reg_sons == 1:
status = """<b><span class="warning">3:Query</span></b>"""

if (reg_sons > 1 or vir_sons > 1):
if reg_sons and dbquery:
status = """<b><span class="warning">1:Conflict</span></b>"""
elif not dbquery and not reg_sons:
status = """<b><span class="warning">2:Empty</span></b>"""

if (reg_sons or vir_sons):
subs = """<b><span class="info">Yes</span></b>"""
else:
subs = """<b><span class="info">No</span></b>"""

if dbquery is None:
dbquery = """<b><span class="info">No</span></b>"""

restricted = collection_restricted_p(name)
restricted = collection_restricted_p(name, recreate_cache_if_needed=False)
if restricted:
restricted = """<b><span class="warning">Yes</span></b>"""
if status:
status += """<b><span class="warning">,4:Restricted</span></b>"""
status += """<b><span class="warning">,3:Restricted</span></b>"""
else:
status += """<b><span class="warning">4:Restricted</span></b>"""
status += """<b><span class="warning">3:Restricted</span></b>"""
else:
restricted = """<b><span class="info">No</span></b>"""

Expand Down Expand Up @@ -2668,6 +2677,10 @@ def perform_checkexternalcollections(colID, ln, icl=None, update="", confirm=0,
else:
return addadminbox(subtitle, body)

def col_has_son(colID, rtype='r'):
    """Tell whether the collection has at least one son.

    colID - id of the parent collection (matched against id_dad)
    rtype - relationship type (matched against the type column)
    Returns True when the lookup in collection_collection yields a result
    different from the empty tuple, False otherwise.
    """
    query = "SELECT id_son FROM collection_collection WHERE id_dad=%s and type=%s LIMIT 1"
    # LIMIT 1: only existence matters, so a single row is enough.
    found = run_sql(query, (colID, rtype))
    return found != ()

def get_col_tree(colID, rtype=''):
"""Returns a presentation of the tree as a list. TODO: Add loop detection
colID - startpoint for the tree
Expand Down

0 comments on commit 9b93e94

Please sign in to comment.