diff --git a/modules/websearch/doc/admin/websearch-admin-guide.webdoc b/modules/websearch/doc/admin/websearch-admin-guide.webdoc
index 8d9c3e67b..ecf4fcbd1 100644
--- a/modules/websearch/doc/admin/websearch-admin-guide.webdoc
+++ b/modules/websearch/doc/admin/websearch-admin-guide.webdoc
@@ -707,9 +707,9 @@ the Apache groups mentioned in this column.
If no errors was found, OK is displayed for each
collection. If an error was found, then an error number and short
message are shown. The meaning of the error messages is the
-following: 1:Query means that the collection was defined via
-a query but also via subcollections too; 2:Query means that
-the collection wasn't defined neither via query nor via
+following: 1:Conflict means that the collection was defined
+both via a query and via subcollections; 2:Empty means
+that the collection was defined neither via a query nor via
subcollections.
diff --git a/modules/websearch/lib/websearch_webcoll.py b/modules/websearch/lib/websearch_webcoll.py
index fe87a78e3..16e0caf19 100644
--- a/modules/websearch/lib/websearch_webcoll.py
+++ b/modules/websearch/lib/websearch_webcoll.py
@@ -45,6 +45,7 @@
from invenio.bibrank_record_sorter import get_bibrank_methods
from invenio.dateutils import convert_datestruct_to_dategui
from invenio.bibformat import format_record
+from invenio.intbitset import intbitset
from invenio.websearch_external_collections import \
external_collection_load_states, \
dico_collection_external_searches, \
@@ -208,6 +209,7 @@ def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolo
def get_ancestors(self):
"Returns list of ancestors of the current collection."
ancestors = []
+ ancestors_ids = intbitset()
id_son = self.id
while 1:
query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\
@@ -216,11 +218,12 @@ def get_ancestors(self):
if res:
col_ancestor = get_collection(res[0][1])
# looking for loops
- if col_ancestor in ancestors:
+ if col_ancestor.id in ancestors_ids:
write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
- raise OverflowError
+ raise OverflowError("Loop found in collection %s" % self.name)
else:
ancestors.append(col_ancestor)
+ ancestors_ids.add(col_ancestor.id)
id_son = res[0][0]
else:
break
@@ -250,6 +253,7 @@ def get_sons(self, type='r'):
def get_descendants(self, type='r'):
"Returns list of all descendants of type 'type' for the current collection."
descendants = []
+ descendant_ids = intbitset()
id_dad = self.id
query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
"WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type)
@@ -257,12 +261,16 @@ def get_descendants(self, type='r'):
for row in res:
col_desc = get_collection(row[1])
# looking for loops
- if col_desc in descendants:
+ if col_desc.id in descendant_ids:
write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
- raise OverflowError
+ raise OverflowError("Loop found in collection %s" % self.name)
else:
descendants.append(col_desc)
- descendants += col_desc.get_descendants()
+ descendant_ids.add(col_desc.id)
+ tmp_descendants = col_desc.get_descendants()
+ for descendant in tmp_descendants:
+ descendant_ids.add(descendant.id)
+ descendants += tmp_descendants
return descendants
def write_cache_file(self, filename='', filebody=''):
@@ -447,7 +455,6 @@ def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_S
# apply special filters:
if self.name in ['Videos']:
# select only videos with movies:
- from invenio.intbitset import intbitset
recIDs = list(intbitset(recIDs) & \
search_pattern(p='collection:"PUBLVIDEOMOVIE"'))
# sort some CERN collections specially:
diff --git a/modules/websearch/lib/websearchadminlib.py b/modules/websearch/lib/websearchadminlib.py
index 05d9f33f7..896dae124 100644
--- a/modules/websearch/lib/websearchadminlib.py
+++ b/modules/websearch/lib/websearchadminlib.py
@@ -69,6 +69,7 @@
from invenio.access_control_admin import acc_get_action_id
from invenio.access_control_config import VIEWRESTRCOLL
from invenio.errorlib import register_exception
+from invenio.intbitset import intbitset
def getnavtrail(previous = ''):
"""Get the navtrail"""
@@ -77,6 +78,14 @@ def getnavtrail(previous = ''):
navtrail = navtrail + previous
return navtrail
+def fix_collection_scores():
+    """Re-calculate and re-normalize the scores of the collection relationship: sons of each dad get 10, 20, 30, ... in their current order."""
+    # intbitset is used here to visit each distinct id_dad once (assumes set semantics over the returned rows).
+    for id_dad in intbitset(run_sql("SELECT id_dad FROM collection_collection")):
+        # Read sons lowest score first: scores are assigned ascending, so reading DESC would reverse the sibling order on every run.
+        for index, id_son in enumerate(run_sql("SELECT id_son FROM collection_collection WHERE id_dad=%s ORDER BY score ASC", (id_dad, ))):
+            run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s AND id_son=%s", (index * 10 + 10, id_dad, id_son[0]))
+
def perform_modifytranslations(colID, ln, sel_type='', trans=[], confirm=-1, callback='yes'):
"""Modify the translations of a collection
sel_type - the nametype to modify
@@ -2462,7 +2471,7 @@ def perform_modifyrestricted(colID, ln, rest='', callback='yes', confirm=-1):
def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
"""Check the configuration of the collections."""
- from invenio.search_engine import collection_restricted_p
+ from invenio.search_engine import collection_restricted_p, restricted_collection_cache
subtitle = """Collection Status [?]""" % CFG_SITE_URL
output = ""
@@ -2477,9 +2486,11 @@ def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
rnk_list = get_def_name('', "rnkMETHOD")
actions = []
+ restricted_collection_cache.recreate_cache_if_needed()
+
for (id, name, dbquery, nbrecs) in collections:
- reg_sons = len(get_col_tree(id, 'r'))
- vir_sons = len(get_col_tree(id, 'v'))
+ reg_sons = col_has_son(id, 'r')
+ vir_sons = col_has_son(id, 'v')
status = ""
hosted = ""
@@ -2494,14 +2505,12 @@ def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
i8n += "%s, " % lang
else:
i8n = """None"""
- if (reg_sons > 1 and dbquery) or dbquery=="":
- status = """1:Query"""
- elif dbquery is None and reg_sons == 1:
- status = """2:Query"""
- elif dbquery == "" and reg_sons == 1:
- status = """3:Query"""
-
- if (reg_sons > 1 or vir_sons > 1):
+ if reg_sons and dbquery:
+ status = """1:Conflict"""
+ elif not dbquery and not reg_sons:
+ status = """2:Empty"""
+
+ if (reg_sons or vir_sons):
subs = """Yes"""
else:
subs = """No"""
@@ -2509,13 +2518,13 @@ def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
if dbquery is None:
dbquery = """No"""
- restricted = collection_restricted_p(name)
+ restricted = collection_restricted_p(name, recreate_cache_if_needed=False)
if restricted:
restricted = """Yes"""
if status:
- status += """,4:Restricted"""
+ status += """,3:Restricted"""
else:
- status += """4:Restricted"""
+ status += """3:Restricted"""
else:
restricted = """No"""
@@ -2668,6 +2677,10 @@ def perform_checkexternalcollections(colID, ln, icl=None, update="", confirm=0,
else:
return addadminbox(subtitle, body)
+def col_has_son(colID, rtype='r'):
+    """Return True if collection colID has at least one son linked with relationship type rtype (callers pass 'r' or 'v'), False otherwise."""
+    return bool(run_sql("SELECT id_son FROM collection_collection WHERE id_dad=%s and type=%s LIMIT 1", (colID, rtype)))  # truthiness works for any empty result, not only ()
+
def get_col_tree(colID, rtype=''):
"""Returns a presentation of the tree as a list. TODO: Add loop detection
colID - startpoint for the tree