BibAuthorID: fix author page query links

* Improve the fallback solution for the cases where: 1) there is no entry in bibauthorid, 2) there is more than one entry in bibauthorid (future: 'did you mean' landing page), 3) BibAuthorID is not available.
* Improve URL handling to accept the ?recid= parameter while still supporting <recid>:<name> passing.
* Fix links to point to a compound exact-author search that respects all name variants. This is an interim solution until an author id index is in place. (closes #461)
* Fix bad behaviour in which bibauthorid would return incompatible results: low-probability hits are now excluded.
* Escape parameters to protect against XSS attacks.
* Add a warning for the 'this author is not on this paper' case.
adams164 · Feb 9, 2011 · 3693b38 · 3693b38
1 parent d460173
commit 3693b38
Show file tree

Hide file tree

Showing 5 changed files with 181 additions and 56 deletions.
diff --git a/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py b/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py
@@ -478,7 +478,7 @@ def find_personIDs_by_name_string(namestring):
 #   matching_pids = sorted(matching_pids, key=lambda k: k[3], reverse=True)
     persons = {}
     for n in matching_pids:
-        if n[3] >= 0.0:
+        if n[3] >= 0.8:
             if n[0] not in persons:
                 persons[n[0]] = sorted([[p[1], p[2], p[3]] for p in  matching_pids if p[0] == n[0]],
                                 key=lambda k: k[2], reverse=True)

diff --git a/modules/bibauthorid/lib/bibauthorid_webapi.py b/modules/bibauthorid/lib/bibauthorid_webapi.py
@@ -172,6 +172,25 @@ def get_person_names_from_id(person_id= -1):
     return tu.get_person_names_count((person_id,))
 
 
+def get_person_db_names_from_id(person_id= -1):
+    '''
+    Finds and returns the names associated with this person as stored in the
+    meta data of the underlying data set, along with the frequency of
+    occurrence (i.e. the number of papers) of each name.
+
+    @param person_id: an id to find the names for (defaults to -1)
+    @type person_id: int
+
+    @return: name and number of occurrences of the name; [] for an invalid id
+    @rtype: tuple of tuple (presumably -- callee not visible here), or list
+    '''
+    # Guard: a negative or non-int id yields an empty list, with no lookup.
+    if (not person_id > -1) or (not isinstance(person_id, int)):
+        return []
+
+    return tu.get_person_db_names_count((person_id,))
+
+
 def get_paper_status(person_id, bibref):
     '''
     Finds an returns the status of a bibrec to person assignment

diff --git a/modules/bibformat/lib/elements/bfe_authors.py b/modules/bibformat/lib/elements/bfe_authors.py
@@ -27,7 +27,8 @@ def format_element(bfo, limit, separator=' ; ',
            affiliation_prefix=' (',
            affiliation_suffix=')',
            interactive="no",
-           highlight="no"):
+           highlight="no",
+           link_author_pages="no"):
     """
     Prints the list of authors of a record.
 
@@ -69,15 +70,15 @@ def format_element(bfo, limit, separator=' ; ',
                                                         bfo.search_pattern)
 
             if print_links.lower() == "yes":
-                if True: # FIXME: /author/123:Ellis is not a user-friendly default
+                if link_author_pages == "no":
                     author['a'] = '<a href="' + CFG_SITE_URL + \
                                   '/search?f=author&amp;p=' + quote(author['a']) + \
                                   '&amp;ln=' + bfo.lang + \
                                   '">' + escape(author['a']) + '</a>'
                 else:
                     author['a'] = '<a href="' + CFG_SITE_URL + \
-                                  '/author/' + bibrec_id + ':' + \
-                                  quote(author['a']) + \
+                                  '/author/' + quote(author['a']) + \
+                                  '?recid=' +  bibrec_id + \
                                   '&ln=' + bfo.lang + \
                                   '">' + escape(author['a']) + '</a>'
 

diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py
@@ -3889,7 +3889,7 @@ def tmpl_detailed_record_citations_self_cited(self, recID, ln,
         return out
 
     def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict,
-                                citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, ln):
+                                citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, is_bibauthorid, ln):
         """Prints stuff about the author given as authorname.
            1. Author name + his/her institutes. Each institute I has a link
               to papers where the auhtor has I as institute.
@@ -3911,6 +3911,7 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
         from invenio.search_engine import perform_request_search
         from operator import itemgetter
         _ = gettext_set_language(ln)
+        ib_pubs = intbitset(pubs)
 
         # Prepare data for display
         # construct names box
@@ -3927,19 +3928,41 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
             content.append("%s (%s)" % (name, name_lnk))
 
         if not content:
-            content = [_("No name variants found")]
+            content = [_("No Name Variants")]
 
         names_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))
+        # construct an extended search as an interim solution for author id
+        # searches. Will build "(exactauthor:v1 OR exactauthor:v2)" strings
+        extended_author_search_str = ""
+
+        if is_bibauthorid:
+            if len(names_dict.keys()) > 1:
+                extended_author_search_str = '('
+
+            for name_index, name_query in enumerate(names_dict.keys()):
+                if name_index > 0:
+                    extended_author_search_str += " OR "
+
+                extended_author_search_str += 'exactauthor:"' + name_query + '"'
+
+            if len(names_dict.keys()) > 1:
+                extended_author_search_str += ')'
 
         # construct papers box
-        searchstr = create_html_link(self.build_search_url(p=authorname,
-                                     f='exactauthor'),
+        rec_query = 'exactauthor:"' + authorname + '"'
+
+        if is_bibauthorid and extended_author_search_str:
+            rec_query = extended_author_search_str
+
+        searchstr = create_html_link(self.build_search_url(p=rec_query),
                                      {}, "All papers (" + str(len(pubs)) + ")",)
-        line1 = "<strong>" + _("Records") + "</strong>"
+        line1 = "<strong>" + _("Papers") + "</strong>"
         line2 = searchstr
+
         if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads:
             line2 += " (" + _("downloaded") + " "
             line2 += str(num_downloads) + " " + _("times") + ")"
+
         if CFG_INSPIRE_SITE:
             CFG_COLLS = ['Book',
                          'Conference',
@@ -3956,30 +3979,42 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
                          'Preprint', ]
         collsd = {}
         for coll in CFG_COLLS:
-            coll_num_papers = len(intbitset(pubs) & intbitset(perform_request_search(p="collection:" + coll)))
-            if coll_num_papers:
-                collsd[coll] = coll_num_papers
+            coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll)))
+            if coll_papers:
+                collsd[coll] = coll_papers
         colls = collsd.keys()
-        colls.sort(lambda x, y: cmp(collsd[y], collsd[x])) # sort by number of papers
+        colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers
         for coll in colls:
-            line2 += "<br>" + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \
-                                                                     'collection:' + coll),
-                                                   {}, coll + " (" + str(collsd[coll]) + ")",)
+            rec_query = 'exactauthor:"' + authorname + '" ' + 'collection:' + coll
+
+            if is_bibauthorid and extended_author_search_str:
+                rec_query = extended_author_search_str + ' collection:' + coll
+
+            line2 += "<br />" + create_html_link(self.build_search_url(p=rec_query),
+                                                                       {}, coll + " (" + str(len(collsd[coll])) + ")",)
+
+        if not pubs:
+            line2 = _("No Papers")
 
         papers_box = self.tmpl_print_searchresultbox(line1, line2)
 
         #make a authoraff string that looks like CERN (1), Caltech (2) etc
         authoraff = ""
         aff_pubdict_keys = aff_pubdict.keys()
         aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x])))
-        for a in aff_pubdict_keys:
-            recids = "+or+".join(map(str, aff_pubdict[a]))
-            print_a = a
-            if (print_a == ' '):
-                print_a = _("unknown")
-            if authoraff:
-                authoraff += '<br>'
-            authoraff += "<a href=\"../search?f=recid&p=" + recids + "\">" + print_a + ' (' + str(len(aff_pubdict[a])) + ")</a>"
+
+        if aff_pubdict_keys:
+            for a in aff_pubdict_keys:
+                print_a = a
+                if (print_a == ' '):
+                    print_a = _("unknown affiliation")
+                if authoraff:
+                    authoraff += '<br>'
+                authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]),
+                                                                       f='recid'),
+                                                                       {}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',)
+        else:
+            authoraff = _("No Affiliations")
 
         line1 = "<strong>" + _("Affiliations") + "</strong>"
         line2 = authoraff
@@ -3991,15 +4026,17 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
             for (kw, freq) in kwtuples:
                 if keywstr:
                     keywstr += '<br>'
-                #create a link in author=x, keyword=y
-                searchstr = create_html_link(self.build_search_url(
-                                                p='exactauthor:"' + authorname + '" ' +
-                                                  'keyword:"' + kw + '"'),
-                                                {}, kw + " (" + str(freq) + ")",)
+                rec_query = 'exactauthor:"' + authorname + '" ' + 'keyword:"' + kw + '"'
+
+                if is_bibauthorid and extended_author_search_str:
+                    rec_query = extended_author_search_str + ' keyword:"' + kw + '"'
+
+                searchstr = create_html_link(self.build_search_url(p=rec_query),
+                                                                   {}, kw + " (" + str(freq) + ")",)
                 keywstr = keywstr + " " + searchstr
 
         else:
-            keywstr += 'No Keywords found'
+            keywstr += _('No Keywords')
 
 
         line1 = "<strong>" + _("Frequent keywords") + "</strong>"
@@ -4009,16 +4046,19 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
 
         header = "<strong>" + _("Frequent co-authors") + "</strong>"
         content = []
+        sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)), key=itemgetter(1), reverse=True)
 
-        for name, frequency in sorted(authors.iteritems(),
-                                      key=itemgetter(1),
-                                      reverse=True):
-            lnk = create_html_link(self.build_search_url(p=name,
-                f='exactauthor'), {}, "%s (%s)" % (name, frequency),)
+        for name, frequency in sorted_coauthors:
+            rec_query = 'exactauthor:"' + authorname + '" ' + 'exactauthor:"' + name + '"'
+
+            if is_bibauthorid and extended_author_search_str:
+                rec_query = extended_author_search_str + ' exactauthor:"' + name + '"'
+
+            lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),)
             content.append("%s" % lnk)
 
         if not content:
-            content = [_("No frequent co-authors")]
+            content = [_("No Frequent Co-authors")]
 
         coauthor_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))
 
@@ -4044,9 +4084,18 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
         req.write("</td></tr></table>")
 
         # print citations:
+        rec_query = 'exactauthor:"' + authorname + '"'
+
+        if is_bibauthorid and extended_author_search_str:
+            rec_query = extended_author_search_str
+
         if len(citedbylist):
             line1 = "<strong>" + _("Citations:") + "</strong>"
             line2 = ""
+
+            if not pubs:
+                line2 = _("No Citation Information available")
+
             req.write(self.tmpl_print_searchresultbox(line1, line2))
 
         # print frequent co-authors: