Skip to content

Commit

Permalink
BibAuthorID: fix author page query links
Browse files Browse the repository at this point in the history
* Improvements for fallback solution in case:
  1) No entry in bibauthorid
  2) >1 entries in bibauthorid (future: 'did you mean' landing page)
  3) BibAuthorID not available

* URL handling improved to accept ?recid= param
  while still supporting <recid>:<name> passing.

* Fix links to point to a compound exact-author search
  that respects all name variants. Interim solution until
  the author id index is in place.
  (closes #461)

* Fixed bad behaviour in which bibauthorid would return
  incompatible results. Now low-probability hits are excluded.

* Param escaping to protect from XSS attacks.

* Warning concerning 'this author is not on this paper' case.
  • Loading branch information
Henning Weiler authored and tiborsimko committed Feb 9, 2011
1 parent d460173 commit 3693b38
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def find_personIDs_by_name_string(namestring):
# matching_pids = sorted(matching_pids, key=lambda k: k[3], reverse=True)
persons = {}
for n in matching_pids:
if n[3] >= 0.0:
if n[3] >= 0.8:
if n[0] not in persons:
persons[n[0]] = sorted([[p[1], p[2], p[3]] for p in matching_pids if p[0] == n[0]],
key=lambda k: k[2], reverse=True)
Expand Down
19 changes: 19 additions & 0 deletions modules/bibauthorid/lib/bibauthorid_webapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,25 @@ def get_person_names_from_id(person_id= -1):
return tu.get_person_names_count((person_id,))


def get_person_db_names_from_id(person_id=-1):
    '''
    Finds and returns the names associated with this person as stored in the
    meta data of the underlying data set along with the
    frequency of occurrence (i.e. the number of papers)

    @param person_id: an id to find the names for; must be a non-negative int
    @type person_id: int

    @return: name and number of occurrences of the name, as delivered by
        tu.get_person_db_names_count; an empty list if person_id is not
        a non-negative int
    @rtype: tuple of tuple (or empty list for an invalid person_id)
    '''
    # Check the type before the range: ordering a non-numeric value (None,
    # a string, ...) against an int raises TypeError on Python 3, so the
    # isinstance test has to short-circuit the comparison.
    if not isinstance(person_id, int) or person_id < 0:
        return []

    # retrieve all rows for the person
    return tu.get_person_db_names_count((person_id,))


def get_paper_status(person_id, bibref):
'''
Finds and returns the status of a bibrec to person assignment
Expand Down
9 changes: 5 additions & 4 deletions modules/bibformat/lib/elements/bfe_authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def format_element(bfo, limit, separator=' ; ',
affiliation_prefix=' (',
affiliation_suffix=')',
interactive="no",
highlight="no"):
highlight="no",
link_author_pages="no"):
"""
Prints the list of authors of a record.
Expand Down Expand Up @@ -69,15 +70,15 @@ def format_element(bfo, limit, separator=' ; ',
bfo.search_pattern)

if print_links.lower() == "yes":
if True: # FIXME: /author/123:Ellis is not a user-friendly default
if link_author_pages == "no":
author['a'] = '<a href="' + CFG_SITE_URL + \
'/search?f=author&amp;p=' + quote(author['a']) + \
'&amp;ln=' + bfo.lang + \
'">' + escape(author['a']) + '</a>'
else:
author['a'] = '<a href="' + CFG_SITE_URL + \
'/author/' + bibrec_id + ':' + \
quote(author['a']) + \
'/author/' + quote(author['a']) + \
'?recid=' + bibrec_id + \
'&ln=' + bfo.lang + \
'">' + escape(author['a']) + '</a>'

Expand Down
113 changes: 81 additions & 32 deletions modules/websearch/lib/websearch_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3889,7 +3889,7 @@ def tmpl_detailed_record_citations_self_cited(self, recID, ln,
return out

def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict,
citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, ln):
citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, is_bibauthorid, ln):
"""Prints stuff about the author given as authorname.
1. Author name + his/her institutes. Each institute I has a link
to papers where the author has I as institute.
Expand All @@ -3911,6 +3911,7 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
from invenio.search_engine import perform_request_search
from operator import itemgetter
_ = gettext_set_language(ln)
ib_pubs = intbitset(pubs)

# Prepare data for display
# construct names box
Expand All @@ -3927,19 +3928,41 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
content.append("%s (%s)" % (name, name_lnk))

if not content:
content = [_("No name variants found")]
content = [_("No Name Variants")]

names_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))
# construct an extended search as an interim solution for author id
# searches. Will build "(exactauthor:v1 OR exactauthor:v2)" strings
extended_author_search_str = ""

if is_bibauthorid:
if len(names_dict.keys()) > 1:
extended_author_search_str = '('

for name_index, name_query in enumerate(names_dict.keys()):
if name_index > 0:
extended_author_search_str += " OR "

extended_author_search_str += 'exactauthor:"' + name_query + '"'

if len(names_dict.keys()) > 1:
extended_author_search_str += ')'

# construct papers box
searchstr = create_html_link(self.build_search_url(p=authorname,
f='exactauthor'),
rec_query = 'exactauthor:"' + authorname + '"'

if is_bibauthorid and extended_author_search_str:
rec_query = extended_author_search_str

searchstr = create_html_link(self.build_search_url(p=rec_query),
{}, "All papers (" + str(len(pubs)) + ")",)
line1 = "<strong>" + _("Records") + "</strong>"
line1 = "<strong>" + _("Papers") + "</strong>"
line2 = searchstr

if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads:
line2 += " (" + _("downloaded") + " "
line2 += str(num_downloads) + " " + _("times") + ")"

if CFG_INSPIRE_SITE:
CFG_COLLS = ['Book',
'Conference',
Expand All @@ -3956,30 +3979,42 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
'Preprint', ]
collsd = {}
for coll in CFG_COLLS:
coll_num_papers = len(intbitset(pubs) & intbitset(perform_request_search(p="collection:" + coll)))
if coll_num_papers:
collsd[coll] = coll_num_papers
coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll)))
if coll_papers:
collsd[coll] = coll_papers
colls = collsd.keys()
colls.sort(lambda x, y: cmp(collsd[y], collsd[x])) # sort by number of papers
colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers
for coll in colls:
line2 += "<br>" + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \
'collection:' + coll),
{}, coll + " (" + str(collsd[coll]) + ")",)
rec_query = 'exactauthor:"' + authorname + '" ' + 'collection:' + coll

if is_bibauthorid and extended_author_search_str:
rec_query = extended_author_search_str + ' collection:' + coll

line2 += "<br />" + create_html_link(self.build_search_url(p=rec_query),
{}, coll + " (" + str(len(collsd[coll])) + ")",)

if not pubs:
line2 = _("No Papers")

papers_box = self.tmpl_print_searchresultbox(line1, line2)

#make an authoraff string that looks like CERN (1), Caltech (2) etc
authoraff = ""
aff_pubdict_keys = aff_pubdict.keys()
aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x])))
for a in aff_pubdict_keys:
recids = "+or+".join(map(str, aff_pubdict[a]))
print_a = a
if (print_a == ' '):
print_a = _("unknown")
if authoraff:
authoraff += '<br>'
authoraff += "<a href=\"../search?f=recid&p=" + recids + "\">" + print_a + ' (' + str(len(aff_pubdict[a])) + ")</a>"

if aff_pubdict_keys:
for a in aff_pubdict_keys:
print_a = a
if (print_a == ' '):
print_a = _("unknown affiliation")
if authoraff:
authoraff += '<br>'
authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]),
f='recid'),
{}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',)
else:
authoraff = _("No Affiliations")

line1 = "<strong>" + _("Affiliations") + "</strong>"
line2 = authoraff
Expand All @@ -3991,15 +4026,17 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
for (kw, freq) in kwtuples:
if keywstr:
keywstr += '<br>'
#create a link in author=x, keyword=y
searchstr = create_html_link(self.build_search_url(
p='exactauthor:"' + authorname + '" ' +
'keyword:"' + kw + '"'),
{}, kw + " (" + str(freq) + ")",)
rec_query = 'exactauthor:"' + authorname + '" ' + 'keyword:"' + kw + '"'

if is_bibauthorid and extended_author_search_str:
rec_query = extended_author_search_str + ' keyword:"' + kw + '"'

searchstr = create_html_link(self.build_search_url(p=rec_query),
{}, kw + " (" + str(freq) + ")",)
keywstr = keywstr + " " + searchstr

else:
keywstr += 'No Keywords found'
keywstr += _('No Keywords')


line1 = "<strong>" + _("Frequent keywords") + "</strong>"
Expand All @@ -4009,16 +4046,19 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd

header = "<strong>" + _("Frequent co-authors") + "</strong>"
content = []
sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)), key=itemgetter(1), reverse=True)

for name, frequency in sorted(authors.iteritems(),
key=itemgetter(1),
reverse=True):
lnk = create_html_link(self.build_search_url(p=name,
f='exactauthor'), {}, "%s (%s)" % (name, frequency),)
for name, frequency in sorted_coauthors:
rec_query = 'exactauthor:"' + authorname + '" ' + 'exactauthor:"' + name + '"'

if is_bibauthorid and extended_author_search_str:
rec_query = extended_author_search_str + ' exactauthor:"' + name + '"'

lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),)
content.append("%s" % lnk)

if not content:
content = [_("No frequent co-authors")]
content = [_("No Frequent Co-authors")]

coauthor_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))

Expand All @@ -4044,9 +4084,18 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd
req.write("</td></tr></table>")

# print citations:
rec_query = 'exactauthor:"' + authorname + '"'

if is_bibauthorid and extended_author_search_str:
rec_query = extended_author_search_str

if len(citedbylist):
line1 = "<strong>" + _("Citations:") + "</strong>"
line2 = ""

if not pubs:
line2 = _("No Citation Information available")

req.write(self.tmpl_print_searchresultbox(line1, line2))

# print frequent co-authors:
Expand Down
Loading

0 comments on commit 3693b38

Please sign in to comment.