diff --git a/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py b/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py index c26634f27..efea88746 100644 --- a/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py +++ b/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py @@ -478,7 +478,7 @@ def find_personIDs_by_name_string(namestring): # matching_pids = sorted(matching_pids, key=lambda k: k[3], reverse=True) persons = {} for n in matching_pids: - if n[3] >= 0.0: + if n[3] >= 0.8: if n[0] not in persons: persons[n[0]] = sorted([[p[1], p[2], p[3]] for p in matching_pids if p[0] == n[0]], key=lambda k: k[2], reverse=True) diff --git a/modules/bibauthorid/lib/bibauthorid_webapi.py b/modules/bibauthorid/lib/bibauthorid_webapi.py index 303ecedb3..66cba635b 100644 --- a/modules/bibauthorid/lib/bibauthorid_webapi.py +++ b/modules/bibauthorid/lib/bibauthorid_webapi.py @@ -172,6 +172,25 @@ def get_person_names_from_id(person_id= -1): return tu.get_person_names_count((person_id,)) +def get_person_db_names_from_id(person_id= -1): + ''' + Finds and returns the names associated with this person as stored in the + meta data of the underlying data set along with the + frequency of occurrence (i.e. the number of papers) + + @param person_id: an id to find the names for + @type person_id: int + + @return: name and number of occurrences of the name + @rtype: tuple of tuple + ''' +# #retrieve all rows for the person + if (not person_id > -1) or (not isinstance(person_id, int)): + return [] + + return tu.get_person_db_names_count((person_id,)) + + def get_paper_status(person_id, bibref): ''' Finds an returns the status of a bibrec to person assignment diff --git a/modules/bibformat/lib/elements/bfe_authors.py b/modules/bibformat/lib/elements/bfe_authors.py index d099d73df..2a2cc8711 100644 --- a/modules/bibformat/lib/elements/bfe_authors.py +++ b/modules/bibformat/lib/elements/bfe_authors.py @@ -27,7 +27,8 @@ def format_element(bfo, limit, separator=' ; ', affiliation_prefix=' (', affiliation_suffix=')', interactive="no", - highlight="no"): + highlight="no", + link_author_pages="no"): """ Prints the list of authors of a record. @@ -69,15 +70,15 @@ def format_element(bfo, limit, separator=' ; ', bfo.search_pattern) if print_links.lower() == "yes": - if True: # FIXME: /author/123:Ellis is not a user-friendly default + if link_author_pages == "no": author['a'] = '' + escape(author['a']) + '' else: author['a'] = '' + escape(author['a']) + '' diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py index aeca59ac0..1547d04c2 100644 --- a/modules/websearch/lib/websearch_templates.py +++ b/modules/websearch/lib/websearch_templates.py @@ -3889,7 +3889,7 @@ def tmpl_detailed_record_citations_self_cited(self, recID, ln, return out def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict, - citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, ln): + citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, is_bibauthorid, ln): """Prints stuff about the author given as authorname. 1. Author name + his/her institutes. Each institute I has a link to papers where the auhtor has I as institute. @@ -3911,6 +3911,7 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd from invenio.search_engine import perform_request_search from operator import itemgetter _ = gettext_set_language(ln) + ib_pubs = intbitset(pubs) # Prepare data for display # construct names box @@ -3927,19 +3928,41 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd content.append("%s (%s)" % (name, name_lnk)) if not content: - content = [_("No name variants found")] + content = [_("No Name Variants")] names_box = self.tmpl_print_searchresultbox(header, "
\n".join(content)) + # construct an extended search as an interim solution for author id + # searches. Will build "(exactauthor:v1 OR exactauthor:v2)" strings + extended_author_search_str = "" + + if is_bibauthorid: + if len(names_dict.keys()) > 1: + extended_author_search_str = '(' + + for name_index, name_query in enumerate(names_dict.keys()): + if name_index > 0: + extended_author_search_str += " OR " + + extended_author_search_str += 'exactauthor:"' + name_query + '"' + + if len(names_dict.keys()) > 1: + extended_author_search_str += ')' # construct papers box - searchstr = create_html_link(self.build_search_url(p=authorname, - f='exactauthor'), + rec_query = 'exactauthor:"' + authorname + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + + searchstr = create_html_link(self.build_search_url(p=rec_query), {}, "All papers (" + str(len(pubs)) + ")",) - line1 = "" + _("Records") + "" + line1 = "" + _("Papers") + "" line2 = searchstr + if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads: line2 += " (" + _("downloaded") + " " line2 += str(num_downloads) + " " + _("times") + ")" + if CFG_INSPIRE_SITE: CFG_COLLS = ['Book', 'Conference', @@ -3956,15 +3979,22 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd 'Preprint', ] collsd = {} for coll in CFG_COLLS: - coll_num_papers = len(intbitset(pubs) & intbitset(perform_request_search(p="collection:" + coll))) - if coll_num_papers: - collsd[coll] = coll_num_papers + coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll))) + if coll_papers: + collsd[coll] = coll_papers colls = collsd.keys() - colls.sort(lambda x, y: cmp(collsd[y], collsd[x])) # sort by number of papers + colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers for coll in colls: - line2 += "
" + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \ - 'collection:' + coll), - {}, coll + " (" + str(collsd[coll]) + ")",) + rec_query = 'exactauthor:"' + authorname + '" ' + 'collection:' + coll + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' collection:' + coll + + line2 += "
" + create_html_link(self.build_search_url(p=rec_query), + {}, coll + " (" + str(len(collsd[coll])) + ")",) + + if not pubs: + line2 = _("No Papers") papers_box = self.tmpl_print_searchresultbox(line1, line2) @@ -3972,14 +4002,19 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd authoraff = "" aff_pubdict_keys = aff_pubdict.keys() aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x]))) - for a in aff_pubdict_keys: - recids = "+or+".join(map(str, aff_pubdict[a])) - print_a = a - if (print_a == ' '): - print_a = _("unknown") - if authoraff: - authoraff += '
' - authoraff += "" + print_a + ' (' + str(len(aff_pubdict[a])) + ")" + + if aff_pubdict_keys: + for a in aff_pubdict_keys: + print_a = a + if (print_a == ' '): + print_a = _("unknown affiliation") + if authoraff: + authoraff += '
' + authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]), + f='recid'), + {}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',) + else: + authoraff = _("No Affiliations") line1 = "" + _("Affiliations") + "" line2 = authoraff @@ -3991,15 +4026,17 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd for (kw, freq) in kwtuples: if keywstr: keywstr += '
' - #create a link in author=x, keyword=y - searchstr = create_html_link(self.build_search_url( - p='exactauthor:"' + authorname + '" ' + - 'keyword:"' + kw + '"'), - {}, kw + " (" + str(freq) + ")",) + rec_query = 'exactauthor:"' + authorname + '" ' + 'keyword:"' + kw + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' keyword:"' + kw + '"' + + searchstr = create_html_link(self.build_search_url(p=rec_query), + {}, kw + " (" + str(freq) + ")",) keywstr = keywstr + " " + searchstr else: - keywstr += 'No Keywords found' + keywstr += _('No Keywords') line1 = "" + _("Frequent keywords") + "" @@ -4009,16 +4046,19 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd header = "" + _("Frequent co-authors") + "" content = [] + sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)), key=itemgetter(1), reverse=True) - for name, frequency in sorted(authors.iteritems(), - key=itemgetter(1), - reverse=True): - lnk = create_html_link(self.build_search_url(p=name, - f='exactauthor'), {}, "%s (%s)" % (name, frequency),) + for name, frequency in sorted_coauthors: + rec_query = 'exactauthor:"' + authorname + '" ' + 'exactauthor:"' + name + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' exactauthor:"' + name + '"' + + lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),) content.append("%s" % lnk) if not content: - content = [_("No frequent co-authors")] + content = [_("No Frequent Co-authors")] coauthor_box = self.tmpl_print_searchresultbox(header, "
\n".join(content)) @@ -4044,9 +4084,18 @@ def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubd req.write("") # print citations: + rec_query = 'exactauthor:"' + authorname + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + if len(citedbylist): line1 = "" + _("Citations:") + "" line2 = "" + + if not pubs: + line2 = _("No Citation Information available") + req.write(self.tmpl_print_searchresultbox(line1, line2)) # print frequent co-authors: diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py index 6b213b85a..f995244c5 100644 --- a/modules/websearch/lib/websearch_webinterface.py +++ b/modules/websearch/lib/websearch_webinterface.py @@ -229,7 +229,7 @@ class WebInterfaceAuthorPages(WebInterfaceDirectory): def __init__(self, pageparam=''): """Constructor.""" - self.pageparam = pageparam.replace("+", " ") + self.pageparam = cgi.escape(pageparam.replace("+", " ")) self.personid = -1 self.authorname = " " @@ -240,25 +240,32 @@ def _lookup(self, component, path): def __call__(self, req, form): """Serve the page in the given language.""" - is_bibauthorid = True + is_bibauthorid = False try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_names_from_id + from invenio.bibauthorid_webapi import get_person_db_names_from_id from invenio.bibauthorid_utils import create_normalized_name from invenio.bibauthorid_utils import split_name_parts from invenio.bibauthorid_config import CLAIMPAPER_CLAIM_OTHERS_PAPERS from invenio.access_control_admin import acc_find_user_role_actions + is_bibauthorid = True except (ImportError): is_bibauthorid = False from operator import itemgetter - argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0) }) + argd = wash_urlargd(form, + {'ln': (str, CFG_SITE_LANG), + 'verbose': (int, 0), + 'recid': (int, -1) + }) ln = argd['ln'] verbose = argd['verbose'] req.argd = argd #needed since perform_req_search + param_recid = argd['recid'] # start page req.content_type = "text/html" @@ -270,6 +277,8 @@ def __call__(self, req, form): recid = None nquery = "" names_dict = {} + db_names_dict = {} + _ = gettext_set_language(ln) #let's see what takes time.. time1 = time.time() @@ -282,9 +291,13 @@ def __call__(self, req, form): except (ValueError, TypeError): self.personid = -1 - # Well, it's not a person id, maybe a bibrec:name or name:bibrec pair? - if self.personid < 1 and is_bibauthorid: - if self.pageparam.count(":"): + if self.personid < 0 and is_bibauthorid: + if param_recid > -1: + # Well, it's not a person id, did we get a record ID? + recid = param_recid + nquery = self.pageparam + elif self.pageparam.count(":"): + # No recid passed, maybe name is recid:name or name:recid pair? left, right = self.pageparam.split(":") try: @@ -298,6 +311,7 @@ def __call__(self, req, form): recid = None nquery = self.pageparam else: + # No recid could be determined. Work with name only nquery = self.pageparam sorted_results = search_person_ids_by_name(nquery) @@ -317,25 +331,28 @@ def __call__(self, req, form): search_results = authors - if search_results: + if len(search_results) == 1: self.personid = search_results[0][0] + #@todo: Show selection of possible Person entities if len > 1 - if self.personid < 1 or not is_bibauthorid: + if self.personid < 0 or not is_bibauthorid: # Well, no person. Fall back to the exact author name search then. self.authorname = self.pageparam if not self.authorname: return websearch_templates.tmpl_author_information(req, {}, self.authorname, - 0, {}, {}, - {}, {}, {}, {}, ln) + 0, {}, {}, {}, + {}, {}, {}, {}, False, ln) #search the publications by this author pubs = perform_request_search(req=req, p=self.authorname, f="exactauthor") names_dict[self.authorname] = len(pubs) + db_names_dict[self.authorname] = len(pubs) - elif is_bibauthorid: + elif is_bibauthorid and self.personid > -1: #yay! Person found! find only papers not disapproved by humans + req.write("") full_pubs = get_papers_by_person_id(self.personid, -1) pubs = [int(row[0]) for row in full_pubs] longest_name = "" @@ -352,16 +369,34 @@ def __call__(self, req, form): if len(norm_name) > len(longest_name): longest_name = norm_name + for aname, acount in get_person_db_names_from_id(self.personid): + aname = aname.replace('"','').strip() + db_names_dict[aname] = acount + self.authorname = longest_name + if not pubs and param_recid > -1: + req.write("

") + req.write(_("We're sorry. The requested author \"%s\" seems not to be listed on the specified paper." + % (self.pageparam,))) + req.write("
") + req.write(_("Please try the following link to start a broader search on the author: ")) + req.write('%s' + % (CFG_SITE_URL, self.pageparam, self.pageparam)) + req.write("

") + + return page_end(req, 'hb', ln) + #get most frequent authors of these pubs popular_author_tuples = get_most_popular_field_values(pubs, (AUTHOR_TAG, COAUTHOR_TAG)) coauthors = {} for (coauthor, frequency) in popular_author_tuples: - if len(authors) < MAX_COLLAB_LIST: - if coauthor not in names_dict: - coauthors[coauthor] = frequency + if coauthor not in db_names_dict: + coauthors[coauthor] = frequency + + if len(coauthors) > MAX_COLLAB_LIST: + break time1 = time.time() if verbose == 9: @@ -408,7 +443,7 @@ def __call__(self, req, form): req.write("
misc: " + str(time2 - time1) + "
") #a dict. keys: affiliations, values: lists of publications - author_aff_pubs = self.get_institute_pub_dict(pubs, names_dict.keys()) + author_aff_pubs = self.get_institute_pub_dict(pubs, db_names_dict.keys()) time1 = time.time() if verbose == 9: @@ -443,17 +478,38 @@ def __call__(self, req, form): author_aff_pubs, citedbylist, kwtuples, coauthors, vtuples, - names_dict, admin_link, ln) + db_names_dict, admin_link, + is_bibauthorid, ln) time1 = time.time() #cited-by summary - out = summarize_records(intbitset(pubs), 'hcs', ln, req=req) + rec_query = 'exactauthor:"' + self.authorname + '"' + + extended_author_search_str = "" + + if is_bibauthorid: + if len(db_names_dict.keys()) > 1: + extended_author_search_str = '(' + + for name_index, name_query in enumerate(db_names_dict.keys()): + if name_index > 0: + extended_author_search_str += " OR " + + extended_author_search_str += 'exactauthor:"' + name_query + '"' + + if len(db_names_dict.keys()) > 1: + extended_author_search_str += ')' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + + + if pubs: + req.write(summarize_records(intbitset(pubs), 'hcs', ln, rec_query, req=req)) time2 = time.time() if verbose == 9: req.write("
summarizer: " + str(time2 - time1) + "
") - req.write(out) - # simauthbox = create_similarly_named_authors_link_box(self.authorname) # req.write(simauthbox) if verbose == 9: