Skip to content

Commit

Permalink
BibMatch: using logical fields in querystrings
Browse files Browse the repository at this point in the history
* Added support for predefined logical fields, like 'author' and
  'title', in querystrings. E.g. -q 'title||author' will match using
  the logical fields instead of physical MARC tags.
  (closes #202)

* Cleaned up and removed some unused variables.

* Updated documentation and added regression test.
  • Loading branch information
jalavik authored and tiborsimko committed Sep 20, 2010
1 parent 2410257 commit d9b05ea
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
12 changes: 10 additions & 2 deletions modules/bibmatch/doc/admin/bibmatch-admin-guide.webdoc
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ To print potential duplicate entries before manual upload, run:
</pre>
</blockquote>

To print undecided result from a match on multiple fields:
To print undecided results from a match on multiple fields, including predefined fields (title, author, etc.):

<blockquote>
<pre>
$ bibmatch --print-ambiguous --query-string=\"245__a||100__a\" < input.xml > output.xml
$ bibmatch --print-ambiguous --query-string=\"245__a||author\" < input.xml > output.xml
</pre>
</blockquote>

Expand Down Expand Up @@ -146,5 +146,13 @@ Command line options:
-a, --alter-recid The recid (controlfield 001) of matched or fuzzy matched records in
output will be replaced by the 001 value of the matched record.
Useful to prepare files to then be used with BibUpload.

Common predefined fields that can be used in querystrings (these are for the Invenio demo site; your fields may vary!):

'abstract', 'affiliation', 'anyfield', 'author', 'coden', 'collaboration',
'collection', 'datecreated', 'datemodified', 'division', 'exactauthor',
'experiment', 'fulltext', 'isbn', 'issn', 'journal', 'keyword', 'recid',
'reference', 'reportnumber', 'subject', 'title', 'year'

</pre>
</blockquote>
25 changes: 13 additions & 12 deletions modules/bibmatch/lib/bibmatch_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,12 @@ def usage():
output will be replaced by the 001 value of the matched record.
Useful to prepare files to then be used with BibUpload.
Predefined fields:
Common predefined fields used in querystrings: (for Invenio demo site, your fields may vary!)
The following predefined field names can be used:
"title"
"author"
'abstract', 'affiliation', 'anyfield', 'author', 'coden', 'collaboration',
'collection', 'datecreated', 'datemodified', 'division', 'exactauthor',
'experiment', 'fulltext', 'isbn', 'issn', 'journal', 'keyword', 'recid',
'reference', 'reportnumber', 'subject', 'title', 'year'
Examples:
Expand Down Expand Up @@ -144,7 +145,6 @@ def __init__(self, mode="1"):
self.format.append([])
self.format.append([])
self.format.append([])
self.advanced = 0
return

def from_qrystr(self, qrystr="", search_mode="eee", operator="aa"):
Expand Down Expand Up @@ -204,7 +204,6 @@ def default(self):
self.format.append([])
self.format.append([])
self.format.append([])
self.advanced = 1
return

def change_search_mode(self, mode="a"):
Expand Down Expand Up @@ -367,6 +366,11 @@ def match_records(records, qrystrs=None, perform_request_search_mode="eee", \

### get appropriate fields from database
for field in querystring.field:
tags = get_field_tags(field)
if len(tags) > 0:
# Fetch value from input record of first tag only
# FIXME: Extract more than the first tag and evaluate each
field = tags[0]
### use expanded tags
tag = field[0:3]
ind1 = field[3:4]
Expand Down Expand Up @@ -416,7 +420,6 @@ def match_records(records, qrystrs=None, perform_request_search_mode="eee", \
p3 = inst[2]
f3 = querystring.field[2]
m3 = querystring.mode[2]
aas = querystring.advanced

#1st run the basic perform_req_search
recID_list = server.search(
Expand Down Expand Up @@ -570,8 +573,7 @@ def main():
batch_output = "" #print stuff in files
f_input = "" #read from where, if param "i"
server_url = CFG_SITE_URL #url to server performing search, local by default
modify = 0 #alter output with matched record indentifiers
predefined_fields = ["title", "author"]
modify = 0 #alter output with matched record identifiers
textmarc_output = 0

for opt, opt_value in opts:
Expand Down Expand Up @@ -612,9 +614,8 @@ def main():
if opt in ["-a", "--alter-recid"]:
modify = 1
if opt in ["-f", "--field"]:
alternate_querystring = []
if opt_value in predefined_fields:
alternate_querystring = get_field_tags(opt_value)
alternate_querystring = get_field_tags(opt_value)
if len(alternate_querystring) > 0:
for item in alternate_querystring:
qrystrs.append(item)
else:
Expand Down
7 changes: 7 additions & 0 deletions modules/bibmatch/lib/bibmatch_regression_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,13 @@ def test_check_altered(self):
[dummy1, matchedrecs, dummy3, dummy4] = match_records(records, modify=1)
self.assertTrue(record_has_field(matchedrecs[0][0], '001'))

def test_check_qrystr(self):
"""bibmatch - check querystrings"""
qrystrs = ["author||reportnumber"]
records = create_records(self.recxml1)
[dummy1, matchedrecs, dummy3, dummy4] = match_records(records, qrystrs=qrystrs)
self.assertEqual(1,len(matchedrecs))

TEST_SUITE = make_test_suite(BibMatchTest)

if __name__ == "__main__":
Expand Down

0 comments on commit d9b05ea

Please sign in to comment.