Skip to content

Commit

Permalink
WebSearch: fix structured regexp query parsing
Browse files Browse the repository at this point in the history
* Fix parsing of some structured regexp queries of the form
  `field:/pattern/`, which was not working properly when the field
  was either a MARC tag or a refersto/citedby operator.
  (closes #470)
  • Loading branch information
tiborsimko committed Jan 31, 2011
1 parent 106ba5f commit 5edccee
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
10 changes: 5 additions & 5 deletions modules/websearch/lib/search_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,15 +691,15 @@ def create_basic_search_units(req, p, f, m=None, of='hb'):
opfts.append([oi, "%" + pi + "%", fi, 'a'])
else: # unbalanced quotes, so fall back to WRD query:
opfts.append([oi, pi, fi, 'w'])
elif pi.startswith('/') and pi.endswith('/'):
# B3b - pi has slashes around => do regexp search
opfts.append([oi, pi[1:-1], fi, 'r'])
elif fi and str(fi[0]).isdigit() and str(fi[0]).isdigit():
# B3b - fi exists and starts by two digits => do ACC search
# B3c - fi exists and starts by two digits => do ACC search
opfts.append([oi, pi, fi, 'a'])
elif fi and not get_index_id_from_field(fi) and get_field_name(fi):
# B3c - logical field fi exists but there is no WRD index for fi => try ACC search
# B3d - logical field fi exists but there is no WRD index for fi => try ACC search
opfts.append([oi, pi, fi, 'a'])
elif pi.startswith('/') and pi.endswith('/'):
# B3d - pi has slashes around => do regexp search
opfts.append([oi, pi[1:-1], fi, 'r'])
else:
# B3e - general case => do WRD search
pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
Expand Down
10 changes: 10 additions & 0 deletions modules/websearch/lib/search_engine_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ def test_parsing_structured_regexp_query(self):
self._check("title:/(one|two)/", '', None,
[['+', '(one|two)', 'title', 'r']])

# Unit test: a slash-delimited regexp pattern attached to a MARC tag
# (`245__a`) must be parsed as one regexp search unit rather than being
# routed elsewhere because the field starts with digits.
def test_parsing_structured_regexp_marc_query(self):
"search engine - parsing structured regexp MARC query"
# The expected unit is ['+', pattern, field, 'r']: the pattern comes back
# without its surrounding slashes, the MARC tag is kept as the field, and
# 'r' is the type the parser assigns to regexp searches.
# NOTE(review): the '' and None arguments are presumably the default field
# and matching type passed through to the parser -- confirm in _check().
self._check("245__a:/(one|two)/", '', None,
[['+', '(one|two)', '245__a', 'r']])

# Unit test: a slash-delimited regexp pattern attached to the `refersto`
# operator must be parsed as one regexp search unit.
def test_parsing_structured_regexp_refersto_query(self):
"search engine - parsing structured regexp refersto query"
# The expected unit is ['+', pattern, field, 'r']: the pattern comes back
# without its surrounding slashes, 'refersto' is kept as the field, and
# 'r' is the type the parser assigns to regexp searches.
# NOTE(review): the '' and None arguments are presumably the default field
# and matching type passed through to the parser -- confirm in _check().
self._check("refersto:/(one|two)/", '', None,
[['+', '(one|two)', 'refersto', 'r']])

def test_parsing_combined_structured_query_in_a_field(self):
"search engine - parsing structured query in a field"
self._check("title:muon author:ellis", 'abstract', None,
Expand Down
5 changes: 1 addition & 4 deletions modules/websearch/lib/websearch_regression_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,12 +1295,9 @@ def test_many_marc_tags_partial_phrase_query(self):

def test_single_marc_tag_regexp_query(self):
"""websearch - single MARC tag, regexp query"""
# NOTE: regexp queries for physical MARC tags (e.g. 245:/and/)
# are not treated by the search engine by purpose. But maybe
# we should support them?!
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245%3A%2Fand%2F',
expected_text="[]"))
expected_text="[1, 8, 9, 14, 15, 20, 22, 24, 28, 33, 47, 48, 49, 51, 53, 64, 69, 71, 79, 82, 83, 85, 91, 96]"))

class WebSearchExtSysnoQueryTest(unittest.TestCase):
"""Test of queries using external system numbers."""
Expand Down

0 comments on commit 5edccee

Please sign in to comment.