Skip to content

Commit

Permalink
Merge pull request #62 from izihawa/development
Browse files (browse the repository at this point in the history)
Development
  • Loading branch information
ppodolsky authored Jul 7, 2022
2 parents 07a568c + ab57bd4 commit 106856f
Show file tree
Hide file tree
Showing 16 changed files with 278 additions and 68 deletions.
4 changes: 2 additions & 2 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rust_library(
),
edition = "2021",
deps = all_crate_deps() + [":proto_grpc_rust"],
version = "0.8.5",
version = "0.8.7",
)

rust_binary(
Expand Down Expand Up @@ -62,4 +62,4 @@ container_push(
registry = "index.docker.io",
repository = "izihawa/summa-server",
tag = "latest",
)
)
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "summa"
version = "0.8.5"
version = "0.8.7"
license-file = "LICENSE"
description = "Fast full-text search server"
homepage = "https://github.com/izihawa/summa"
Expand Down
10 changes: 6 additions & 4 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ http_archive(
)
http_archive(
name = "rules_python",
sha256 = "5fa3c738d33acca3b97622a13a741129f67ef43f5fdfcec63b29374cc0574c29",
strip_prefix = "rules_python-0.9.0",
urls = ["https://github.com/bazelbuild/rules_python/archive/0.9.0.tar.gz"],
sha256 = "95525d542c925bc2f4a7ac9b68449fc96ca52cfba15aa883f7193cdf745c38ff",
strip_prefix = "rules_python-cccbfb920c8b100744c53c0c03900f1be4040fe8",
url = "https://github.com/ppodolsky/rules_python/archive/cccbfb920c8b100744c53c0c03900f1be4040fe8.tar.gz",
)

# GRPC
Expand All @@ -53,7 +53,9 @@ grpc_extra_deps()
# Rust
load("@rules_rust//rust:repositories.bzl", "rust_register_toolchains", "rules_rust_dependencies")
rules_rust_dependencies()
rust_register_toolchains()
rust_register_toolchains(
version="1.62.0",
)
load("@rules_rust//crate_universe:repositories.bzl", "crate_universe_dependencies")
crate_universe_dependencies(bootstrap = True)
load("@rules_rust//crate_universe:defs.bzl", "crate", "crates_repository", "render_config")
Expand Down
2 changes: 1 addition & 1 deletion aiosumma/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ py_wheel(
"summa/proto/summa_py_pb",
"aiosumma",
],
version = "2.3.15",
version = "2.3.23",
deps = [
":aiosumma",
":data",
Expand Down
18 changes: 9 additions & 9 deletions aiosumma/aiosumma/parser/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def to_summa_query(self):
elif isinstance(self.expr, Regex):
return {'regex': {'field': self.name, 'value': self.expr.value}}
elif isinstance(self.expr, Proximity):
return {'phrase': {'field': self.name, 'value': self.expr.term, 'slop': self.expr.slop}}
return {'phrase': {'field': self.name, 'value': self.expr.term.value, 'slop': self.expr.slop}}
else:
raise UnsupportedQueryError(error=f'{self.expr} in search field `{self.name}`')

Expand Down Expand Up @@ -253,10 +253,10 @@ def __str__(self):
class BaseApprox(Item):
"""Base for approximations, that is fuzziness and proximity
"""
_equality_attrs = ['term', 'degree']
_equality_attrs = ['term', 'slop']

def __repr__(self): # pragma: no cover
return "%s(%s, %s)" % (self.__class__.__name__, self.term.__repr__(), self.degree)
return "%s(%s, %s)" % (self.__class__.__name__, self.term.__repr__(), self.slop)

@property
def children(self):
Expand All @@ -266,17 +266,17 @@ def children(self):
class Fuzzy(BaseApprox):
"""Fuzzy search on word
:param Word term: the approximated term
:param degree: the degree which will be converted to :py:class:`decimal.Decimal`.
:param slop: the fuzziness degree (0.5 when omitted), converted to a normalized :py:class:`decimal.Decimal`.
"""
def __init__(self, term, degree=None):
def __init__(self, term, slop=None):
super().__init__()
self.term = term
if degree is None:
degree = 0.5
self.degree = Decimal(degree).normalize()
if slop is None:
slop = 0.5
self.slop = Decimal(slop).normalize()

def __str__(self):
return "%s~%s" % (self.term, self.degree)
return "%s~%s" % (self.term, self.slop)


class Proximity(BaseApprox):
Expand Down
4 changes: 2 additions & 2 deletions aiosumma/aiosumma/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@

# r'(?P<phrase>"(?:[^\\"]|\\"|\\[^"])*")' # this is quite complicated to handle \"
# modifiers after term or phrase
APPROX_RE = r'~(?P<degree>[0-9.]+)?'
APPROX_RE = r'~(?P<slop>[0-9.]+)?'
BOOST_RE = r'\^(?P<score>[0-9.]+)?'

# regex
Expand Down Expand Up @@ -199,7 +199,7 @@ def t_REGEX(t):
@lex.TOKEN(APPROX_RE)
def t_APPROX(t):
m = re.match(APPROX_RE, t.value)
t.value = m.group("degree")
t.value = m.group("slop")
return t


Expand Down
30 changes: 15 additions & 15 deletions aiosumma/aiosumma/tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,51 +85,51 @@ def test_production_chain():
'query': {'match': {'value': 'claudio'}}},
{'occur': 'should',
'query': {'boost': {'query': {'match': {'value': 'claudios'}},
'score': '0.85'}}},
'score': '0.65'}}},
{'occur': 'should',
'query': {'match': {'value': 'claude'}}},
{'occur': 'should',
'query': {'boost': {'query': {'match': {'value': 'clauded'}},
'score': '0.85'}}},
'score': '0.65'}}},
{'occur': 'should',
'query': {'boost': {'query': {'match': {'value': 'claudes'}},
'score': '0.85'}}},
'score': '0.65'}}},
{'occur': 'should',
'query': {'boost': {'query': {'match': {'value': 'clauding'}},
'score': '0.85'}}},
'score': '0.65'}}},
{'occur': 'should',
'query': {'match': {'value': 'rugarli'}}},
{'occur': 'should',
'query': {'boost': {'query': {'match': {'value': 'rugarlis'}},
'score': '0.85'}}}]},
'score': '0.65'}}}]},
}
processed_query = query_processor.process('+(search engine) -car', 'en')
assert processed_query.to_summa_query() == {
'boolean': {'subqueries': [
{'occur': 'must', 'query': {'match': {'value': 'search'}}},
{'occur': 'must', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.85'}}},
{'occur': 'must', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.65'}}},
{'occur': 'must', 'query': {'match': {'value': 'engine'}}},
{'occur': 'must', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.85'}}},
{'occur': 'must', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.65'}}},
{'occur': 'must_not', 'query': {'match': {'value': 'car'}}},
{'occur': 'must_not', 'query': {'boost': {'query': {'match': {'value': 'cars'}}, 'score': '0.85'}}}]}}
{'occur': 'must_not', 'query': {'boost': {'query': {'match': {'value': 'cars'}}, 'score': '0.65'}}}]}}

processed_query = query_processor.process('search engine', 'en')
assert processed_query.to_summa_query() == {'boolean': {'subqueries': [
{'occur': 'should', 'query': {'match': {'value': 'search'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.85'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.65'}}},
{'occur': 'should', 'query': {'match': {'value': 'engine'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.85'}}}
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.65'}}}
]}}
processed_query = query_processor.process('author:Smith +"title book"', 'en')
assert processed_query.to_summa_query() == {'boolean': {'subqueries': [
{'occur': 'should', 'query': {'term': {'field': 'author', 'value': 'smith'}}},
{'occur': 'should', 'query':
{'boost': {'query': {'term': {'field': 'author', 'value': 'smiths'}}, 'score': '0.85'}}},
{'boost': {'query': {'term': {'field': 'author', 'value': 'smiths'}}, 'score': '0.65'}}},
{'occur': 'must', 'query': {'match': {'value': '"title book"'}}}]}}
processed_query = query_processor.process('science +year:[2010 TO *]', 'en')
assert processed_query.to_summa_query() == {'boolean': {'subqueries': [
{'occur': 'should', 'query': {'match': {'value': 'science'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'sciences'}}, 'score': '0.85'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'sciences'}}, 'score': '0.65'}}},
{'occur': 'must', 'query': {'range': {
'field': 'year', 'value': {'including_left': True, 'including_right': True, 'left': '2010', 'right': '*'}}}}
]}}
Expand All @@ -140,9 +140,9 @@ def test_unknown_language_transformer():
processed_query = query_processor.process('search engine', 'zz')
assert processed_query.to_summa_query() == {'boolean': {'subqueries': [
{'occur': 'should', 'query': {'match': {'value': 'search'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.85'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'searches'}}, 'score': '0.65'}}},
{'occur': 'should', 'query': {'match': {'value': 'engine'}}},
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.85'}}}
{'occur': 'should', 'query': {'boost': {'query': {'match': {'value': 'engines'}}, 'score': '0.65'}}}
]}}


Expand All @@ -166,7 +166,7 @@ def test_exact_match_transformers():
{'occur': 'should', 'query': {'match': {'value': 'search'}}},
{'occur': 'should', 'query': {'match': {'value': 'engine'}}},
{'occur': 'should', 'query': {'boost': {'query': {
'phrase': {'field': 'title', 'value': 'search engine'}}, 'score': '2'}}
'phrase': {'field': 'title', 'slop': 3, 'value': 'search engine'}}, 'score': '2'}}
}
]}}

Expand Down
11 changes: 9 additions & 2 deletions aiosumma/aiosumma/tree_transformers/exact_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..parser.elements import (
Boost,
Phrase,
Proximity,
SearchField,
SynonymsGroup,
Expand Down Expand Up @@ -35,16 +36,22 @@ def visit_group(self, node, context, parents=None):
phrase.append(operand.value)
elif isinstance(operand, SynonymsGroup):
phrase.append(operand.operands[0].value)
elif isinstance(operand, SearchField):
continue
else:
return node, False

if not phrase:
return node, False

phrase = ' '.join(phrase)

score = self.score
if callable(score):
score = score(node, context)
if self.default_phrase_field:
new_operands.append(Boost(SearchField(self.default_phrase_field, Proximity(phrase, slop=3)), score))
new_operands.append(Boost(SearchField(self.default_phrase_field, Proximity(Phrase(phrase), slop=3)), score))
else:
new_operands.append(Boost(Proximity(phrase, slop=3), score))
new_operands.append(Boost(Proximity(Phrase(phrase), slop=3), score))
node.operands = new_operands
return node, False
5 changes: 3 additions & 2 deletions aiosumma/aiosumma/tree_transformers/morphy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ class MorphyTreeTransformer(TreeTransformer):
'en': EnglishMorphology('en_core_web_sm'),
}

def __init__(self, enable_morph=True, enable_accent=True, ignore_nodes=None):
def __init__(self, enable_morph=True, enable_accent=True, score: str = '0.65', ignore_nodes=None):
super().__init__(ignore_nodes=ignore_nodes)
self.enable_morph = enable_morph
self.enable_accent = enable_accent
self.score = score

def visit_word(self, node, context, parents=None):
forms = [node]
Expand All @@ -30,7 +31,7 @@ def visit_word(self, node, context, parents=None):
if self.enable_morph and context.language in self.morphology:
for w in self.morphology[context.language].derive_forms(node.value):
if node.value != w:
forms.append(Boost(Word(w), score='0.85'))
forms.append(Boost(Word(w), score=self.score))

if len(forms) == 1:
return node, True
Expand Down
2 changes: 2 additions & 0 deletions aiosumma/aiosumma/tree_transformers/tantivy.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def visit_phrase(self, node, context, parents=None):
return node, False

def visit_minus(self, node, context, parents=None):
if parents is None:
return node.a, False
if isinstance(node.a, BaseGroup):
op = node.a
new_operands = []
Expand Down
Loading

0 comments on commit 106856f

Please sign in to comment.