Skip to content

Commit dfb847c

Browse files
author
Chris Rossi
authored
Implement filters for queries. (#55)
Implement filters for queries. Does not include post-filter functionality.
1 parent 1eff5fa commit dfb847c

File tree

5 files changed

+482
-37
lines changed

5 files changed

+482
-37
lines changed

packages/google-cloud-ndb/src/google/cloud/ndb/_datastore_query.py

Lines changed: 136 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
"""Translate NDB queries to Datastore calls."""
1616

17+
import itertools
1718
import logging
1819

1920
from google.cloud.datastore_v1.proto import datastore_pb2
2021
from google.cloud.datastore_v1.proto import entity_pb2
2122
from google.cloud.datastore_v1.proto import query_pb2
23+
from google.cloud.datastore import helpers
2224

2325
from google.cloud.ndb import context as context_module
2426
from google.cloud.ndb import _datastore_api
@@ -34,6 +36,50 @@
3436
RESULT_TYPE_FULL = ResultType.Value("FULL")
3537
RESULT_TYPE_PROJECTION = ResultType.Value("PROJECTION")
3638

39+
# Maps the operator symbols accepted by ``make_filter`` to the matching
# Datastore ``PropertyFilter`` operator enum values. Only the simple
# comparisons are listed; there is no entry for ``!=`` or ``in``.
FILTER_OPERATORS = {
    "=": query_pb2.PropertyFilter.EQUAL,
    "<": query_pb2.PropertyFilter.LESS_THAN,
    "<=": query_pb2.PropertyFilter.LESS_THAN_OR_EQUAL,
    ">": query_pb2.PropertyFilter.GREATER_THAN,
    ">=": query_pb2.PropertyFilter.GREATER_THAN_OR_EQUAL,
}
46+
47+
48+
def make_filter(name, op, value):
    """Build a Datastore property filter for a single comparison.

    Args:
        name (str): Name of the property being compared.
        op (str): Comparison operator. Must be a key of
            ``FILTER_OPERATORS``: "=", "<", "<=", ">", or ">=".
        value (Any): The value the property is compared against.

    Returns:
        query_pb2.PropertyFilter: The populated filter protocol buffer.
    """
    property_ref = query_pb2.PropertyReference(name=name)
    comparison = FILTER_OPERATORS[op]
    property_filter = query_pb2.PropertyFilter(
        property=property_ref, op=comparison
    )

    # The comparison value is written into the message's ``value`` field
    # by the Datastore helper once the message exists.
    helpers._set_protobuf_value(property_filter.value, value)
    return property_filter
66+
67+
68+
def make_composite_and_filter(filter_pbs):
    """Combine several filters into one composite filter joined by AND.

    Args:
        filter_pbs (List[Union[query_pb2.PropertyFilter,
            query_pb2.CompositeFilter]]): The filters to be combined.

    Returns:
        query_pb2.CompositeFilter: A new composite filter whose members are
            the given filters, each wrapped in a ``query_pb2.Filter``.
    """
    and_op = query_pb2.CompositeFilter.AND

    # Members of a composite filter must be generic ``Filter`` messages, so
    # every property/composite filter is wrapped before being attached.
    wrapped = list(map(_filter_pb, filter_pbs))
    return query_pb2.CompositeFilter(op=and_op, filters=wrapped)
82+
3783

3884
@tasklets.tasklet
3985
def fetch(query):
@@ -45,7 +91,7 @@ def fetch(query):
4591
Returns:
4692
tasklets.Future: Result is List[model.Model]: The query results.
4793
"""
48-
for name in ("filters", "orders", "default_options"):
94+
for name in ("orders", "default_options"):
4995
if getattr(query, name, None):
5096
raise NotImplementedError(
5197
"{} is not yet implemented for queries.".format(name)
@@ -61,14 +107,59 @@ def fetch(query):
61107
if not namespace:
62108
namespace = client.namespace
63109

64-
query_pb = _query_to_protobuf(query)
65-
results = yield _run_query(project_id, namespace, query_pb)
110+
filter_pbs = (None,)
111+
if query.filters:
112+
filter_pbs = query.filters._to_filter()
113+
if not isinstance(filter_pbs, (tuple, list)):
114+
filter_pbs = (filter_pbs,)
115+
116+
queries = [
117+
_run_query(project_id, namespace, _query_to_protobuf(query, filter_pb))
118+
for filter_pb in filter_pbs
119+
]
120+
results = yield queries
121+
122+
if len(results) > 1:
123+
results = _merge_results(results)
124+
else:
125+
results = results[0]
126+
66127
return [
67128
_process_result(result_type, result, query.projection)
68129
for result_type, result in results
69130
]
70131

71132

133+
def _merge_results(results):
    """Merge the results of distinct queries.

    Some queries that in NDB are logically a single query have to be broken
    up into two or more Datastore queries, because Datastore doesn't have a
    composite filter with a boolean OR. The `results` are the result sets from
    two or more queries which logically form a composite query joined by OR.
    The individual result sets are combined into a single result set,
    consolidating any results which may be common to two or more result sets.

    Results keep the order in which they are first encountered: all of the
    first result set, then any not-yet-seen results of the second, and so on.

    Args:
        results (List[List[Tuple[query_pb2.EntityResult.ResultType,
            query_pb2.EntityResult]]]): The individual result sets, one per
            query, as returned by :func:`_run_query`. These are merged into
            the final result.

    Returns:
        List[Tuple[query_pb2.EntityResult.ResultType,
            query_pb2.EntityResult]]: The merged result set.
    """
    seen_keys = set()
    merged = []
    for result_type, result in itertools.chain.from_iterable(results):
        # The serialized form of the entity key is hashable, so it is used
        # to detect an entity that more than one query returned.
        hash_key = result.entity.key.SerializeToString()
        if hash_key in seen_keys:
            continue

        seen_keys.add(hash_key)
        merged.append((result_type, result))

    return merged
161+
162+
72163
def _process_result(result_type, result, projection):
73164
"""Process a single entity result.
74165
@@ -98,11 +189,13 @@ def _process_result(result_type, result, projection):
98189
)
99190

100191

101-
def _query_to_protobuf(query):
192+
def _query_to_protobuf(query, filter_pb=None):
102193
"""Convert an NDB query to a Datastore protocol buffer.
103194
104195
Args:
105196
query (query.Query): The query.
197+
filter_pb (Optional[query_pb2.Filter]): The filter to apply for this
198+
query.
106199
107200
Returns:
108201
query_pb2.Query: The protocol buffer representation of the query.
@@ -125,22 +218,55 @@ def _query_to_protobuf(query):
125218
for name in query.distinct_on
126219
]
127220

128-
filters = []
129221
if query.ancestor:
130222
ancestor_pb = query.ancestor._key.to_protobuf()
131-
filter_pb = query_pb2.PropertyFilter(
223+
ancestor_filter_pb = query_pb2.PropertyFilter(
132224
property=query_pb2.PropertyReference(name="__key__"),
133225
op=query_pb2.PropertyFilter.HAS_ANCESTOR,
134226
)
135-
filter_pb.value.key_value.CopyFrom(ancestor_pb)
136-
filters.append(filter_pb)
227+
ancestor_filter_pb.value.key_value.CopyFrom(ancestor_pb)
228+
229+
if filter_pb is None:
230+
filter_pb = ancestor_filter_pb
137231

138-
if len(filters) == 1:
139-
query_args["filter"] = query_pb2.Filter(property_filter=filters[0])
232+
elif isinstance(filter_pb, query_pb2.CompositeFilter):
233+
filter_pb.filters.add(property_filter=ancestor_filter_pb)
234+
235+
else:
236+
filter_pb = query_pb2.CompositeFilter(
237+
op=query_pb2.CompositeFilter.AND,
238+
filters=[
239+
_filter_pb(filter_pb),
240+
_filter_pb(ancestor_filter_pb),
241+
],
242+
)
243+
244+
if filter_pb is not None:
245+
query_args["filter"] = _filter_pb(filter_pb)
140246

141247
return query_pb2.Query(**query_args)
142248

143249

250+
def _filter_pb(filter_pb):
    """Wrap a concrete filter in the generic ``Filter`` message.

    The Datastore protocol uses a Filter message which holds exactly one of
    a PropertyFilter or a CompositeFilter. This picks the right field for
    the filter that was passed in.

    Args:
        filter_pb (Union[query_pb2.CompositeFilter, query_pb2.PropertyFilter]):
            The actual filter.

    Returns:
        query_pb2.Filter: The wrapped filter, in the form required to use it
            in a query or as a member of a composite filter.
    """
    is_composite = isinstance(filter_pb, query_pb2.CompositeFilter)
    field_name = "composite_filter" if is_composite else "property_filter"
    return query_pb2.Filter(**{field_name: filter_pb})
268+
269+
144270
@tasklets.tasklet
145271
def _run_query(project_id, namespace, query_pb):
146272
"""Run a query in Datastore.

packages/google-cloud-ndb/src/google/cloud/ndb/query.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def __gt__(self, unused_other):
279279
raise TypeError("Nodes cannot be ordered")
280280

281281
def _to_filter(self, post=False):
282-
"""Helper to convert to low-level filter, or :data:`None`.
282+
"""Helper to convert to low-level filter.
283283
284284
Raises:
285285
NotImplementedError: Always. This method is virtual.
@@ -409,7 +409,7 @@ def __eq__(self, other):
409409
)
410410

411411
def _to_filter(self, post=False):
412-
"""Helper to convert to low-level filter, or :data:`None`.
412+
"""Helper to convert to low-level filter.
413413
414414
Args:
415415
post (bool): Indicates if this is a post-filter node.
@@ -540,22 +540,21 @@ def __eq__(self, other):
540540
)
541541

542542
def _to_filter(self, post=False):
543-
"""Helper to convert to low-level filter, or :data:`None`.
543+
"""Helper to convert to low-level filter.
544544
545545
Args:
546546
post (bool): Indicates if this is a post-filter node.
547547
548548
Returns:
549-
None: If this is a post-filter.
549+
Optional[query_pb2.PropertyFilter]: Returns :data:`None`, if
550+
this is a post-filter, otherwise returns the protocol buffer
551+
representation of the filter.
550552
551553
Raises:
552554
NotImplementedError: If the ``opsymbol`` is ``!=`` or ``in``, since
553555
they should correspond to a composite filter. This should
554556
never occur since the constructor will create ``OR`` nodes for
555557
``!=`` and ``in``
556-
NotImplementedError: If not a post-filter and the ``opsymbol``
557-
is a simple comparison. (For now) this is because the original
558-
implementation relied on a low-level datastore query module.
559558
"""
560559
if post:
561560
return None
@@ -566,7 +565,9 @@ def _to_filter(self, post=False):
566565
"to a single filter ({!r})".format(self._opsymbol)
567566
)
568567

569-
raise NotImplementedError("Missing datastore_query.make_filter")
568+
return _datastore_query.make_filter(
569+
self._name, self._opsymbol, self._value
570+
)
570571

571572

572573
class PostFilterNode(Node):
@@ -612,7 +613,7 @@ def __eq__(self, other):
612613
return self is other or self.predicate == other.predicate
613614

614615
def _to_filter(self, post=False):
615-
"""Helper to convert to low-level filter, or :data:`None`.
616+
"""Helper to convert to low-level filter.
616617
617618
Args:
618619
post (bool): Indicates if this is a post-filter node.
@@ -801,19 +802,14 @@ def __eq__(self, other):
801802
return self._nodes == other._nodes
802803

803804
def _to_filter(self, post=False):
804-
"""Helper to convert to low-level filter, or :data:`None`.
805+
"""Helper to convert to low-level filter.
805806
806807
Args:
807808
post (bool): Indicates if this is a post-filter node.
808809
809810
Returns:
810811
Optional[Node]: The single or composite filter corresponding to
811-
the pre- or post-filter nodes stored.
812-
813-
Raises:
814-
NotImplementedError: If a composite filter must be returned. This
815-
is because the original implementation relied on a low-level
816-
datastore query module.
812+
the pre- or post-filter nodes stored. May return :data:`None`.
817813
"""
818814
filters = []
819815
for node in self._nodes:
@@ -827,7 +823,7 @@ def _to_filter(self, post=False):
827823
if len(filters) == 1:
828824
return filters[0]
829825

830-
raise NotImplementedError("Missing datastore_query.CompositeFilter")
826+
return _datastore_query.make_composite_and_filter(filters)
831827

832828
def _post_filters(self):
833829
"""Helper to extract post-filter nodes, if any.
@@ -962,6 +958,23 @@ def resolve(self, bindings, used):
962958

963959
return DisjunctionNode(*resolved_nodes)
964960

961+
def _to_filter(self, post=False):
    """Helper to convert this OR node to low-level filters.

    Args:
        post (bool): Indicates if this is a post-filter node.

    Returns:
        list: One low-level filter per child node, in node order, each
            obtained from that node's own ``_to_filter``. The filters are
            meant to be combined using OR: the code in `_datastore_query`
            recognizes that a list has been returned and runs one query
            per filter.

    Raises:
        NotImplementedError: If ``post`` is true; post-filters are not
            handled for disjunctions yet.
    """
    if post:
        raise NotImplementedError("No idea what I should do here, yet.")

    filter_pbs = []
    for node in self._nodes:
        filter_pbs.append(node._to_filter(post=post))

    return filter_pbs
977+
965978

966979
# AND and OR are preferred aliases for these.
967980
AND = ConjunctionNode

packages/google-cloud-ndb/tests/system/test_query.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,64 @@ class SomeKind(ndb.Model):
162162
assert results[0].foo == 1
163163
assert results[0].bar == "a"
164164
assert results[0].key.namespace() == OTHER_NAMESPACE
165+
166+
167+
@pytest.mark.usefixtures("client_context")
def test_filter_equal(ds_entity):
    """An equality filter returns only the entity with the matching value."""
    for foo_value in range(5):
        ds_entity(KIND, test_utils.system.unique_resource_id(), foo=foo_value)

    class SomeKind(ndb.Model):
        foo = ndb.IntegerProperty()

    # ``SomeKind.query()`` is not implemented yet, so the query is built
    # directly from ``ndb.Query``.
    matches = ndb.Query(kind=KIND).filter(SomeKind.foo == 2).fetch()
    assert len(matches) == 1
    assert matches[0].foo == 2
181+
182+
183+
@pytest.mark.usefixtures("client_context")
def test_filter_not_equal(ds_entity):
    """A ``!=`` filter returns every entity except the excluded value."""
    for foo_value in range(5):
        ds_entity(KIND, test_utils.system.unique_resource_id(), foo=foo_value)

    class SomeKind(ndb.Model):
        foo = ndb.IntegerProperty()

    # ``SomeKind.query()`` is not implemented yet, so the query is built
    # directly from ``ndb.Query``.
    matches = ndb.Query(kind=KIND).filter(SomeKind.foo != 2).fetch()
    assert len(matches) == 4

    # Result order is not part of the contract here; compare sorted values.
    foo_values = sorted(entity.foo for entity in matches)
    assert foo_values == [0, 1, 3, 4]
199+
200+
201+
@pytest.mark.usefixtures("client_context")
def test_filter_or(dispose_of):
    """An OR filter returns the union of both branches without duplicates.

    Three entities are stored; two of them match ``foo == 1`` and one of
    those two also matches ``bar == "c"``, so exactly two distinct entities
    are expected back.
    """

    class SomeKind(ndb.Model):
        foo = ndb.IntegerProperty()
        bar = ndb.StringProperty()

    @ndb.tasklet
    def make_entities():
        keys = yield (
            SomeKind(foo=1, bar="a").put_async(),
            SomeKind(foo=2, bar="b").put_async(),
            SomeKind(foo=1, bar="c").put_async(),
        )
        # Register every stored key for clean-up after the test.
        for key in keys:
            dispose_of(key._key)

    make_entities().check_success()
    query = ndb.Query(kind=KIND).filter(
        ndb.OR(SomeKind.foo == 1, SomeKind.bar == "c")
    )
    results = query.fetch()
    assert len(results) == 2

    # Both expected entities have ``foo == 1``, so sorting by ``foo`` would
    # leave them in fetch order. Sort by ``bar``, which is distinct, to make
    # the comparison deterministic.
    results = sorted(results, key=operator.attrgetter("bar"))
    assert [entity.bar for entity in results] == ["a", "c"]

0 commit comments

Comments
 (0)