Skip to content

Commit 481b467

Browse files
committed
Merge pull request #1326 from dhermes/bigtable-union-and-intersection-filter
Implementing Bigtable union/intersection row filters.
2 parents 0c74a59 + bba4ddb commit 481b467

File tree

2 files changed

+211
-0
lines changed

2 files changed

+211
-0
lines changed

gcloud/bigtable/row.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,74 @@ def to_pb(self):
663663
return data_pb2.RowFilter(apply_label_transformer=self.label)
664664

665665

666+
class _FilterCombination(RowFilter):
    """Base class for row filters built from a list of other filters.

    Holds the list of sub-filters and implements equality. It does not
    define how the sub-filters are combined; subclasses such as
    :class:`RowFilterChain` and :class:`RowFilterUnion` do that in their
    ``to_pb`` methods.

    :type filters: list
    :param filters: (Optional) List of :class:`RowFilter`. When omitted,
                    a new empty list is used. The value is stored as
                    passed in; it is not copied.
    """

    def __init__(self, filters=None):
        # ``None`` (rather than ``[]``) is the default so that each
        # instance gets its own fresh list.
        self.filters = [] if filters is None else filters

    def __eq__(self, other):
        """Compare with another object.

        :rtype: bool
        :returns: :data:`False` if ``other`` is not an instance of this
                  object's class; otherwise the result of comparing the
                  two ``filters`` attributes with ``==``.
        """
        return (isinstance(other, self.__class__) and
                other.filters == self.filters)
686+
687+
688+
class RowFilterChain(_FilterCombination):
    """Row filter that applies several filters one after another.

    Rows are sent through the filters in sequence: the first filter is
    applied to the row, the second is applied to the output of the first,
    and so on for each subsequent filter.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Converts the row filter to a protobuf.

        Each contained filter is converted with its own ``to_pb`` and the
        results are wrapped, in order, in a ``RowFilter.Chain`` message.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: The converted current object.
        """
        filter_pbs = [row_filter.to_pb() for row_filter in self.filters]
        chain_pb = data_pb2.RowFilter.Chain(filters=filter_pbs)
        return data_pb2.RowFilter(chain=chain_pb)
708+
709+
710+
class RowFilterUnion(_FilterCombination):
    """Row filter that merges the output of several filters.

    Rows are sent through all of the filters simultaneously and the
    filtered results are then merged / interleaved together.

    If multiple cells are produced with the same column and timestamp,
    they will all appear in the output row in an unspecified mutual order.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Converts the row filter to a protobuf.

        Each contained filter is converted with its own ``to_pb`` and the
        results are wrapped, in order, in a ``RowFilter.Interleave``
        message.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: The converted current object.
        """
        filter_pbs = [row_filter.to_pb() for row_filter in self.filters]
        interleave_pb = data_pb2.RowFilter.Interleave(filters=filter_pbs)
        return data_pb2.RowFilter(interleave=interleave_pb)
732+
733+
666734
class ConditionalRowFilter(RowFilter):
667735
"""Conditional row filter which exhibits ternary behavior.
668736

gcloud/bigtable/test_row.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,149 @@ def test_to_pb(self):
781781
self.assertEqual(pb_val, expected_pb)
782782

783783

784+
class Test_FilterCombination(unittest2.TestCase):
    """Unit tests for the ``_FilterCombination`` base class."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import _FilterCombination
        return _FilterCombination

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor_defaults(self):
        # With no arguments, ``filters`` is an empty list.
        combination = self._makeOne()
        self.assertEqual(combination.filters, [])

    def test_constructor_explicit(self):
        # The constructor stores the exact object it was given.
        sentinel = object()
        combination = self._makeOne(filters=sentinel)
        self.assertIs(combination.filters, sentinel)

    def test___eq__(self):
        # Two instances sharing the same ``filters`` value compare equal.
        shared = object()
        left = self._makeOne(filters=shared)
        right = self._makeOne(filters=shared)
        self.assertEqual(left, right)

    def test___eq__type_differ(self):
        # An object of an unrelated type never compares equal.
        combination = self._makeOne(filters=object())
        unrelated = object()
        self.assertNotEqual(combination, unrelated)
813+
814+
815+
class TestRowFilterChain(unittest2.TestCase):
    """Unit tests for ``RowFilterChain.to_pb``."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import RowFilterChain
        return RowFilterChain

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_to_pb(self):
        # A chain of two simple filters serializes to a ``Chain`` message
        # holding each filter's protobuf, in order.
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        strip_filter = StripValueTransformerFilter(True)
        sample_filter = RowSampleFilter(0.25)
        expected_chain = data_pb2.RowFilter.Chain(
            filters=[strip_filter.to_pb(), sample_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(chain=expected_chain)

        chain_filter = self._makeOne(filters=[strip_filter, sample_filter])
        self.assertEqual(chain_filter.to_pb(), expected_pb)

    def test_to_pb_nested(self):
        # A chain may itself contain a chain; the inner chain's protobuf
        # appears as one element of the outer ``Chain`` message.
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import CellsRowLimitFilter
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        inner_chain = self._makeOne(filters=[
            StripValueTransformerFilter(True),
            RowSampleFilter(0.25),
        ])
        limit_filter = CellsRowLimitFilter(11)
        expected_chain = data_pb2.RowFilter.Chain(
            filters=[inner_chain.to_pb(), limit_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(chain=expected_chain)

        outer_chain = self._makeOne(filters=[inner_chain, limit_filter])
        self.assertEqual(outer_chain.to_pb(), expected_pb)
869+
870+
871+
class TestRowFilterUnion(unittest2.TestCase):
    """Unit tests for ``RowFilterUnion.to_pb``."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import RowFilterUnion
        return RowFilterUnion

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_to_pb(self):
        # A union of two simple filters serializes to an ``Interleave``
        # message holding each filter's protobuf, in order.
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        strip_filter = StripValueTransformerFilter(True)
        sample_filter = RowSampleFilter(0.25)
        expected_interleave = data_pb2.RowFilter.Interleave(
            filters=[strip_filter.to_pb(), sample_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(interleave=expected_interleave)

        union_filter = self._makeOne(filters=[strip_filter, sample_filter])
        self.assertEqual(union_filter.to_pb(), expected_pb)

    def test_to_pb_nested(self):
        # A union may itself contain a union; the inner union's protobuf
        # appears as one element of the outer ``Interleave`` message.
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import CellsRowLimitFilter
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        inner_union = self._makeOne(filters=[
            StripValueTransformerFilter(True),
            RowSampleFilter(0.25),
        ])
        limit_filter = CellsRowLimitFilter(11)
        expected_interleave = data_pb2.RowFilter.Interleave(
            filters=[inner_union.to_pb(), limit_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(interleave=expected_interleave)

        outer_union = self._makeOne(filters=[inner_union, limit_filter])
        self.assertEqual(outer_union.to_pb(), expected_pb)
925+
926+
784927
class TestConditionalRowFilter(unittest2.TestCase):
785928

786929
def _getTargetClass(self):

0 commit comments

Comments
 (0)