Skip to content

Commit 7cf610b

Browse files
zakonstseaver
authored and committed
BigTable: provide better access to cell values (#4908)
1 parent 5d3c566 commit 7cf610b

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed

bigtable/google/cloud/bigtable/row_data.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@
2121
from google.cloud._helpers import _datetime_from_microseconds
2222
from google.cloud._helpers import _to_bytes
2323

24+
# Error-message templates used when a lookup into a row's cells fails.
# Each one is filled in with ``str.format`` and positional arguments.

# Placeholder: the column family ID that was not found.
_MISSING_COLUMN_FAMILY = (
    'Column family {} is not among '
    'the cells stored in this row.')

# Placeholders, in order: the column, then the column family ID.
_MISSING_COLUMN = (
    'Column {} is not among the cells stored in this row '
    'in the column family {}.')

# Placeholders, in order: the offending index (rendered with ``repr``), the
# column, the column family ID, and the number of cells actually available.
_MISSING_INDEX = (
    'Index {!r} is not valid for the cells stored in this row '
    'for column {} in the column family {}. '
    'There are {} such cells.')
32+
2433

2534
class Cell(object):
2635
"""Representation of a Google Cloud Bigtable Cell.
@@ -175,6 +184,103 @@ def row_key(self):
175184
"""
176185
return self._row_key
177186

187+
def find_cells(self, column_family_id, column):
    """Return the list of cells stored under one column of this row.

    The cells are looked up in ``self._cells``, first by column family ID
    and then by column. The object found there is returned as-is; no copy
    is made.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.

    Returns:
        List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the
        specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    # Each lookup has its own ``try`` so the error message can say which of
    # the two keys was the missing one.
    try:
        columns_in_family = self._cells[column_family_id]
    except KeyError:
        raise KeyError(_MISSING_COLUMN_FAMILY.format(column_family_id))

    try:
        return columns_in_family[column]
    except KeyError:
        raise KeyError(_MISSING_COLUMN.format(column, column_family_id))
217+
218+
def cell_value(self, column_family_id, column, index=0):
    """Return the value of one cell stored in a column of this row.

    The cells for the column are obtained from :meth:`find_cells` and the
    one at position ``index`` is selected.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cell
            is located.
        index (Optional[int]): The offset within the series of values. If
            not specified, will return the first cell.

    Returns:
        The ``value`` attribute of the
        :class:`~google.cloud.bigtable.row_data.Cell` found at ``index``
        in the specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
        IndexError: If ``index`` cannot be found within the cells stored
            in this row for the given ``column_family_id``, ``column``
            pair.
    """
    series = self.find_cells(column_family_id, column)

    try:
        selected = series[index]
    except (TypeError, IndexError):
        # A ``TypeError`` (e.g. ``index=None``) is reported to the caller
        # as ``IndexError`` too, so there is a single failure type.
        available = len(series)
        raise IndexError(
            _MISSING_INDEX.format(
                index, column, column_family_id, available))
    else:
        return selected.value
253+
254+
def cell_values(self, column_family_id, column, max_count=None):
    """Iterate over the values and timestamps of the cells in one column.

    The cells are obtained from :meth:`find_cells` and visited in the
    order in which they are stored.

    This is a generator function: nothing is looked up until iteration
    starts, so the ``KeyError`` cases below surface on the first ``next()``
    (for example at the top of a ``for`` loop), not at call time.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.
        max_count (Optional[int]): The maximum number of cells to use. If
            not specified, every cell in the column is used. Zero or a
            negative number yields nothing.

    Yields:
        Tuple: ``(cell.value, cell.timestamp_micros)`` for each cell in
        the list of cells, up to ``max_count`` of them.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    cells = self.find_cells(column_family_id, column)
    if max_count is None:
        max_count = len(cells)

    for index, cell in enumerate(cells):
        # ``>=`` rather than ``==``: with an equality test a negative
        # ``max_count`` would never match and every cell would be yielded.
        if index >= max_count:
            break

        yield cell.value, cell.timestamp_micros
283+
178284

179285
class InvalidReadRowsResponse(RuntimeError):
180286
"""Exception raised due to invalid response data from back-end."""

bigtable/tests/unit/test_row_data.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,97 @@ def test_to_dict(self):
170170
}
171171
self.assertEqual(result, expected_result)
172172

173+
def test_cell_value(self):
    """``cell_value`` with the default index returns the first cell's value."""
    family = u'name1'
    column = b'col1'
    stored_cell = _make_cell(b'value-bytes')

    # Populate the private cell mapping directly: family -> column -> cells.
    row = self._make_one(None)
    row._cells = {family: {column: [stored_cell]}}

    fetched = row.cell_value(family, column)

    self.assertEqual(fetched, stored_cell.value)
187+
188+
def test_cell_value_invalid_index(self):
    """``cell_value`` raises ``IndexError`` for an unusable ``index``."""
    family_name = u'name1'
    qualifier = b'col1'
    cell = _make_cell(b'')

    partial_row_data = self._make_one(None)
    partial_row_data._cells = {
        family_name: {
            qualifier: [cell],
        },
    }

    # ``None`` is not a valid list index (a ``TypeError`` inside
    # ``cell_value``), while ``1`` is past the end of the single stored
    # cell (a genuine ``IndexError``). Both must reach the caller as
    # ``IndexError``.
    for bad_index in (None, 1):
        with self.assertRaises(IndexError):
            partial_row_data.cell_value(
                family_name, qualifier, index=bad_index)
202+
203+
def test_cell_value_invalid_column_family_key(self):
    """``cell_value`` raises ``KeyError`` for a column family not in the row."""
    # ``_cells`` is deliberately left as ``_make_one`` created it, so the
    # family below is not present (presumably an empty mapping; confirm
    # against the constructor).
    row = self._make_one(None)

    with self.assertRaises(KeyError):
        row.cell_value(u'name1', b'col1')
211+
212+
def test_cell_value_invalid_column_key(self):
    """``cell_value`` raises ``KeyError`` for a column missing from its family."""
    family = u'name1'

    # The family exists but holds no columns, so any column lookup fails.
    row = self._make_one(None)
    row._cells = {family: {}}

    with self.assertRaises(KeyError):
        row.cell_value(family, b'col1')
223+
224+
def test_cell_values(self):
    """``cell_values`` yields one ``(value, timestamp_micros)`` pair per cell."""
    family_name = u'name1'
    qualifier = b'col1'
    cell = _make_cell(b'value-bytes')

    partial_row_data = self._make_one(None)
    partial_row_data._cells = {
        family_name: {
            qualifier: [cell],
        },
    }

    # Drain the generator so that both the number of items and every
    # element of each yielded pair (not just the value) are checked.
    results = list(partial_row_data.cell_values(family_name, qualifier))

    self.assertEqual(results, [(cell.value, cell.timestamp_micros)])
242+
243+
def test_cell_values_with_max_count(self):
    """``cell_values`` stops after ``max_count`` cells."""
    family = u'name1'
    column = b'col1'
    first = _make_cell(b'value-bytes-1')
    second = _make_cell(b'value-bytes-2')

    row = self._make_one(None)
    row._cells = {family: {column: [first, second]}}

    # Two cells are stored but only one is requested.
    values = [
        value
        for value, _ in row.cell_values(family, column, max_count=1)
    ]

    self.assertEqual(1, len(values))
    self.assertEqual(values[0], first.value)
263+
173264
def test_cells_property(self):
174265
partial_row_data = self._make_one(None)
175266
cells = {1: 2}
@@ -732,3 +823,9 @@ def _ReadRowsResponseCellChunkPB(*args, **kw):
732823
message.family_name.value = family_name
733824
message.qualifier.value = qualifier
734825
return message
826+
827+
828+
def _make_cell(value):
    """Build a ``row_data.Cell`` holding ``value`` for use in tests.

    The second constructor argument is ``TestCell.timestamp_micros``, which
    is defined elsewhere in this test module.
    """
    # Imported inside the function, as in the original helper.
    from google.cloud.bigtable.row_data import Cell

    return Cell(value, TestCell.timestamp_micros)

0 commit comments

Comments
 (0)