Skip to content

Commit 6f36ff0

Browse files
authored
Merge pull request #40 from DrGFreeman/39-get-attributes
Add parameter to select item attributes to get
2 parents 6e78f3e + 81d076b commit 6f36ff0

File tree

4 files changed

+212
-71
lines changed

dynamo_pandas/dynamo_pandas.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from .transactions import put_items
66

77

8-
def get_df(*, table, keys=None, dtype=None):
8+
def get_df(*, table, keys=None, attributes=None, dtype=None):
99
"""Get items from a table into a dataframe.
1010
1111
Parameters
@@ -16,9 +16,15 @@ def get_df(*, table, keys=None, dtype=None):
1616
keys : list[dict]
1717
List of keys to get where each key is represented by a dictionary.
1818
19-
dtype : dict
20-
Data type for data or columns. E.g. {‘a’: np.float64, ‘b’: np.int32, ‘c’:
21-
‘Int64’} Use str or object.
19+
attributes : list[str]
20+
Names of the item attributes to return as dataframe columns. If None (default),
21+
all attributes are returned.
22+
23+
dtype : data type or dict of column names -> data type
24+
Use a numpy.dtype or Python type to cast entire pandas object to the same type.
25+
Alternatively, use {col: dtype, …}, where col is a column label and dtype is a
26+
numpy.dtype or Python type to cast one or more of the DataFrame’s columns to
27+
column-specific types.
2228
2329
Returns
2430
-------
@@ -105,16 +111,27 @@ def get_df(*, table, keys=None, dtype=None):
105111
106112
>>> df = get_df(table="players")
107113
>>> print(df)
108-
bonus_points player_id last_play rating play_time
109-
0 4.0 player_three 2021-01-21 10:22:43 2.5 1 days 14:01:19
110-
1 NaN player_four 2021-01-22 13:51:12 4.8 0 days 03:45:49
111-
2 3.0 player_one 2021-01-18 22:47:23 4.3 2 days 17:41:55
112-
3 1.0 player_two 2021-01-19 19:07:54 3.8 0 days 22:07:34
114+
bonus_points player_id last_play rating play_time
115+
0 4.0 player_three 2021-01-21 10:22:43 2.5 1 days 14:01:19
116+
1 NaN player_four 2021-01-22 13:51:12 4.8 0 days 03:45:49
117+
2 3.0 player_one 2021-01-18 22:47:23 4.3 2 days 17:41:55
118+
3 1.0 player_two 2021-01-19 19:07:54 3.8 0 days 22:07:34
119+
120+
Specifying item attributes via the ``attributes`` parameter returns only the
121+
columns corresponding to the specified attributes:
122+
123+
>>> df = get_df(table="players", attributes=["player_id", "rating"])
124+
>>> print(df)
125+
player_id rating
126+
0 player_three 2.5
127+
1 player_four 4.8
128+
2 player_one 4.3
129+
3 player_two 3.8
113130
""" # noqa: E501
114131
if keys is not None:
115-
items = get_items(keys=keys, table=table)
132+
items = get_items(keys=keys, table=table, attributes=attributes)
116133
else:
117-
items = get_all_items(table=table)
134+
items = get_all_items(table=table, attributes=attributes)
118135

119136
return _to_df(items=items, dtype=dtype)
120137

dynamo_pandas/transactions/transactions.py

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def _batches(items, batch_size):
2222
start += batch_size
2323

2424

25-
def get_item(*, key, table):
25+
def get_item(*, key, table, attributes=None):
2626
"""Get a single item from a table.
2727
2828
Parameters
@@ -33,6 +33,10 @@ def get_item(*, key, table):
3333
table : str
3434
Name of the DynamoDB table.
3535
36+
attributes : list[str]
37+
Names of the item attributes to return. If None (default), all attributes are
38+
returned.
39+
3640
Returns
3741
-------
3842
dict, None
@@ -49,15 +53,29 @@ def get_item(*, key, table):
4953
'last_play': '2021-01-19 19:07:54',
5054
'rating': 3.8,
5155
'play_time': '0 days 22:07:34'}
56+
57+
Get only specific attributes:
58+
59+
>>> item = get_item(
60+
... key={"player_id": "player_two"},
61+
... table="players",
62+
... attributes=["play_time", "rating"]
63+
... )
64+
>>> print(item)
65+
{'rating': 3.8, 'play_time': '0 days 22:07:34'}
5266
"""
5367
table = boto3.resource("dynamodb").Table(table)
5468

55-
item = table.get_item(Key=key).get("Item")
69+
kwargs = {}
70+
if attributes is not None:
71+
kwargs["ProjectionExpression"] = ", ".join(attributes)
72+
73+
item = table.get_item(Key=key, **kwargs).get("Item")
5674

5775
return _deserialize(item)
5876

5977

60-
def get_items(*, keys, table):
78+
def get_items(*, keys, table, attributes=None):
6179
"""Get multiple items from a table.
6280
6381
Parameters
@@ -68,6 +86,10 @@ def get_items(*, keys, table):
6886
table : str
6987
Name of the DynamoDB table.
7088
89+
attributes : list[str]
90+
Names of the item attributes to return. If None (default), all attributes are
91+
returned.
92+
7193
Returns
7294
-------
7395
list[dict]
@@ -88,12 +110,27 @@ def get_items(*, keys, table):
88110
>>> print(items)
89111
[{'bonus_points': 3, 'player_id': 'player_one', 'last_play': '2021-01-18 22:47:23', 'rating': 4.3, 'play_time': '2 days 17:41:55'},
90112
{'bonus_points': 1, 'player_id': 'player_two', 'last_play': '2021-01-19 19:07:54', 'rating': 3.8, 'play_time': '0 days 22:07:34'}]
113+
114+
Get only specific attributes:
115+
116+
>>> items = get_items(
117+
... keys=[{"player_id": "player_two"}, {"player_id": "player_one"}],
118+
... table="players",
119+
... attributes=["player_id", "play_time"]
120+
... )
121+
>>> print(items)
122+
[{'player_id': 'player_one', 'play_time': '2 days 17:41:55'}, {'player_id': 'player_two', 'play_time': '0 days 22:07:34'}]
91123
""" # noqa: E501
92124

93-
def _request(keys, table=table):
94-
return {table: {"Keys": keys}}
125+
def _request(keys, table=table, attributes=attributes):
126+
table_dict = {"Keys": keys}
127+
128+
if attributes is not None:
129+
table_dict["ProjectionExpression"] = ", ".join(attributes)
130+
131+
return {table: table_dict}
95132

96-
def _get_items(keys, table=table):
133+
def _get_items(keys, table=table, attributes=attributes):
97134
response = resource.batch_get_item(RequestItems=_request(keys))
98135
items = response["Responses"][table]
99136

@@ -114,7 +151,7 @@ def _get_items(keys, table=table):
114151
return _deserialize(items)
115152

116153

117-
def get_all_items(*, table):
154+
def get_all_items(*, table, attributes=None):
118155
"""Get all the items in a table.
119156
120157
This function performs a scan of the table.
@@ -124,6 +161,10 @@ def get_all_items(*, table):
124161
table : str
125162
Name of the DynamoDB table.
126163
164+
attributes : list[str]
165+
Names of the item attributes to return. If None (default), all attributes are
166+
returned.
167+
127168
Returns
128169
-------
129170
list[dict]
@@ -138,14 +179,28 @@ def get_all_items(*, table):
138179
{'bonus_points': None, 'player_id': 'player_four', 'last_play': '2021-01-22 13:51:12', 'rating': 4.8, 'play_time': '0 days 03:45:49'},
139180
{'bonus_points': 3, 'player_id': 'player_one', 'last_play': '2021-01-18 22:47:23', 'rating': 4.3, 'play_time': '2 days 17:41:55'},
140181
{'bonus_points': 1, 'player_id': 'player_two', 'last_play': '2021-01-19 19:07:54', 'rating': 3.8, 'play_time': '0 days 22:07:34'}]
182+
183+
Get only specific attributes:
184+
185+
>>> items = get_all_items(table="players", attributes=["player_id", "play_time"])
186+
>>> print(items)
187+
[{'player_id': 'player_three', 'play_time': '1 days 14:01:19'},
188+
{'player_id': 'player_four', 'play_time': '0 days 03:45:49'},
189+
{'player_id': 'player_one', 'play_time': '2 days 17:41:55'},
190+
{'player_id': 'player_two', 'play_time': '0 days 22:07:34'}]
141191
""" # noqa: E501
142192
table = boto3.resource("dynamodb").Table(table)
143193

144-
response = table.scan()
194+
kwargs = {}
195+
if attributes is not None:
196+
kwargs["ProjectionExpression"] = ", ".join(attributes)
197+
198+
response = table.scan(**kwargs)
199+
145200
items = response["Items"]
146201

147202
while "LastEvaluatedKey" in response:
148-
response = table.scan(ExclusiveStartKey=response["LastEvaluatedKey"])
203+
response = table.scan(ExclusiveStartKey=response["LastEvaluatedKey"], **kwargs)
149204
items.extend(response["Items"])
150205

151206
return _deserialize(items)
@@ -207,7 +262,7 @@ def put_items(*, items, table):
207262
Parameters
208263
----------
209264
items : list[dict]
210-
List of dictionaties where each dictionary represents an item's attributes.
265+
List of dictionaries where each dictionary represents an item's attributes.
211266
212267
table : str
213268
Name of the DynamoDB table.

tests/test_main.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,39 @@ def test_single_key_missing(self, test_df_table):
132132

133133
assert df.empty
134134

135+
def test_attributes_returns_specified_columns(self, test_df_table):
136+
"""Test that only columns corresponding to the specified attributes are
137+
returned."""
138+
df = get_df(
139+
table=test_df_table,
140+
attributes=["id", "A", "E"],
141+
keys=[{"id": 0}, {"id": 1}],
142+
)
143+
144+
assert df.equals(
145+
pd.DataFrame(
146+
[
147+
{"A": "abc", "E": "2000-01-01 00:00:00+00:00", "id": 0},
148+
{"A": None, "E": "2000-12-31 23:59:59+00:00", "id": 1},
149+
]
150+
)
151+
)
152+
153+
def test_attributes_and_keys(self, test_df_table):
154+
"""Test that only columns corresponding to the specified attributes are
155+
returned along with keys."""
156+
df = get_df(table=test_df_table, attributes=["id", "A", "E"])
157+
158+
assert df.equals(
159+
pd.DataFrame(
160+
[
161+
{"A": "abc", "E": "2000-01-01 00:00:00+00:00", "id": 0},
162+
{"A": None, "E": "2000-12-31 23:59:59+00:00", "id": 1},
163+
{"A": None, "E": None, "id": 2},
164+
]
165+
)
166+
)
167+
135168
def test_dtype(self, test_df_table):
136169
"""Test that the dtype parameter controls the returned data types."""
137170
df = get_df(

0 commit comments

Comments
 (0)