Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions dynamo_pandas/dynamo_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .transactions import put_items


def get_df(*, table, keys=None, dtype=None):
def get_df(*, table, keys=None, attributes=None, dtype=None):
"""Get items from a table into a dataframe.

Parameters
Expand All @@ -16,9 +16,15 @@ def get_df(*, table, keys=None, dtype=None):
keys : list[dict]
List of keys to get where each key is represented by a dictionary.

dtype : dict
Data type for data or columns. E.g. {‘a’: np.float64, ‘b’: np.int32, ‘c’:
‘Int64’} Use str or object.
attributes : list[str]
Names of the item attributes to return as dataframe columns. If None (default),
all attributes are returned.

dtype : data type or dict of column names -> data type
Use a numpy.dtype or Python type to cast entire pandas object to the same type.
Alternatively, use {col: dtype, …}, where col is a column label and dtype is a
numpy.dtype or Python type to cast one or more of the DataFrame’s columns to
column-specific types.

Returns
-------
Expand Down Expand Up @@ -105,16 +111,27 @@ def get_df(*, table, keys=None, dtype=None):

>>> df = get_df(table="players")
>>> print(df)
bonus_points player_id last_play rating play_time
0 4.0 player_three 2021-01-21 10:22:43 2.5 1 days 14:01:19
1 NaN player_four 2021-01-22 13:51:12 4.8 0 days 03:45:49
2 3.0 player_one 2021-01-18 22:47:23 4.3 2 days 17:41:55
3 1.0 player_two 2021-01-19 19:07:54 3.8 0 days 22:07:34
bonus_points player_id last_play rating play_time
0 4.0 player_three 2021-01-21 10:22:43 2.5 1 days 14:01:19
1 NaN player_four 2021-01-22 13:51:12 4.8 0 days 03:45:49
2 3.0 player_one 2021-01-18 22:47:23 4.3 2 days 17:41:55
3 1.0 player_two 2021-01-19 19:07:54 3.8 0 days 22:07:34

Specifying item attributes via the ``attributes`` parameter returns only the
columns corresponding to the specified attributes:

>>> df = get_df(table="players", attributes=["player_id", "rating"])
>>> print(df)
player_id rating
0 player_three 2.5
1 player_four 4.8
2 player_one 4.3
3 player_two 3.8
""" # noqa: E501
if keys is not None:
items = get_items(keys=keys, table=table)
items = get_items(keys=keys, table=table, attributes=attributes)
else:
items = get_all_items(table=table)
items = get_all_items(table=table, attributes=attributes)

return _to_df(items=items, dtype=dtype)

Expand Down
75 changes: 65 additions & 10 deletions dynamo_pandas/transactions/transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def _batches(items, batch_size):
start += batch_size


def get_item(*, key, table):
def get_item(*, key, table, attributes=None):
"""Get a single item from a table.

Parameters
Expand All @@ -33,6 +33,10 @@ def get_item(*, key, table):
table : str
Name of the DynamoDB table.

attributes : list[str]
Names of the item attributes to return. If None (default), all attributes are
returned.

Returns
-------
dict, None
Expand All @@ -49,15 +53,29 @@ def get_item(*, key, table):
'last_play': '2021-01-19 19:07:54',
'rating': 3.8,
'play_time': '0 days 22:07:34'}

Get only specific attributes:

>>> item = get_item(
... key={"player_id": "player_two"},
... table="players",
... attributes=["play_time", "rating"]
... )
>>> print(item)
{'rating': 3.8, 'play_time': '0 days 22:07:34'}
"""
table = boto3.resource("dynamodb").Table(table)

item = table.get_item(Key=key).get("Item")
kwargs = {}
if attributes is not None:
kwargs["ProjectionExpression"] = ", ".join(attributes)

item = table.get_item(Key=key, **kwargs).get("Item")

return _deserialize(item)


def get_items(*, keys, table):
def get_items(*, keys, table, attributes=None):
"""Get multiple items from a table.

Parameters
Expand All @@ -68,6 +86,10 @@ def get_items(*, keys, table):
table : str
Name of the DynamoDB table.

attributes : list[str]
Names of the item attributes to return. If None (default), all attributes are
returned.

Returns
-------
list[dict]
Expand All @@ -88,12 +110,27 @@ def get_items(*, keys, table):
>>> print(items)
[{'bonus_points': 3, 'player_id': 'player_one', 'last_play': '2021-01-18 22:47:23', 'rating': 4.3, 'play_time': '2 days 17:41:55'},
{'bonus_points': 1, 'player_id': 'player_two', 'last_play': '2021-01-19 19:07:54', 'rating': 3.8, 'play_time': '0 days 22:07:34'}]

Get only specific attributes:

>>> items = get_items(
... keys=[{"player_id": "player_two"}, {"player_id": "player_one"}],
... table="players",
... attributes=["player_id", "play_time"]
... )
>>> print(items)
[{'player_id': 'player_one', 'play_time': '2 days 17:41:55'}, {'player_id': 'player_two', 'play_time': '0 days 22:07:34'}]
""" # noqa: E501

def _request(keys, table=table):
return {table: {"Keys": keys}}
def _request(keys, table=table, attributes=attributes):
table_dict = {"Keys": keys}

if attributes is not None:
table_dict["ProjectionExpression"] = ", ".join(attributes)

return {table: table_dict}

def _get_items(keys, table=table):
def _get_items(keys, table=table, attributes=attributes):
response = resource.batch_get_item(RequestItems=_request(keys))
items = response["Responses"][table]

Expand All @@ -114,7 +151,7 @@ def _get_items(keys, table=table):
return _deserialize(items)


def get_all_items(*, table):
def get_all_items(*, table, attributes=None):
"""Get all the items in a table.

This function performs a scan of the table.
Expand All @@ -124,6 +161,10 @@ def get_all_items(*, table):
table : str
Name of the DynamoDB table.

attributes : list[str]
Names of the item attributes to return. If None (default), all attributes are
returned.

Returns
-------
list[dict]
Expand All @@ -138,14 +179,28 @@ def get_all_items(*, table):
{'bonus_points': None, 'player_id': 'player_four', 'last_play': '2021-01-22 13:51:12', 'rating': 4.8, 'play_time': '0 days 03:45:49'},
{'bonus_points': 3, 'player_id': 'player_one', 'last_play': '2021-01-18 22:47:23', 'rating': 4.3, 'play_time': '2 days 17:41:55'},
{'bonus_points': 1, 'player_id': 'player_two', 'last_play': '2021-01-19 19:07:54', 'rating': 3.8, 'play_time': '0 days 22:07:34'}]

Get only specific attributes:

>>> items = get_all_items(table="players", attributes=["player_id", "play_time"])
>>> print(items)
[{'player_id': 'player_three', 'play_time': '1 days 14:01:19'},
{'player_id': 'player_four', 'play_time': '0 days 03:45:49'},
{'player_id': 'player_one', 'play_time': '2 days 17:41:55'},
{'player_id': 'player_two', 'play_time': '0 days 22:07:34'}]
""" # noqa: E501
table = boto3.resource("dynamodb").Table(table)

response = table.scan()
kwargs = {}
if attributes is not None:
kwargs["ProjectionExpression"] = ", ".join(attributes)

response = table.scan(**kwargs)

items = response["Items"]

while "LastEvaluatedKey" in response:
response = table.scan(ExclusiveStartKey=response["LastEvaluatedKey"])
response = table.scan(ExclusiveStartKey=response["LastEvaluatedKey"], **kwargs)
items.extend(response["Items"])

return _deserialize(items)
Expand Down Expand Up @@ -207,7 +262,7 @@ def put_items(*, items, table):
Parameters
----------
items : list[dict]
List of dictionaties where each dictionary represents an item's attributes.
List of dictionaries where each dictionary represents an item's attributes.

table : str
Name of the DynamoDB table.
Expand Down
33 changes: 33 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,39 @@ def test_single_key_missing(self, test_df_table):

assert df.empty

def test_attributes_returns_specified_columns(self, test_df_table):
"""Test that get_df called with both ``keys`` and ``attributes`` returns only
the columns for the requested attributes, for the requested keys."""
# Request three attributes, and only for the items whose id is 0 or 1.
df = get_df(
table=test_df_table,
attributes=["id", "A", "E"],
keys=[{"id": 0}, {"id": 1}],
)

# The expected frame has exactly the columns A, E and id, written in that order
# rather than in the order given in ``attributes``.
# NOTE(review): presumably this mirrors the order in which the attributes come
# back from the table -- confirm against the fixture data.
assert df.equals(
pd.DataFrame(
[
{"A": "abc", "E": "2000-01-01 00:00:00+00:00", "id": 0},
{"A": None, "E": "2000-12-31 23:59:59+00:00", "id": 1},
]
)
)

def test_attributes_and_keys(self, test_df_table):
"""Test that get_df called with ``attributes`` but without ``keys`` returns
only the columns for the requested attributes.

NOTE(review): despite its name, this test passes no ``keys``; it is
``test_attributes_returns_specified_columns`` that combines ``attributes`` with
``keys``. The two test names look swapped -- confirm before renaming."""
# No ``keys`` argument: get_df reads the table without restricting it to keys.
df = get_df(table=test_df_table, attributes=["id", "A", "E"])

# Three rows (ids 0-2) are expected, restricted to columns A, E and id.
# NOTE(review): presumably these are all the items in the fixture table, and
# the item with id 2 has neither A nor E set -- confirm against the fixture.
assert df.equals(
pd.DataFrame(
[
{"A": "abc", "E": "2000-01-01 00:00:00+00:00", "id": 0},
{"A": None, "E": "2000-12-31 23:59:59+00:00", "id": 1},
{"A": None, "E": None, "id": 2},
]
)
)

def test_dtype(self, test_df_table):
"""Test that the dtype parameter controls the returned data types."""
df = get_df(
Expand Down
Loading