Skip to content

Commit

Permalink
add support for pandas DataFrame (keras-team#8199)
Browse files: browse the repository at this point in the history
* add support for pandas DataFrame

* multiple updates according to @fchollet's review

* Handle DataFrames correctly when they are passed inside a list or dict of model inputs/outputs
  • Loading branch information
icyblade authored and fchollet committed Oct 23, 2017
1 parent 89e6eb0 commit c173b76
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
9 changes: 9 additions & 0 deletions keras/engine/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def _standardize_input_data(data, names, shapes=None,
if data is None:
return [None for _ in range(len(names))]
if isinstance(data, dict):
for key, value in data.items():
if value.__class__.__name__ == 'DataFrame':
data[key] = value.values
arrays = []
for name in names:
if name not in data:
Expand All @@ -68,6 +71,9 @@ def _standardize_input_data(data, names, shapes=None,
str(names))
arrays.append(data[name])
elif isinstance(data, list):
for key, value in enumerate(data):
if value.__class__.__name__ == 'DataFrame':
data[key] = value.values
if len(data) != len(names):
if data and hasattr(data[0], 'shape'):
raise ValueError('Error when checking model ' +
Expand Down Expand Up @@ -95,6 +101,9 @@ def _standardize_input_data(data, names, shapes=None,
'The list you passed was: ' +
str(data)[:200])
arrays = data
elif data.__class__.__name__ == 'DataFrame':
# test if data is a DataFrame, without pandas installed
data = data.values
else:
if not hasattr(data, 'shape'):
raise TypeError('Error when checking model ' +
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
'tests': ['pytest',
'pytest-pep8',
'pytest-xdist',
'pytest-cov'],
'pytest-cov',
'pandas'],
},
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down
14 changes: 14 additions & 0 deletions tests/keras/engine/test_training.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import numpy as np
import pandas as pd
from numpy.testing import assert_allclose
import sys
import scipy.sparse as sparse
Expand Down Expand Up @@ -106,9 +107,13 @@ def test_model_methods():

input_a_np = np.random.random((10, 3))
input_b_np = np.random.random((10, 3))
input_a_df = pd.DataFrame(input_a_np)
input_b_df = pd.DataFrame(input_b_np)

output_a_np = np.random.random((10, 4))
output_b_np = np.random.random((10, 3))
output_a_df = pd.DataFrame(output_a_np)
output_b_df = pd.DataFrame(output_b_np)

# training/testing doesn't work before compiling.
with pytest.raises(RuntimeError):
Expand All @@ -124,6 +129,8 @@ def test_model_methods():
[output_a_np, output_b_np])
out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
{'dense_1': output_a_np, 'dropout': output_b_np})
out = model.train_on_batch([input_a_df, input_b_df],
[output_a_df, output_b_df])

# test fit
out = model.fit([input_a_np, input_b_np],
Expand All @@ -133,6 +140,8 @@ def test_model_methods():
out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
{'dense_1': output_a_np, 'dropout': output_b_np},
epochs=1, batch_size=4)
out = model.fit([input_a_df, input_b_df],
[output_a_df, output_b_df], epochs=1, batch_size=4)

# test validation_split
out = model.fit([input_a_np, input_b_np],
Expand Down Expand Up @@ -165,10 +174,13 @@ def test_model_methods():
[output_a_np, output_b_np])
out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
{'dense_1': output_a_np, 'dropout': output_b_np})
out = model.test_on_batch([input_a_df, input_b_df],
[output_a_df, output_b_df])

# predict_on_batch
out = model.predict_on_batch([input_a_np, input_b_np])
out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
out = model.predict_on_batch([input_a_df, input_b_df])

# predict, evaluate
input_a_np = np.random.random((10, 3))
Expand All @@ -178,7 +190,9 @@ def test_model_methods():
output_b_np = np.random.random((10, 3))

out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
out = model.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df], batch_size=4)
out = model.predict([input_a_np, input_b_np], batch_size=4)
out = model.predict([input_a_df, input_b_df], batch_size=4)

# with sample_weight
input_a_np = np.random.random((10, 3))
Expand Down

0 comments on commit c173b76

Please sign in to comment.