Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed EntitySet._import_from_dataframe #346

Merged
merged 2 commits into from
Dec 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 21 additions & 58 deletions featuretools/entityset/entityset.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,12 +690,20 @@ def entity_from_dataframe(self,
es["transactions"].df

"""
return self._import_from_dataframe(entity_id, dataframe.copy(), index=index,
make_index=make_index,
time_index=time_index,
secondary_time_index=secondary_time_index,
variable_types=variable_types,
already_sorted=already_sorted)
variable_types = variable_types or {}
entity = Entity(
entity_id,
dataframe,
self,
variable_types=variable_types,
index=index,
time_index=time_index,
secondary_time_index=secondary_time_index,
already_sorted=already_sorted,
make_index=make_index)
self.entity_dict[entity.id] = entity
self.reset_metadata()
return self

def normalize_entity(self, base_entity_id, new_entity_id, index,
additional_variables=None, copy_variables=None,
Expand Down Expand Up @@ -824,12 +832,13 @@ def normalize_entity(self, base_entity_id, new_entity_id, index,
ti_cols = [c if c != old_ti_name else secondary_time_index for c in ti_cols]
make_secondary_time_index = {secondary_time_index: ti_cols}

self._import_from_dataframe(new_entity_id, new_entity_df,
index,
time_index=new_entity_time_index,
secondary_time_index=make_secondary_time_index,
last_time_index=None,
variable_types=transfer_types)
self.entity_from_dataframe(
new_entity_id,
new_entity_df,
index,
time_index=new_entity_time_index,
secondary_time_index=make_secondary_time_index,
variable_types=transfer_types)

for v in additional_variables:
self.entity_dict[base_entity_id].delete_variable(v)
Expand Down Expand Up @@ -1083,52 +1092,6 @@ def related_instances(self, start_entity_id, final_entity_id,
# Private methods ######################################################
###########################################################################

def _import_from_dataframe(self,
entity_id,
dataframe,
index=None,
variable_types=None,
make_index=False,
time_index=None,
secondary_time_index=None,
last_time_index=None,
already_sorted=False):
"""
Load the data for a specified entity from a pandas dataframe.

Args:
entity_id (str) : Unique id to associate with this entity.
dataframe (pd.DataFrame) : Pandas dataframe containing the data.
index (str, optional): Name of the variable used to index the entity.
If None, take the first column.
variable_types (dict[str -> type], optional) : Mapping of variable
ids (column names) to variable types with which to initialize
the entity's variables.
make_index (bool, optional) : If True, assume index does not exist as a column in
dataframe, and create a new column of that name using integers in the range (0, len(dataframe)).
Otherwise, assume index exists in dataframe.
time_index (str, optional) : Name of column to use as a time index for this entity. Must be
a Datetime or Numeric dtype.
secondary_time_index (str, optional): Name of variable containing
time data to use as a second time index for the entity.
already_sorted (bool, optional) : If True, assumes that input dataframe is already sorted by time.
Defaults to False.
"""
variable_types = variable_types or {}
entity = Entity(entity_id,
dataframe,
self,
variable_types=variable_types,
index=index,
time_index=time_index,
secondary_time_index=secondary_time_index,
last_time_index=last_time_index,
already_sorted=already_sorted,
make_index=make_index)
self.entity_dict[entity.id] = entity
self.reset_metadata()
return self

def _add_multigenerational_link_vars(self, frames, start_entity_id,
end_entity_id=None, path=None):
"""
Expand Down
16 changes: 11 additions & 5 deletions featuretools/tests/entityset_tests/test_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,12 @@ def test_converts_datetime():
'time': variable_types.Datetime}

entityset = EntitySet(id='test')
entityset._import_from_dataframe(entity_id='test_entity', index='id',
time_index="time", variable_types=vtypes,
dataframe=df)
entityset.entity_from_dataframe(
entity_id='test_entity',
index='id',
time_index="time",
variable_types=vtypes,
dataframe=df)
pd_col = entityset['test_entity'].df['time']
# assert type(entityset['test_entity']['time']) == variable_types.Datetime
assert type(pd_col[0]) == pd.Timestamp
Expand All @@ -343,8 +346,11 @@ def test_handles_datetime_format():
'time_no_format': variable_types.Datetime}

entityset = EntitySet(id='test')
entityset._import_from_dataframe(entity_id='test_entity', index='id',
variable_types=vtypes, dataframe=df)
entityset.entity_from_dataframe(
entity_id='test_entity',
index='id',
variable_types=vtypes,
dataframe=df)

col_format = entityset['test_entity'].df['time_format']
col_no_format = entityset['test_entity'].df['time_no_format']
Expand Down