Skip to content

Commit

Permalink
Fix column ordering of encoded datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreasLH committed Dec 14, 2022
1 parent b0b40f4 commit 158effc
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,18 @@ def dataset_info_restructure(dataset_name, data):
inv_var_dtype[x] = k
# finding number of values per variable
var_info = {}

offset = 0
for idx, variable_name in enumerate(list(data.columns)):
if inv_var_dtype[idx] == 'categorical':
new_columns = pd.get_dummies(data[variable_name])
new_columns_names = list(variable_name + '_' + new_columns.columns.astype('str'))
data[new_columns_names] = new_columns
for i, name in enumerate(new_columns_names):
data.insert(loc=idx+i+1+offset, column=name, value=new_columns.iloc[:,i])
# data[new_columns_names] = new_columns
num_unique = len(new_columns_names) # num unique values
# dropping the original (un-encoded) column
offset += num_unique
offset -= 1
data.drop(columns=variable_name, inplace=True)
var_info[idx] = {'name': variable_name, 'dtype': 'categorical', 'num_vals': num_unique}

Expand Down

0 comments on commit 158effc

Please sign in to comment.