Skip to content

Commit

Permalink
fix fillna (openproblems-bio#53)
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood authored May 30, 2024
1 parent 22fc360 commit 7404b19
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/task/methods/lgc_ensemble_helpers/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def prepare_data(par, paths):
mean_sm_name = de_sm_name.groupby('sm_name').mean().reset_index()
std_cell_type = de_cell_type.groupby('cell_type').std().reset_index()
std_sm_name = de_sm_name.groupby('sm_name').std().reset_index()
- std_sm_name_filled = std_sm_name.fillna(0)
+ std_sm_name = std_sm_name.fillna(0)
cell_types = de_cell_type.groupby('cell_type').quantile(0.1).reset_index()['cell_type'] # This is just to get cell types in the right order for the next line
quantiles_cell_type = pd.concat([pd.DataFrame(cell_types)]+[de_cell_type.groupby('cell_type')[col]\
.quantile([0.25, 0.50, 0.75], interpolation='linear').unstack().reset_index(drop=True) for col in list(de_train.columns)[5:]], axis=1)
Expand All @@ -31,7 +31,7 @@ def prepare_data(par, paths):
mean_cell_type.to_csv(f'{paths["train_data_aug_dir"]}/mean_cell_type.csv', index=False)
std_cell_type.to_csv(f'{paths["train_data_aug_dir"]}/std_cell_type.csv', index=False)
mean_sm_name.to_csv(f'{paths["train_data_aug_dir"]}/mean_sm_name.csv', index=False)
- std_sm_name_filled.to_csv(f'{paths["train_data_aug_dir"]}/std_sm_name.csv', index=False)
+ std_sm_name.to_csv(f'{paths["train_data_aug_dir"]}/std_sm_name.csv', index=False)
quantiles_cell_type.to_csv(f'{paths["train_data_aug_dir"]}/quantiles_cell_type.csv', index=False)
## Create one hot encoding features
one_hot_encode(de_train[["cell_type", "sm_name"]], id_map[["cell_type", "sm_name"]], out_dir=paths["train_data_aug_dir"])
Expand Down
4 changes: 2 additions & 2 deletions src/task/methods/lgc_ensemble_prepare/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
mean_sm_name = de_sm_name.groupby('sm_name').mean().reset_index()
std_cell_type = de_cell_type.groupby('cell_type').std().reset_index()
std_sm_name = de_sm_name.groupby('sm_name').std().reset_index()
- std_sm_name_filled = std_sm_name.fillna(0)
+ std_sm_name = std_sm_name.fillna(0)
cell_types = de_cell_type.groupby('cell_type').quantile(0.1).reset_index()['cell_type'] # This is just to get cell types in the right order for the next line
quantiles_cell_type = pd.concat(
[pd.DataFrame(cell_types)] +
Expand All @@ -75,7 +75,7 @@
mean_cell_type.to_csv(f'{par["train_data_aug_dir"]}/mean_cell_type.csv', index=False)
std_cell_type.to_csv(f'{par["train_data_aug_dir"]}/std_cell_type.csv', index=False)
mean_sm_name.to_csv(f'{par["train_data_aug_dir"]}/mean_sm_name.csv', index=False)
- std_sm_name_filled.to_csv(f'{par["train_data_aug_dir"]}/std_sm_name.csv', index=False)
+ std_sm_name.to_csv(f'{par["train_data_aug_dir"]}/std_sm_name.csv', index=False)
quantiles_cell_type.to_csv(f'{par["train_data_aug_dir"]}/quantiles_cell_type.csv', index=False)
with open(f'{par["train_data_aug_dir"]}/gene_names.json', 'w') as f:
json.dump(gene_names, f)
Expand Down

0 comments on commit 7404b19

Please sign in to comment.