Skip to content

Commit

Permalink
change script so that results are saved in tables (needed to generate…
Browse files Browse the repository at this point in the history
… tables)
  • Loading branch information
GeorgeBatch committed Sep 9, 2020
1 parent 101ea0c commit c944108
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
,Random Forests,Gaussian Processes
FreeSolv,[-0.332 0. ],[-0.96 0. ]
ESOL-full,[0.228 0.001],[-0.98 0. ]
ESOL-reduced,[0.45 0. ],[-0.98 0. ]
Lipophilicity,[0.726 0. ],[-0.991 0. ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
,Random Forests,Gaussian Processes
FreeSolv,-0.242 +/- 0.53,-0.898 +/- 0.067
ESOL-full,-0.04 +/- 0.475,-0.946 +/- 0.027
ESOL-reduced,0.155 +/- 0.38,-0.946 +/- 0.028
Lipophilicity,0.268 +/- 0.346,-0.973 +/- 0.016
5 changes: 5 additions & 0 deletions ci_comparison_tables/mult_runs_total_rmse_pm_std.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
,Random Forests,Gaussian Processes
FreeSolv,1.177 +/- 0.264,1.417 +/- 0.251
ESOL-full,0.638 +/- 0.041,0.653 +/- 0.063
ESOL-reduced,0.68 +/- 0.046,0.657 +/- 0.063
Lipophilicity,0.671 +/- 0.02,0.637 +/- 0.024
5 changes: 5 additions & 0 deletions ci_comparison_tables/mult_runs_within95_pm_std.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
,Random Forests,Gaussian Processes
FreeSolv,0.994 +/- 0.009,0.996 +/- 0.005
ESOL-full,0.968 +/- 0.02,0.922 +/- 0.015
ESOL-reduced,0.919 +/- 0.047,0.922 +/- 0.015
Lipophilicity,0.811 +/- 0.042,0.938 +/- 0.009
5 changes: 5 additions & 0 deletions ci_comparison_tables/one_run_correlations_p_values.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
,Random Forests,Gaussian Processes
FreeSolv,"(-0.65, 0.0)","(-0.85, 0.0)"
ESOL-full,"(0.71, 0.0)","(-0.96, 0.0)"
ESOL-reduced,"(-0.16, 0.01461)","(-0.96, 0.0)"
Lipophilicity,"(0.73, 0.0)","(-0.97, 0.0)"
124 changes: 124 additions & 0 deletions scripts/ci_plots_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@
}


# ----------------------------------------------------------------------------
# result accumulators, filled in while the main loop runs
#
# Every accumulator is a nested dict: results[f'{dataset}_{cf}'][model] -> value

# multiple runs: "mean +/- std" strings
mult_runs_within95_pm_std = {}
mult_runs_rmse_pm_std = {}
mult_runs_corr_rmse_percentile_pm_stds = {}

# multiple runs: correlation of ci-width vs percentile, with its p-value
mult_runs_corr_p_val_ciwidth_percentile = {}

# single run: (correlation, p-value) pairs
one_run_correlations_p_values = {}



# ----------------------------------------------------------------------------
# main loop

Expand All @@ -90,6 +104,20 @@
# report precision
rp = datasets_to_rounding_precision[dataset]

# for recording on all datasets
#
# Open an empty per-model record for the current dataset in every results
# table; the entries are filled in while iterating over the models.
results_key = f'{dataset}_{cf}'
for results_table in (
    # one run
    one_run_correlations_p_values,
    # mult runs mean, std
    mult_runs_corr_rmse_percentile_pm_stds,
    mult_runs_rmse_pm_std,
    mult_runs_within95_pm_std,
    # mult runs, correlation of ci-width vs percentile with p-value
    mult_runs_corr_p_val_ciwidth_percentile,
):
    results_table[results_key] = {}




for model in ['rf', 'gp']:
assert model in ['rf', 'gp']

Expand Down Expand Up @@ -171,6 +199,11 @@
plt.savefig(f'{PLOTS_DIR}/ci_plots/cumulrmse_vs_confidence_one_run_{dataset}_{cf}_{model}.png', dpi=DPI, bbox_inches='tight')
plt.close()

# --------------------------------------------------------------------
# record one-run correlations and p-values
#
# Correlation (with its p-value) between the confidence percentiles and the
# flipped cumulative RMSE of this run, stored per dataset and model.
corr, p_val = pearsonr(confidence_percentiles, flipped_cumul_rmse)
# Cast to built-in floats before rounding: a tuple renders its elements with
# repr(), and on NumPy >= 2 the repr of a NumPy scalar is "np.float64(...)",
# which would otherwise end up verbatim in the exported CSV table.
one_run_correlations_p_values[f'{dataset}_{cf}'][model] = (
    round(float(corr), 2),
    round(float(p_val), 5),
)


# --------------------------------------------------------------------
# multiple runs
Expand Down Expand Up @@ -241,6 +274,34 @@
flipped_cumulrmse_upper = flipped_cumulrmse_mean + 1.96*flipped_cumulrmse_sdt


######################################################################
# Summary statistics over the multiple runs for the current dataset and
# model. Each value is stored as results[f'{dataset}_{cf}'][model].

# --------------------------------------------------------------------
# correlation mean +/- std of cumulrmse vs percentile
corr_mean = np.mean(cumulrmse_vs_percentile_corr_mult_runs).round(3)
corr_std = np.std(cumulrmse_vs_percentile_corr_mult_runs).round(3)
mult_runs_corr_rmse_percentile_pm_stds[f'{dataset}_{cf}'][model] = f'{corr_mean} +/- {corr_std}'

# --------------------------------------------------------------------
# rmse mean +/- std
rmse_mean = np.mean(rmse_mult_runs).round(3)
rmse_std = np.std(rmse_mult_runs).round(3)
mult_runs_rmse_pm_std[f'{dataset}_{cf}'][model] = f'{rmse_mean} +/- {rmse_std}'

# --------------------------------------------------------------------
# within95 mean +/- std
within95_mean = np.mean(within_95_cis_mult_runs).round(3)
within95_std = np.std(within_95_cis_mult_runs).round(3)
mult_runs_within95_pm_std[f'{dataset}_{cf}'][model] = f'{within95_mean} +/- {within95_std}'

# --------------------------------------------------------------------
# correlation, and p-value of ci width against percentile
#
# NOTE(review): the std of the flipped cumulative RMSE is used in place of the
# CI width; the upper band is mean + 1.96 * std, so the width is presumably
# proportional to this std -- confirm against the lower band.
ciwidth_corr, ciwidth_p_val = pearsonr(flipped_cumulrmse_sdt, confidence_percentiles)
# Store a tuple of built-in floats rather than a NumPy array: an array is
# written to the CSV as "[-0.332 0. ]", which is hard to parse and does not
# match the "(corr, p_value)" layout of the one-run table.
mult_runs_corr_p_val_ciwidth_percentile[f'{dataset}_{cf}'][model] = (
    round(float(ciwidth_corr), 3),
    round(float(ciwidth_p_val), 3),
)

######################################################################

# --------------------------------------------------------------------
# big plots together

Expand Down Expand Up @@ -270,3 +331,66 @@
plt.close()

print()



# Display labels for the exported tables.
#
# Rows: keys are the f'{dataset}_{cf}' identifiers used while recording
# results, values are the names shown in the tables.
row_mapper = {
    'freesolv_full': 'FreeSolv',
    'esol_full': 'ESOL-full',
    'esol_reduced': 'ESOL-reduced',
    'lipophilicity_full': 'Lipophilicity',
}

# Columns: model identifiers mapped to the names shown in the tables.
column_mapper = {
    'rf': 'Random Forests',
    'gp': 'Gaussian Processes',
}

# Presentation order of the renamed rows and columns. These must stay lists:
# they are passed to DataFrame.loc, where a tuple would mean something else.
row_order = ['FreeSolv', 'ESOL-full', 'ESOL-reduced', 'Lipophilicity']
column_order = ['Random Forests', 'Gaussian Processes']


def _export_results_table(results, heading, csv_name):
    """Print one results table and save it as a CSV file.

    Parameters
    ----------
    results : dict
        Nested dict ``results[dataset_key][model_key] -> value`` collected in
        the main loop.
    heading : str
        Line printed above the table.
    csv_name : str
        File name of the CSV written to ``../ci_comparison_tables/``.

    Returns
    -------
    pandas.DataFrame
        The table with renamed, ordered rows (datasets) and columns (models).
    """
    print(f"\n{heading}")
    # Outer dict keys become columns, so transpose to get datasets as rows.
    table = pd.DataFrame(results).T
    table = table.rename(mapper=row_mapper, axis='index')
    table = table.rename(mapper=column_mapper, axis='columns')
    table = table.loc[row_order, column_order]
    print(table)
    # NOTE: the path is relative to the current working directory.
    table.to_csv(f'../ci_comparison_tables/{csv_name}', index=True)
    return table


# (results, printed heading, CSV file name) for every table to export.
for results, heading, csv_name in (
    (
        one_run_correlations_p_values,
        "One run correlations and p-values:",
        'one_run_correlations_p_values.csv',
    ),
    (
        mult_runs_corr_rmse_percentile_pm_stds,
        "correlation mean +/- std of cumulrmse vs percentile:",
        'mult_runs_corr_rmse_percentile_pm_stds.csv',
    ),
    (
        mult_runs_rmse_pm_std,
        "rmse mean +/- std:",
        'mult_runs_total_rmse_pm_std.csv',
    ),
    (
        mult_runs_within95_pm_std,
        "within95 mean +/- std:",
        'mult_runs_within95_pm_std.csv',
    ),
    (
        mult_runs_corr_p_val_ciwidth_percentile,
        "correlation, and p-value of ci width against percentile:",
        'mult_runs_corr_p_val_ciwidth_percentile.csv',
    ),
):
    # Keep binding `df` at module level, as the original script did.
    df = _export_results_table(results, heading, csv_name)

0 comments on commit c944108

Please sign in to comment.