change script so that results are saved in tables (needed to generate tables)
GeorgeBatch · Sep 9, 2020 · c944108 · c944108
1 parent 101ea0c
commit c944108
Show file tree

Hide file tree

Showing 6 changed files with 149 additions and 0 deletions.
diff --git a/ci_comparison_tables/mult_runs_corr_p_val_ciwidth_percentile.csv b/ci_comparison_tables/mult_runs_corr_p_val_ciwidth_percentile.csv
@@ -0,0 +1,5 @@
+,Random Forests,Gaussian Processes
+FreeSolv,[-0.332  0.   ],[-0.96  0.  ]
+ESOL-full,[0.228 0.001],[-0.98  0.  ]
+ESOL-reduced,[0.45 0.  ],[-0.98  0.  ]
+Lipophilicity,[0.726 0.   ],[-0.991  0.   ]
diff --git a/ci_comparison_tables/mult_runs_corr_rmse_percentile_pm_stds.csv b/ci_comparison_tables/mult_runs_corr_rmse_percentile_pm_stds.csv
@@ -0,0 +1,5 @@
+,Random Forests,Gaussian Processes
+FreeSolv,-0.242 +/- 0.53,-0.898 +/- 0.067
+ESOL-full,-0.04 +/- 0.475,-0.946 +/- 0.027
+ESOL-reduced,0.155 +/- 0.38,-0.946 +/- 0.028
+Lipophilicity,0.268 +/- 0.346,-0.973 +/- 0.016
diff --git a/ci_comparison_tables/mult_runs_total_rmse_pm_std.csv b/ci_comparison_tables/mult_runs_total_rmse_pm_std.csv
@@ -0,0 +1,5 @@
+,Random Forests,Gaussian Processes
+FreeSolv,1.177 +/- 0.264,1.417 +/- 0.251
+ESOL-full,0.638 +/- 0.041,0.653 +/- 0.063
+ESOL-reduced,0.68 +/- 0.046,0.657 +/- 0.063
+Lipophilicity,0.671 +/- 0.02,0.637 +/- 0.024
diff --git a/ci_comparison_tables/mult_runs_within95_pm_std.csv b/ci_comparison_tables/mult_runs_within95_pm_std.csv
@@ -0,0 +1,5 @@
+,Random Forests,Gaussian Processes
+FreeSolv,0.994 +/- 0.009,0.996 +/- 0.005
+ESOL-full,0.968 +/- 0.02,0.922 +/- 0.015
+ESOL-reduced,0.919 +/- 0.047,0.922 +/- 0.015
+Lipophilicity,0.811 +/- 0.042,0.938 +/- 0.009
diff --git a/ci_comparison_tables/one_run_correlations_p_values.csv b/ci_comparison_tables/one_run_correlations_p_values.csv
@@ -0,0 +1,5 @@
+,Random Forests,Gaussian Processes
+FreeSolv,"(-0.65, 0.0)","(-0.85, 0.0)"
+ESOL-full,"(0.71, 0.0)","(-0.96, 0.0)"
+ESOL-reduced,"(-0.16, 0.01461)","(-0.96, 0.0)"
+Lipophilicity,"(0.73, 0.0)","(-0.97, 0.0)"
diff --git a/scripts/ci_plots_script.py b/scripts/ci_plots_script.py
@@ -80,6 +80,20 @@
 }
 
 
+# ----------------------------------------------------------------------------
+# variables to save things while the loop runs
+#
+# one run
+one_run_correlations_p_values = {}
+# mult runs mean, std
+mult_runs_corr_rmse_percentile_pm_stds = {}
+mult_runs_rmse_pm_std = {}
+mult_runs_within95_pm_std = {}
+# mult runs, correlation of ci-width vs percentile with p-value
+mult_runs_corr_p_val_ciwidth_percentile = {}
+
+
+
 # ----------------------------------------------------------------------------
 # main loop
 
@@ -90,6 +104,20 @@
     # report precision
     rp = datasets_to_rounding_precision[dataset]
 
+    # for recording on all datasets
+    #
+    # one run
+    one_run_correlations_p_values[f'{dataset}_{cf}'] = {}
+    # mult runs mean, std
+    mult_runs_corr_rmse_percentile_pm_stds[f'{dataset}_{cf}'] = {}
+    mult_runs_rmse_pm_std[f'{dataset}_{cf}'] = {}
+    mult_runs_within95_pm_std[f'{dataset}_{cf}'] = {}
+    # mult runs, correlation of ci-width vs percentile with p-value
+    mult_runs_corr_p_val_ciwidth_percentile[f'{dataset}_{cf}'] = {}
+
+
+
+
     for model in ['rf', 'gp']:
         assert model in ['rf', 'gp']
 
@@ -171,6 +199,11 @@
         plt.savefig(f'{PLOTS_DIR}/ci_plots/cumulrmse_vs_confidence_one_run_{dataset}_{cf}_{model}.png', dpi=DPI, bbox_inches='tight')
         plt.close()
 
+        # --------------------------------------------------------------------
+        # record one-run correlations and p-values
+        corr, p_val = pearsonr(confidence_percentiles, flipped_cumul_rmse)
+        one_run_correlations_p_values[f'{dataset}_{cf}'][model] = round(corr, 2), round(p_val, 5)
+
 
         # --------------------------------------------------------------------
         # multiple runs
@@ -241,6 +274,34 @@
         flipped_cumulrmse_upper = flipped_cumulrmse_mean + 1.96*flipped_cumulrmse_sdt
 
 
+        ######################################################################
+
+        # --------------------------------------------------------------------
+        # correlation mean +/- std of cumulrmse vs percentile
+        corr_mean = np.mean(cumulrmse_vs_percentile_corr_mult_runs).round(3)
+        corr_std = np.std(cumulrmse_vs_percentile_corr_mult_runs).round(3)
+        mult_runs_corr_rmse_percentile_pm_stds[f'{dataset}_{cf}'][model] = f'{corr_mean} +/- {corr_std}'
+
+        # --------------------------------------------------------------------
+        # rmse mean +/- std
+        rmse_mean = np.mean(rmse_mult_runs).round(3)
+        rmse_std = np.std(rmse_mult_runs).round(3)
+        mult_runs_rmse_pm_std[f'{dataset}_{cf}'][model] = f'{rmse_mean} +/- {rmse_std}'
+
+
+        # --------------------------------------------------------------------
+        # within95 mean +/- std
+        within95_mean = np.mean(within_95_cis_mult_runs).round(3)
+        within95_std = np.std(within_95_cis_mult_runs).round(3)
+        mult_runs_within95_pm_std[f'{dataset}_{cf}'][model] = f'{within95_mean} +/- {within95_std}'
+
+        # --------------------------------------------------------------------
+        # correlation, and p-value of ci width against percentile
+        mult_runs_corr_p_val_ciwidth_percentile[f'{dataset}_{cf}'][model] = \
+            np.round(pearsonr(flipped_cumulrmse_sdt, confidence_percentiles), 3)
+
+        ######################################################################
+
         # --------------------------------------------------------------------
         # big plots together
 
@@ -270,3 +331,66 @@
         plt.close()
 
         print()
+
+
+
+row_mapper = {
+    'freesolv_full': 'FreeSolv',
+    'esol_full': 'ESOL-full',
+    'esol_reduced': 'ESOL-reduced',
+    'lipophilicity_full': 'Lipophilicity'
+}
+
+row_order = ['FreeSolv', 'ESOL-full', 'ESOL-reduced', 'Lipophilicity']
+
+column_mapper = {
+    'rf': 'Random Forests',
+    'gp': 'Gaussian Processes'
+}
+
+row_order = ['FreeSolv', 'ESOL-full', 'ESOL-reduced', 'Lipophilicity']
+column_order  = ['Random Forests', 'Gaussian Processes']
+
+
+print("\nOne run correlations and p-values:")
+df = pd.DataFrame(one_run_correlations_p_values).T
+df = df.rename(mapper=row_mapper, axis='index')
+df = df.rename(mapper=column_mapper, axis='columns')
+df = df.loc[row_order, column_order]
+print(df)
+df.to_csv('../ci_comparison_tables/one_run_correlations_p_values.csv', index=True)
+
+print("\ncorrelation mean +/- std of cumulrmse vs percentile:")
+df = pd.DataFrame(mult_runs_corr_rmse_percentile_pm_stds).T
+df = df.rename(mapper=row_mapper, axis='index')
+df = df.rename(mapper=column_mapper, axis='columns')
+df = df.loc[row_order, column_order]
+print(df)
+df.to_csv('../ci_comparison_tables/mult_runs_corr_rmse_percentile_pm_stds.csv', index=True)
+
+
+print("\nrmse mean +/- std:")
+df = pd.DataFrame(mult_runs_rmse_pm_std).T
+df = df.rename(mapper=row_mapper, axis='index')
+df = df.rename(mapper=column_mapper, axis='columns')
+df = df.loc[row_order, column_order]
+df = df.loc[row_order, column_order]
+print(df)
+df.to_csv('../ci_comparison_tables/mult_runs_total_rmse_pm_std.csv', index=True)
+
+
+print("\nwithin95 mean +/- std:")
+df = pd.DataFrame(mult_runs_within95_pm_std).T
+df = df.rename(mapper=row_mapper, axis='index')
+df = df.rename(mapper=column_mapper, axis='columns')
+df = df.loc[row_order, column_order]
+print(df)
+df.to_csv('../ci_comparison_tables/mult_runs_within95_pm_std.csv', index=True)
+
+print("\ncorrelation, and p-value of ci width against percentile:")
+df = pd.DataFrame(mult_runs_corr_p_val_ciwidth_percentile).T
+df = df.rename(mapper=row_mapper, axis='index')
+df = df.rename(mapper=column_mapper, axis='columns')
+df = df.loc[row_order, column_order]
+print(df)
+df.to_csv('../ci_comparison_tables/mult_runs_corr_p_val_ciwidth_percentile.csv', index=True)