update threshold limits (openproblems-bio#49)

Paulos2411 · May 23, 2024 · a312058 · a312058
1 parent 3650466
commit a312058
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 90 deletions.
diff --git a/src/task/metrics/mean_cosine_sim/config.vsh.yaml b/src/task/metrics/mean_cosine_sim/config.vsh.yaml
@@ -19,19 +19,10 @@ functionality:
         min: -1
         max: 1
         maximize: true
-      - name: mean_cosine_sim_clipped_05
-        label: Mean Cosine Similarity clipped at 0.05
-        summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.05 adjusted p-values.
-        description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.05), log10(0.05)].
-        repository_url: null
-        documentation_url: null
-        min: -1
-        max: 1
-        maximize: true
-      - name: mean_cosine_sim_clipped_01
-        label: Mean Cosine Similarity clipped at 0.01
-        summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.01 adjusted p-values.
-        description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.01), log10(0.01)].
+      - name: mean_cosine_sim_clipped_0001
+        label: Mean Cosine Similarity clipped at 0.0001
+        summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.0001 adjusted p-values.
+        description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [log10(0.0001), -log10(0.0001)], i.e. [-4, 4].
         repository_url: null
         documentation_url: null
         min: -1

diff --git a/src/task/metrics/mean_cosine_sim/script.py b/src/task/metrics/mean_cosine_sim/script.py
@@ -21,55 +21,43 @@
 prediction = prediction[genes]
 
 print("Clipping values", flush=True)
-threshold_05 = -np.log10(0.05)
-de_test_X_clipped_05 = np.clip(de_test_X, -threshold_05, threshold_05)
-prediction_clipped_05 = np.clip(prediction.values, -threshold_05, threshold_05)
-threshold_01 = -np.log10(0.01)
-de_test_X_clipped_01 = np.clip(de_test_X, -threshold_01, threshold_01)
-prediction_clipped_01 = np.clip(prediction.values, -threshold_01, threshold_01)
+threshold_0001 = -np.log10(0.0001)
+de_test_X_clipped_0001 = np.clip(de_test_X, -threshold_0001, threshold_0001)
+prediction_clipped_0001 = np.clip(prediction.values, -threshold_0001, threshold_0001)
 
 print("Calculate mean cosine similarity", flush=True)
 mean_cosine_similarity = 0
-mean_cosine_similarity_clipped_05 = 0
-mean_cosine_similarity_clipped_01 = 0
+mean_cosine_similarity_clipped_0001 = 0
 for i in range(de_test_X.shape[0]):
     y_i = de_test_X[i,]
     y_hat_i = prediction.iloc[i]
-    y_i_clipped_05 = de_test_X_clipped_05[i,]
-    y_hat_i_clipped_05 = prediction_clipped_05[i]
-    y_i_clipped_01 = de_test_X_clipped_01[i,]
-    y_hat_i_clipped_01 = prediction_clipped_01[i]
+    y_i_clipped_0001 = de_test_X_clipped_0001[i,]
+    y_hat_i_clipped_0001 = prediction_clipped_0001[i]
 
     dot_product = np.dot(y_i, y_hat_i)
-    dot_product_clipped_05 = np.dot(y_i_clipped_05, y_hat_i_clipped_05)
-    dot_product_clipped_01 = np.dot(y_i_clipped_01, y_hat_i_clipped_01)
+    dot_product_clipped_0001 = np.dot(y_i_clipped_0001, y_hat_i_clipped_0001)
 
     norm_y_i = np.linalg.norm(y_i)
-    norm_y_i_clipped_05 = np.linalg.norm(y_i_clipped_05)
-    norm_y_i_clipped_01 = np.linalg.norm(y_i_clipped_01)
+    norm_y_i_clipped_0001 = np.linalg.norm(y_i_clipped_0001)
     norm_y_hat_i = np.linalg.norm(y_hat_i)
-    norm_y_hat_i_clipped_05 = np.linalg.norm(y_hat_i_clipped_05)
-    norm_y_hat_i_clipped_01 = np.linalg.norm(y_hat_i_clipped_01)
+    norm_y_hat_i_clipped_0001 = np.linalg.norm(y_hat_i_clipped_0001)
 
     cosine_similarity = dot_product / (norm_y_i * norm_y_hat_i)
-    cosine_similarity_clipped_05 = dot_product_clipped_05 / (norm_y_i_clipped_05 * norm_y_hat_i_clipped_05)
-    cosine_similarity_clipped_01 = dot_product_clipped_01 / (norm_y_i_clipped_01 * norm_y_hat_i_clipped_01)
+    cosine_similarity_clipped_0001 = dot_product_clipped_0001 / (norm_y_i_clipped_0001 * norm_y_hat_i_clipped_0001)
 
     mean_cosine_similarity += cosine_similarity
-    mean_cosine_similarity_clipped_05 += cosine_similarity_clipped_05
-    mean_cosine_similarity_clipped_01 += cosine_similarity_clipped_01
+    mean_cosine_similarity_clipped_0001 += cosine_similarity_clipped_0001
 
 mean_cosine_similarity /= de_test_X.shape[0]
-mean_cosine_similarity_clipped_05 /= de_test_X.shape[0]
-mean_cosine_similarity_clipped_01 /= de_test_X.shape[0]
+mean_cosine_similarity_clipped_0001 /= de_test_X.shape[0]
 
 print("Create output", flush=True)
 output = ad.AnnData(
     uns={
         "dataset_id": de_test.uns["dataset_id"],
         "method_id": par["method_id"],
-        "metric_ids": ["mean_cosine_sim", "mean_cosine_sim_clipped_05", "mean_cosine_sim_clipped_01"],
-        "metric_values": [mean_cosine_similarity, mean_cosine_similarity_clipped_05, mean_cosine_similarity_clipped_01]
+        "metric_ids": ["mean_cosine_sim", "mean_cosine_sim_clipped_0001"],
+        "metric_values": [mean_cosine_similarity, mean_cosine_similarity_clipped_0001]
     }
 )
 

diff --git a/src/task/metrics/mean_rowwise_error/config.vsh.yaml b/src/task/metrics/mean_rowwise_error/config.vsh.yaml
@@ -19,19 +19,10 @@ functionality:
         min: 0
         max: "+inf"
         maximize: false
-      - name: mean_rowwise_rmse_clipped_05
-        label: Mean Rowwise RMSE clipped at 0.05
-        summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.5 adjusted p-values
-        description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.05), log10(0.05)].
-        repository_url: null
-        documentation_url: null
-        min: 0
-        max: "+inf"
-        maximize: false
-      - name: mean_rowwise_rmse_clipped_01
-        label: Mean Rowwise RMSE clipped at 0.01
-        summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.1 adjusted p-values
-        description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.01), log10(0.01)].
+      - name: mean_rowwise_rmse_clipped_0001
+        label: Mean Rowwise RMSE clipped at 0.0001
+        summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.0001 adjusted p-values
+        description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [log10(0.0001), -log10(0.0001)], i.e. [-4, 4].
         repository_url: null
         documentation_url: null
         min: 0
@@ -53,19 +44,10 @@ functionality:
         min: 0
         max: "+inf"
         maximize: false
-      - name: mean_rowwise_mae_clipped_05
-        label: Mean Rowwise MAE clipped at 0.05
-        summary: The mean of the absolute error (MAE) of each row in the matrix. The values are clipped to 0.5 adjusted p-values.
-        description: This metric is the same as `mean_rowwise_mae`, but with the values clipped to [-log10(0.05), log10(0.05)].
-        repository_url: null
-        documentation_url: null
-        min: 0
-        max: "+inf"
-        maximize: false
-      - name: mean_rowwise_mae_clipped_01
-        label: Mean Rowwise MAE clipped at 0.01
-        summary: The mean of the absolute error (MAE) of each row in the matrix. The values are clipped to 0.1 adjusted p-values.
-        description: This metric is the same as `mean_rowwise_mae`, but with the values clipped to [-log10(0.01), log10(0.01)].
+      - name: mean_rowwise_mae_clipped_0001
+        label: Mean Rowwise MAE clipped at 0.0001
+        summary: The mean of the absolute error (MAE) of each row in the matrix. The values are clipped to 0.0001 adjusted p-values.
+        description: This metric is the same as `mean_rowwise_mae`, but with the values clipped to [log10(0.0001), -log10(0.0001)], i.e. [-4, 4].
         repository_url: null
         documentation_url: null
         min: 0

diff --git a/src/task/metrics/mean_rowwise_error/script.py b/src/task/metrics/mean_rowwise_error/script.py
@@ -21,51 +21,38 @@
 prediction = prediction[genes]
 
 print("Clipping values", flush=True)
-threshold_05 = -np.log10(0.05)
-de_test_X_clipped_05 = np.clip(de_test_X, -threshold_05, threshold_05)
-prediction_clipped_05 = np.clip(prediction.values, -threshold_05, threshold_05)
-
-threshold_01 = -np.log10(0.01)
-de_test_X_clipped_01 = np.clip(de_test_X, -threshold_01, threshold_01)
-prediction_clipped_01 = np.clip(prediction.values, -threshold_01, threshold_01)
+threshold_0001 = -np.log10(0.0001)
+de_test_X_clipped_0001 = np.clip(de_test_X, -threshold_0001, threshold_0001)
+prediction_clipped_0001 = np.clip(prediction.values, -threshold_0001, threshold_0001)
 
 print("Calculate mean rowwise RMSE", flush=True)
 mean_rowwise_rmse = 0
-mean_rowwise_rmse_clipped_05 = 0
-mean_rowwise_rmse_clipped_01 = 0
+mean_rowwise_rmse_clipped_0001 = 0
 mean_rowwise_mae = 0
-mean_rowwise_mae_clipped_05 = 0
-mean_rowwise_mae_clipped_01 = 0
+mean_rowwise_mae_clipped_0001 = 0
 for i in range(de_test_X.shape[0]):
     diff = de_test_X[i,] - prediction.iloc[i]
-    diff_clipped_05 = de_test_X_clipped_05[i,] - prediction_clipped_05[i]
-    diff_clipped_01 = de_test_X_clipped_01[i,] - prediction_clipped_01[i]
+    diff_clipped_0001 = de_test_X_clipped_0001[i,] - prediction_clipped_0001[i]
 
     mean_rowwise_rmse += np.sqrt((diff**2).mean())
-    mean_rowwise_rmse_clipped_05 += np.sqrt((diff_clipped_05**2).mean())
-    mean_rowwise_rmse_clipped_01 += np.sqrt((diff_clipped_01**2).mean())
+    mean_rowwise_rmse_clipped_0001 += np.sqrt((diff_clipped_0001 ** 2).mean())
     mean_rowwise_mae += np.abs(diff).mean()
-    mean_rowwise_mae_clipped_05 += np.abs(diff_clipped_05).mean()
-    mean_rowwise_mae_clipped_01 += np.abs(diff_clipped_01).mean()
+    mean_rowwise_mae_clipped_0001 += np.abs(diff_clipped_0001).mean()
 
 mean_rowwise_rmse /= de_test.shape[0]
-mean_rowwise_rmse_clipped_05 /= de_test.shape[0]
-mean_rowwise_rmse_clipped_01 /= de_test.shape[0]
+mean_rowwise_rmse_clipped_0001 /= de_test.shape[0]
 mean_rowwise_mae /= de_test.shape[0]
-mean_rowwise_mae_clipped_05 /= de_test.shape[0]
-mean_rowwise_mae_clipped_01 /= de_test.shape[0]
+mean_rowwise_mae_clipped_0001 /= de_test.shape[0]
 
 print("Create output", flush=True)
 output = ad.AnnData(
     uns={
         "dataset_id": de_test.uns["dataset_id"],
         "method_id": par["method_id"],
         "metric_ids": ["mean_rowwise_rmse", "mean_rowwise_mae",
-                          "mean_rowwise_rmse_clipped_05", "mean_rowwise_mae_clipped_05",
-                          "mean_rowwise_rmse_clipped_01", "mean_rowwise_mae_clipped_01"],
+                          "mean_rowwise_rmse_clipped_0001", "mean_rowwise_mae_clipped_0001"],
         "metric_values": [mean_rowwise_rmse, mean_rowwise_mae,
-                          mean_rowwise_rmse_clipped_05, mean_rowwise_mae_clipped_05,
-                          mean_rowwise_rmse_clipped_01, mean_rowwise_mae_clipped_01]
+                          mean_rowwise_rmse_clipped_0001, mean_rowwise_mae_clipped_0001]
     }
 )