From 36504666e84c13ec8f84ad011579875b9386e5af Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 23 May 2024 12:31:58 +0200 Subject: [PATCH] Multiple fixes (#47) * increase lstm memory * clean up descriptions * add mean pearson / spearman component * fix metric * add to wf --- .../lstm_gru_cnn_ensemble/config.vsh.yaml | 2 +- .../metrics/mean_correlation/config.vsh.yaml | 44 +++++++++++++++ src/task/metrics/mean_correlation/script.py | 54 +++++++++++++++++++ .../metrics/mean_cosine_sim/config.vsh.yaml | 28 +++------- .../mean_rowwise_error/config.vsh.yaml | 48 ++++------------- .../workflows/run_benchmark/config.vsh.yaml | 1 + src/task/workflows/run_benchmark/main.nf | 3 +- 7 files changed, 119 insertions(+), 61 deletions(-) create mode 100644 src/task/metrics/mean_correlation/config.vsh.yaml create mode 100644 src/task/metrics/mean_correlation/script.py diff --git a/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml b/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml index 0072a55c..332c02df 100644 --- a/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml +++ b/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml @@ -70,4 +70,4 @@ platforms: - type: native - type: nextflow directives: - label: [veryhightime,highmem,highcpu,highsharedmem,gpu] + label: [veryhightime, veryhighmem, highcpu, highsharedmem, gpu] diff --git a/src/task/metrics/mean_correlation/config.vsh.yaml b/src/task/metrics/mean_correlation/config.vsh.yaml new file mode 100644 index 00000000..7e295770 --- /dev/null +++ b/src/task/metrics/mean_correlation/config.vsh.yaml @@ -0,0 +1,44 @@ +__merge__: ../../api/comp_metric.yaml +functionality: + name: mean_correlation + info: + metrics: + - name: mean_pearson + label: Mean Pearson + summary: The mean of Pearson correlations per row (perturbation). + description: | + We use the **Mean Pearson Correlation** to score submissions, computed as follows: + + $$ + \textrm{Mean-Pearson} = \frac{1}{R} \sum_{i=1}^R \frac{\sum_{j=1}^n (y_{ij} - \bar{y}_i)(\hat{y}_{ij} - \bar{\hat{y}}_i)}{\sqrt{\sum_{j=1}^n (y_{ij} - \bar{y}_i)^2 \sum_{j=1}^n (\hat{y}_{ij} - \bar{\hat{y}}_i)^2}} + $$ + + where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\hat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$. + repository_url: null + documentation_url: null + min: -1 + max: 1 + maximize: true + - name: mean_spearman + label: Mean Spearman + summary: The mean of Spearman correlations per row (perturbation). + description: | + We use the **Mean Spearman Correlation** to score submissions, computed as follows: + + $$ + \textrm{Mean-Spearman} = \frac{1}{R} \sum_{i=1}^R \frac{\sum_{j=1}^n (\text{rank}(y_{ij}) - \bar{\text{rank}}_i)(\text{rank}(\hat{y}_{ij}) - \bar{\text{rank}}_i)}{\sqrt{\sum_{j=1}^n (\text{rank}(y_{ij}) - \bar{\text{rank}}_i)^2 \sum_{j=1}^n (\text{rank}(\hat{y}_{ij}) - \bar{\text{rank}}_i)^2}} + $$ + + where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\hat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$. 
+ resources: + - type: python_script + path: script.py +platforms: + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + setup: + - type: python + packages: [ fastparquet ] + - type: nextflow + directives: + label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/task/metrics/mean_correlation/script.py b/src/task/metrics/mean_correlation/script.py new file mode 100644 index 00000000..aa06244e --- /dev/null +++ b/src/task/metrics/mean_correlation/script.py @@ -0,0 +1,54 @@ +import pandas as pd +import anndata as ad +import numpy as np + +## VIASH START +par = { + "de_test_h5ad": "resources/neurips-2023-kaggle/de_test.h5ad", + "prediction": "resources/neurips-2023-kaggle/prediction.parquet", + "method_id": "foo", + "output": "resources/neurips-2023-data/score.h5ad", +} +## VIASH END + +print("Load data", flush=True) +de_test = ad.read_h5ad(par["de_test_h5ad"]) +prediction = pd.read_parquet(par["prediction"]).set_index('id') + +print("Select genes", flush=True) +genes = list(de_test.var_names) +de_test_X = de_test.layers["sign_log10_pval"] +prediction = prediction[genes] + +print("Calculate mean pearson", flush=True) +mean_pearson = 0 +mean_spearman = 0 +for i in range(de_test_X.shape[0]): + y_i = de_test_X[i,] + y_hat_i = prediction.iloc[i] + + # compute ranks + r_i = y_i.argsort().argsort() + r_hat_i = y_hat_i.argsort().argsort() + + pearson = np.corrcoef(y_i, y_hat_i)[0, 1] + spearman = np.corrcoef(r_i, r_hat_i)[0, 1] + + mean_pearson += pearson + mean_spearman += spearman + +mean_pearson /= de_test_X.shape[0] +mean_spearman /= de_test_X.shape[0] + +print("Create output", flush=True) +output = ad.AnnData( + uns={ + "dataset_id": de_test.uns["dataset_id"], + "method_id": par["method_id"], + "metric_ids": ["mean_pearson", "mean_spearman"], + "metric_values": [mean_pearson, mean_spearman] + } +) + +print("Write output", flush=True) +output.write_h5ad(par["output"], compression="gzip") \ No newline at end of file diff --git a/src/task/metrics/mean_cosine_sim/config.vsh.yaml b/src/task/metrics/mean_cosine_sim/config.vsh.yaml index 24d86f40..71f179ac 100644 --- a/src/task/metrics/mean_cosine_sim/config.vsh.yaml +++ b/src/task/metrics/mean_cosine_sim/config.vsh.yaml @@ -13,39 +13,25 @@ functionality: \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|} $$ - where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$. + where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$. repository_url: null documentation_url: null min: -1 max: 1 maximize: true - name: mean_cosine_sim_clipped_05 - label: Mean Cosine Similarity clipped at 0.5 - summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.5 adjusted p-values. - description: | - We use the **Mean Cosine Similarity** to score submissions, computed as follows: - - $$ - \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|} - $$ - - where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$. + label: Mean Cosine Similarity clipped at 0.05 + summary: The mean of cosine similarities per row (perturbation). 
Values are clipped to 0.05 adjusted p-values.
+ description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.05), log10(0.05)].
 repository_url: null
 documentation_url: null
 min: -1
 max: 1
 maximize: true
 - name: mean_cosine_sim_clipped_01
- label: Mean Cosine Similarity clipped at 0.1
- summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.1 adjusted p-values.
- description: |
- We use the **Mean Cosine Similarity** to score submissions, computed as follows:
-
- $$
- \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$.
+ label: Mean Cosine Similarity clipped at 0.01
+ summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.01 adjusted p-values.
+ description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.01), log10(0.01)].
 repository_url: null
 documentation_url: null
 min: -1
diff --git a/src/task/metrics/mean_rowwise_error/config.vsh.yaml b/src/task/metrics/mean_rowwise_error/config.vsh.yaml
index 5e445741..7881b3be 100644
--- a/src/task/metrics/mean_rowwise_error/config.vsh.yaml
+++ b/src/task/metrics/mean_rowwise_error/config.vsh.yaml
@@ -13,39 +13,25 @@ functionality:
 \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
 $$
 
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
 repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_rmse_clipped_05
- label: Mean Rowwise RMSE clipped at 0.5
+ label: Mean Rowwise RMSE clipped at 0.05
 summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.5 adjusted p-values
- description: |
- We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.05), log10(0.05)].
repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_rmse_clipped_01
- label: Mean Rowwise RMSE clipped at 0.1
+ label: Mean Rowwise RMSE clipped at 0.01
 summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.1 adjusted p-values
- description: |
- We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.01), log10(0.01)].
 repository_url: null
 documentation_url: null
 min: 0
@@ -61,39 +47,25 @@ functionality:
 \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
 $$
 
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
 repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_mae_clipped_05
- label: Mean Rowwise MAE clipped at 0.5
+ label: Mean Rowwise MAE clipped at 0.05
 summary: The mean of the absolute error (MAE) of each row in the matrix. The values are clipped to 0.5 adjusted p-values.
- description: |
- We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_mae`, but with the values clipped to [-log10(0.05), log10(0.05)].
repository_url: null documentation_url: null min: 0 diff --git a/src/task/workflows/run_benchmark/config.vsh.yaml b/src/task/workflows/run_benchmark/config.vsh.yaml index 63128c19..cbcf83dd 100644 --- a/src/task/workflows/run_benchmark/config.vsh.yaml +++ b/src/task/workflows/run_benchmark/config.vsh.yaml @@ -81,6 +81,7 @@ functionality: - name: methods/pyboost - name: metrics/mean_rowwise_error - name: metrics/mean_cosine_sim + - name: metrics/mean_correlation repositories: - name: openproblemsv2 type: github diff --git a/src/task/workflows/run_benchmark/main.nf b/src/task/workflows/run_benchmark/main.nf index 450684ff..e80f73c7 100644 --- a/src/task/workflows/run_benchmark/main.nf +++ b/src/task/workflows/run_benchmark/main.nf @@ -34,7 +34,8 @@ workflow run_wf { // construct list of metrics metrics = [ mean_rowwise_error, - mean_cosine_sim + mean_cosine_sim, + mean_correlation ] /* **************************
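
Reviewer note (not part of the patch): the new `mean_correlation` component computes row-wise Pearson and Spearman correlations and averages them over perturbations. A minimal sketch of how the same quantities can be cross-checked with `scipy.stats` on synthetic stand-in data is shown below; the array shapes and variable names are illustrative assumptions, not values from the benchmark.

```python
# Illustrative cross-check of the mean_correlation metric (not part of the patch).
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
y_true = rng.normal(size=(10, 50))                          # stand-in for de_test.layers["sign_log10_pval"]
y_pred = y_true + rng.normal(scale=0.5, size=y_true.shape)  # stand-in for the prediction matrix

# Row-wise correlations, averaged over rows (perturbations), as in the metric definitions.
mean_pearson = np.mean([stats.pearsonr(y_true[i], y_pred[i])[0] for i in range(y_true.shape[0])])
mean_spearman = np.mean([stats.spearmanr(y_true[i], y_pred[i])[0] for i in range(y_true.shape[0])])

print(f"mean_pearson={mean_pearson:.3f}, mean_spearman={mean_spearman:.3f}")
```

One caveat: `script.py` ranks values with a double `argsort`, which assigns distinct ranks to tied values, so its Spearman estimate can differ slightly from `scipy.stats.spearmanr` (which averages tied ranks) when ties are present.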
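Also not part of the patch: the `*_clipped_05` / `*_clipped_01` metric variants describe clipping the signed log10 p-values to `[-log10(0.05), log10(0.05)]` (or `0.01`). The sketch below illustrates that transformation, assuming the bounds are simply plus/minus `log10` of the adjusted p-value threshold; the helper name is hypothetical and does not exist in the codebase.

```python
# Hypothetical helper illustrating the clipping described for the *_clipped metrics.
import numpy as np

def clip_sign_log10_pval(values: np.ndarray, threshold: float = 0.05) -> np.ndarray:
    """Clip signed log10 p-values to [log10(threshold), -log10(threshold)]."""
    bound = -np.log10(threshold)  # ~1.301 for 0.05, 2.0 for 0.01
    return np.clip(values, -bound, bound)

x = np.array([-3.2, -0.4, 0.0, 0.9, 2.7])
print(clip_sign_log10_pval(x))    # values beyond +/-1.301 are truncated to the bound
```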