From 36504666e84c13ec8f84ad011579875b9386e5af Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 23 May 2024 12:31:58 +0200 Subject: [PATCH] Multiple fixes (#47) * increase lstm memory * clean up descriptions * add mean pearson / spearman component * fix metric * add to wf --- .../lstm_gru_cnn_ensemble/config.vsh.yaml | 2 +- .../metrics/mean_correlation/config.vsh.yaml | 44 +++++++++++++++ src/task/metrics/mean_correlation/script.py | 54 +++++++++++++++++++ .../metrics/mean_cosine_sim/config.vsh.yaml | 28 +++------- .../mean_rowwise_error/config.vsh.yaml | 48 ++++------------- .../workflows/run_benchmark/config.vsh.yaml | 1 + src/task/workflows/run_benchmark/main.nf | 3 +- 7 files changed, 119 insertions(+), 61 deletions(-) create mode 100644 src/task/metrics/mean_correlation/config.vsh.yaml create mode 100644 src/task/metrics/mean_correlation/script.py diff --git a/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml b/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml index 0072a55c..332c02df 100644 --- a/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml +++ b/src/task/methods/lstm_gru_cnn_ensemble/config.vsh.yaml @@ -70,4 +70,4 @@ platforms: - type: native - type: nextflow directives: - label: [veryhightime,highmem,highcpu,highsharedmem,gpu] + label: [veryhightime, veryhighmem, highcpu, highsharedmem, gpu] diff --git a/src/task/metrics/mean_correlation/config.vsh.yaml b/src/task/metrics/mean_correlation/config.vsh.yaml new file mode 100644 index 00000000..7e295770 --- /dev/null +++ b/src/task/metrics/mean_correlation/config.vsh.yaml @@ -0,0 +1,44 @@ +__merge__: ../../api/comp_metric.yaml +functionality: + name: mean_correlation + info: + metrics: + - name: mean_pearson + label: Mean Pearson + summary: The mean of Pearson correlations per row (perturbation). + description: | + We use the **Mean Pearson Correlation** to score submissions, computed as follows: + + $$ + \textrm{Mean-Pearson} = \frac{1}{R} \sum_{i=1}^R \frac{\sum_{j=1}^n (y_{ij} - \bar{y}_i)(\hat{y}_{ij} - \bar{\hat{y}}_i)}{\sqrt{\sum_{j=1}^n (y_{ij} - \bar{y}_i)^2 \sum_{j=1}^n (\hat{y}_{ij} - \bar{\hat{y}}_i)^2}} + $$ + + where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\hat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$. + repository_url: null + documentation_url: null + min: -1 + max: 1 + maximize: true + - name: mean_spearman + label: Mean Spearman + summary: The mean of Spearman correlations per row (perturbation). + description: | + We use the **Mean Spearman Correlation** to score submissions, computed as follows: + + $$ + \textrm{Mean-Spearman} = \frac{1}{R} \sum_{i=1}^R \frac{\sum_{j=1}^n (\text{rank}(y_{ij}) - \bar{\text{rank}}_i)(\text{rank}(\hat{y}_{ij}) - \bar{\text{rank}}_i)}{\sqrt{\sum_{j=1}^n (\text{rank}(y_{ij}) - \bar{\text{rank}}_i)^2 \sum_{j=1}^n (\text{rank}(\hat{y}_{ij}) - \bar{\text{rank}}_i)^2}} + $$ + + where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\hat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$. 
+ resources: + - type: python_script + path: script.py +platforms: + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + setup: + - type: python + packages: [ fastparquet ] + - type: nextflow + directives: + label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/task/metrics/mean_correlation/script.py b/src/task/metrics/mean_correlation/script.py new file mode 100644 index 00000000..aa06244e --- /dev/null +++ b/src/task/metrics/mean_correlation/script.py @@ -0,0 +1,54 @@ +import pandas as pd +import anndata as ad +import numpy as np + +## VIASH START +par = { + "de_test_h5ad": "resources/neurips-2023-kaggle/de_test.h5ad", + "prediction": "resources/neurips-2023-kaggle/prediction.parquet", + "method_id": "foo", + "output": "resources/neurips-2023-data/score.h5ad", +} +## VIASH END + +print("Load data", flush=True) +de_test = ad.read_h5ad(par["de_test_h5ad"]) +prediction = pd.read_parquet(par["prediction"]).set_index('id') + +print("Select genes", flush=True) +genes = list(de_test.var_names) +de_test_X = de_test.layers["sign_log10_pval"] +prediction = prediction[genes] + +print("Calculate mean pearson", flush=True) +mean_pearson = 0 +mean_spearman = 0 +for i in range(de_test_X.shape[0]): + y_i = de_test_X[i,] + y_hat_i = prediction.iloc[i] + + # compute ranks + r_i = y_i.argsort().argsort() + r_hat_i = y_hat_i.argsort().argsort() + + pearson = np.corrcoef(y_i, y_hat_i)[0, 1] + spearman = np.corrcoef(r_i, r_hat_i)[0, 1] + + mean_pearson += pearson + mean_spearman += spearman + +mean_pearson /= de_test_X.shape[0] +mean_spearman /= de_test_X.shape[0] + +print("Create output", flush=True) +output = ad.AnnData( + uns={ + "dataset_id": de_test.uns["dataset_id"], + "method_id": par["method_id"], + "metric_ids": ["mean_pearson", "mean_spearman"], + "metric_values": [mean_pearson, mean_spearman] + } +) + +print("Write output", flush=True) +output.write_h5ad(par["output"], compression="gzip") \ No newline at end of file diff --git a/src/task/metrics/mean_cosine_sim/config.vsh.yaml b/src/task/metrics/mean_cosine_sim/config.vsh.yaml index 24d86f40..71f179ac 100644 --- a/src/task/metrics/mean_cosine_sim/config.vsh.yaml +++ b/src/task/metrics/mean_cosine_sim/config.vsh.yaml @@ -13,39 +13,25 @@ functionality: \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|} $$ - where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$. + where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$. repository_url: null documentation_url: null min: -1 max: 1 maximize: true - name: mean_cosine_sim_clipped_05 - label: Mean Cosine Similarity clipped at 0.5 - summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.5 adjusted p-values. - description: | - We use the **Mean Cosine Similarity** to score submissions, computed as follows: - - $$ - \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|} - $$ - - where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$. + label: Mean Cosine Similarity clipped at 0.05 + summary: The mean of cosine similarities per row (perturbation). 
Values are clipped to 0.05 adjusted p-values.
+ description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.05), log10(0.05)].
 repository_url: null
 documentation_url: null
 min: -1
 max: 1
 maximize: true
 - name: mean_cosine_sim_clipped_01
- label: Mean Cosine Similarity clipped at 0.1
- summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.1 adjusted p-values.
- description: |
- We use the **Mean Cosine Similarity** to score submissions, computed as follows:
-
- $$
- \textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(\mathbf{y}_i\\)$ and $\\(\mathbf{\hat{y}}_i\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$.
+ label: Mean Cosine Similarity clipped at 0.01
+ summary: The mean of cosine similarities per row (perturbation). Values are clipped to 0.01 adjusted p-values.
+ description: This metric is the same as `mean_cosine_sim`, but with the values clipped to [-log10(0.01), log10(0.01)].
 repository_url: null
 documentation_url: null
 min: -1
diff --git a/src/task/metrics/mean_rowwise_error/config.vsh.yaml b/src/task/metrics/mean_rowwise_error/config.vsh.yaml
index 5e445741..7881b3be 100644
--- a/src/task/metrics/mean_rowwise_error/config.vsh.yaml
+++ b/src/task/metrics/mean_rowwise_error/config.vsh.yaml
@@ -13,39 +13,25 @@ functionality:
 \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
 $$
 
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
 repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_rmse_clipped_05
- label: Mean Rowwise RMSE clipped at 0.5
+ label: Mean Rowwise RMSE clipped at 0.05
 summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.5 adjusted p-values
- description: |
- We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.05), log10(0.05)].
repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_rmse_clipped_01
- label: Mean Rowwise RMSE clipped at 0.1
+ label: Mean Rowwise RMSE clipped at 0.01
 summary: The mean of the root mean squared error (RMSE) of each row in the matrix, where the values are clipped to 0.1 adjusted p-values
- description: |
- We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRRMSE} = \frac{1}{R}\sum\_{i=1}^R\left(\frac{1}{n} \sum\_{j=1}^{n} (y\_{ij} - \widehat{y}\_{ij})^2\right)^{1/2}
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_rmse`, but with the values clipped to [-log10(0.01), log10(0.01)].
 repository_url: null
 documentation_url: null
 min: 0
@@ -61,39 +47,25 @@ functionality:
 \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
 $$
 
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
 repository_url: null
 documentation_url: null
 min: 0
 max: "+inf"
 maximize: false
 - name: mean_rowwise_mae_clipped_05
- label: Mean Rowwise MAE clipped at 0.5
+ label: Mean Rowwise MAE clipped at 0.05
 summary: The mean of the absolute error (MAE) of each row in the matrix. The values are clipped to 0.5 adjusted p-values.
- description: |
- We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
-
- $$
- \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
- $$
-
- where $\\(R\\)$ is the number of scored rows, and $\\(y_{ij}\\)$ and $\\(\widehat{y}_{ij}\\)$ are the actual and predicted values, respectively, for row $\\(i\\)$ and column $\\(j\\)$, and $\\(n\\)$ bis the number of columns.
+ description: This metric is the same as `mean_rowwise_mae`, but with the values clipped to [-log10(0.05), log10(0.05)].
repository_url: null documentation_url: null min: 0 diff --git a/src/task/workflows/run_benchmark/config.vsh.yaml b/src/task/workflows/run_benchmark/config.vsh.yaml index 63128c19..cbcf83dd 100644 --- a/src/task/workflows/run_benchmark/config.vsh.yaml +++ b/src/task/workflows/run_benchmark/config.vsh.yaml @@ -81,6 +81,7 @@ functionality: - name: methods/pyboost - name: metrics/mean_rowwise_error - name: metrics/mean_cosine_sim + - name: metrics/mean_correlation repositories: - name: openproblemsv2 type: github diff --git a/src/task/workflows/run_benchmark/main.nf b/src/task/workflows/run_benchmark/main.nf index 450684ff..e80f73c7 100644 --- a/src/task/workflows/run_benchmark/main.nf +++ b/src/task/workflows/run_benchmark/main.nf @@ -34,7 +34,8 @@ workflow run_wf { // construct list of metrics metrics = [ mean_rowwise_error, - mean_cosine_sim + mean_cosine_sim, + mean_correlation ] /* **************************
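
Reviewer note (not part of the patch): the new `mean_correlation` component computes row-wise Pearson and Spearman correlations and averages them over perturbations. A minimal sketch of how the same quantities can be cross-checked with `scipy.stats` on synthetic stand-in data is shown below; the array shapes and variable names are illustrative assumptions, not values from the benchmark.

```python
# Illustrative cross-check of the mean_correlation metric (not part of the patch).
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
y_true = rng.normal(size=(10, 50))                          # stand-in for de_test.layers["sign_log10_pval"]
y_pred = y_true + rng.normal(scale=0.5, size=y_true.shape)  # stand-in for the prediction matrix

# Row-wise correlations, averaged over rows (perturbations), as in the metric definitions.
mean_pearson = np.mean([stats.pearsonr(y_true[i], y_pred[i])[0] for i in range(y_true.shape[0])])
mean_spearman = np.mean([stats.spearmanr(y_true[i], y_pred[i])[0] for i in range(y_true.shape[0])])

print(f"mean_pearson={mean_pearson:.3f}, mean_spearman={mean_spearman:.3f}")
```

One caveat: `script.py` ranks values with a double `argsort`, which assigns distinct ranks to tied values, so its Spearman estimate can differ slightly from `scipy.stats.spearmanr` (which averages tied ranks) when ties are present.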
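Also not part of the patch: the `*_clipped_05` / `*_clipped_01` metric variants describe clipping the signed log10 p-values to `[-log10(0.05), log10(0.05)]` (or `0.01`). The sketch below illustrates that transformation, assuming the bounds are simply plus/minus `log10` of the adjusted p-value threshold; the helper name is hypothetical and does not exist in the codebase.

```python
# Hypothetical helper illustrating the clipping described for the *_clipped metrics.
import numpy as np

def clip_sign_log10_pval(values: np.ndarray, threshold: float = 0.05) -> np.ndarray:
    """Clip signed log10 p-values to [log10(threshold), -log10(threshold)]."""
    bound = -np.log10(threshold)  # ~1.301 for 0.05, 2.0 for 0.01
    return np.clip(values, -bound, bound)

x = np.array([-3.2, -0.4, 0.0, 0.9, 2.7])
print(clip_sign_log10_pval(x))    # values beyond +/-1.301 are truncated to the bound
```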