add thresholded metrics (openproblems-bio#45)
* add thresholded metrics

* fix threshold values
szalata authored May 22, 2024
1 parent baf9c73 commit c501ebc
Showing 4 changed files with 157 additions and 7 deletions.
32 changes: 32 additions & 0 deletions src/task/metrics/mean_cosine_sim/config.vsh.yaml
@@ -19,6 +19,38 @@ functionality:
min: -1
max: 1
maximize: true
- name: mean_cosine_sim_clipped_05
label: Mean Cosine Similarity clipped at 0.05
summary: The mean of cosine similarities per row (perturbation), computed after clipping values to the range corresponding to an adjusted p-value of 0.05.
description: |
We use the **Mean Cosine Similarity** to score submissions, computed as follows:
$$
\textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
$$
where $R$ is the number of scored rows, and $\mathbf{y}_i$ and $\mathbf{\hat{y}}_i$ are the actual and predicted values, respectively, for row $i$. Both vectors are first clipped to $[-t, t]$ with $t = -\log_{10}(0.05)$.
repository_url: null
documentation_url: null
min: -1
max: 1
maximize: true
- name: mean_cosine_sim_clipped_01
label: Mean Cosine Similarity clipped at 0.01
summary: The mean of cosine similarities per row (perturbation), computed after clipping values to the range corresponding to an adjusted p-value of 0.01.
description: |
We use the **Mean Cosine Similarity** to score submissions, computed as follows:
$$
\textrm{Mean-Cosine} = \frac{1}{R} \sum_{i=1}^R \frac{\mathbf{y}_i \cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
$$
where $R$ is the number of scored rows, and $\mathbf{y}_i$ and $\mathbf{\hat{y}}_i$ are the actual and predicted values, respectively, for row $i$. Both vectors are first clipped to $[-t, t]$ with $t = -\log_{10}(0.01)$.
repository_url: null
documentation_url: null
min: -1
max: 1
maximize: true
resources:
- type: python_script
path: script.py
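All three cosine variants reduce to a few vectorized NumPy operations. The sketch below is ours, not part of the commit: the function name `mean_cosine_sim` and the assumption that `y_true` and `y_pred` are dense NumPy arrays of signed log10 adjusted p-values (the `sign_log10_pval` layer and the aligned prediction matrix) are illustrative only.

```python
import numpy as np

def mean_cosine_sim(y_true, y_pred, p_threshold=None):
    """Mean row-wise cosine similarity; if p_threshold is given, both
    matrices are first clipped to [-t, t] with t = -log10(p_threshold)."""
    if p_threshold is not None:
        t = -np.log10(p_threshold)
        y_true = np.clip(y_true, -t, t)
        y_pred = np.clip(y_pred, -t, t)
    dots = np.sum(y_true * y_pred, axis=1)  # row-wise dot products
    norms = np.linalg.norm(y_true, axis=1) * np.linalg.norm(y_pred, axis=1)
    return float(np.mean(dots / norms))
```

Under these assumptions, `mean_cosine_sim(y, y_hat)` gives the unclipped metric, and `p_threshold=0.05` or `p_threshold=0.01` gives the two clipped variants.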
34 changes: 30 additions & 4 deletions src/task/metrics/mean_cosine_sim/script.py
@@ -4,8 +4,8 @@

## VIASH START
par = {
"de_test_h5ad": "resources/neurips-2023-data/de_test.h5ad",
"prediction": "resources/neurips-2023-data/prediction.parquet",
"de_test_h5ad": "resources/neurips-2023-kaggle/de_test.h5ad",
"prediction": "resources/neurips-2023-kaggle/output_mean_compounds.parquet",
"method_id": "foo",
"output": "resources/neurips-2023-data/score.h5ad",
}
@@ -20,30 +20,56 @@
de_test_X = de_test.layers["sign_log10_pval"]
prediction = prediction[genes]

print("Clipping values", flush=True)
threshold_05 = -np.log10(0.05)
de_test_X_clipped_05 = np.clip(de_test_X, -threshold_05, threshold_05)
prediction_clipped_05 = np.clip(prediction.values, -threshold_05, threshold_05)
threshold_01 = -np.log10(0.01)
de_test_X_clipped_01 = np.clip(de_test_X, -threshold_01, threshold_01)
prediction_clipped_01 = np.clip(prediction.values, -threshold_01, threshold_01)

print("Calculate mean cosine similarity", flush=True)
mean_cosine_similarity = 0
mean_cosine_similarity_clipped_05 = 0
mean_cosine_similarity_clipped_01 = 0
for i in range(de_test_X.shape[0]):
y_i = de_test_X[i,]
y_hat_i = prediction.iloc[i]
y_i_clipped_05 = de_test_X_clipped_05[i,]
y_hat_i_clipped_05 = prediction_clipped_05[i]
y_i_clipped_01 = de_test_X_clipped_01[i,]
y_hat_i_clipped_01 = prediction_clipped_01[i]

dot_product = np.dot(y_i, y_hat_i)
dot_product_clipped_05 = np.dot(y_i_clipped_05, y_hat_i_clipped_05)
dot_product_clipped_01 = np.dot(y_i_clipped_01, y_hat_i_clipped_01)

norm_y_i = np.linalg.norm(y_i)
norm_y_i_clipped_05 = np.linalg.norm(y_i_clipped_05)
norm_y_i_clipped_01 = np.linalg.norm(y_i_clipped_01)
norm_y_hat_i = np.linalg.norm(y_hat_i)
norm_y_hat_i_clipped_05 = np.linalg.norm(y_hat_i_clipped_05)
norm_y_hat_i_clipped_01 = np.linalg.norm(y_hat_i_clipped_01)

cosine_similarity = dot_product / (norm_y_i * norm_y_hat_i)
cosine_similarity_clipped_05 = dot_product_clipped_05 / (norm_y_i_clipped_05 * norm_y_hat_i_clipped_05)
cosine_similarity_clipped_01 = dot_product_clipped_01 / (norm_y_i_clipped_01 * norm_y_hat_i_clipped_01)

mean_cosine_similarity += cosine_similarity
mean_cosine_similarity_clipped_05 += cosine_similarity_clipped_05
mean_cosine_similarity_clipped_01 += cosine_similarity_clipped_01

mean_cosine_similarity /= de_test_X.shape[0]
mean_cosine_similarity_clipped_05 /= de_test_X.shape[0]
mean_cosine_similarity_clipped_01 /= de_test_X.shape[0]

print("Create output", flush=True)
output = ad.AnnData(
uns={
"dataset_id": de_test.uns["dataset_id"],
"method_id": par["method_id"],
"metric_ids": ["mean_cosine_sim"],
"metric_values": [mean_cosine_similarity]
"metric_ids": ["mean_cosine_sim", "mean_cosine_sim_clipped_05", "mean_cosine_sim_clipped_01"],
"metric_values": [mean_cosine_similarity, mean_cosine_similarity_clipped_05, mean_cosine_similarity_clipped_01]
}
)

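As an offline sanity check (ours; SciPy is not a dependency of this component), one iteration of the loop above matches `scipy.spatial.distance.cosine`, which returns one minus the cosine similarity:

```python
import numpy as np
from scipy.spatial.distance import cosine  # cosine distance = 1 - cosine similarity

rng = np.random.default_rng(0)
y = rng.normal(size=(5, 10))      # stand-in for de_test_X
y_hat = rng.normal(size=(5, 10))  # stand-in for the aligned prediction matrix

i = 0  # replicate a single iteration of the script's loop
manual = np.dot(y[i], y_hat[i]) / (np.linalg.norm(y[i]) * np.linalg.norm(y_hat[i]))
assert np.isclose(manual, 1 - cosine(y[i], y_hat[i]))
```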
64 changes: 64 additions & 0 deletions src/task/metrics/mean_rowwise_error/config.vsh.yaml
@@ -19,6 +19,38 @@ functionality:
min: 0
max: "+inf"
maximize: false
- name: mean_rowwise_rmse_clipped_05
label: Mean Rowwise RMSE clipped at 0.05
summary: The mean of the root mean squared error (RMSE) of each row in the matrix, computed after clipping values to the range corresponding to an adjusted p-value of 0.05.
description: |
We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
$$
\textrm{MRRMSE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} (y_{ij} - \widehat{y}_{ij})^2\right)^{1/2}
$$
where $R$ is the number of scored rows, $y_{ij}$ and $\widehat{y}_{ij}$ are the actual and predicted values, respectively, for row $i$ and column $j$, and $n$ is the number of columns. All values are first clipped to $[-t, t]$ with $t = -\log_{10}(0.05)$.
repository_url: null
documentation_url: null
min: 0
max: "+inf"
maximize: false
- name: mean_rowwise_rmse_clipped_01
label: Mean Rowwise RMSE clipped at 0.01
summary: The mean of the root mean squared error (RMSE) of each row in the matrix, computed after clipping values to the range corresponding to an adjusted p-value of 0.01.
description: |
We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
$$
\textrm{MRRMSE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} (y_{ij} - \widehat{y}_{ij})^2\right)^{1/2}
$$
where $R$ is the number of scored rows, $y_{ij}$ and $\widehat{y}_{ij}$ are the actual and predicted values, respectively, for row $i$ and column $j$, and $n$ is the number of columns. All values are first clipped to $[-t, t]$ with $t = -\log_{10}(0.01)$.
repository_url: null
documentation_url: null
min: 0
max: "+inf"
maximize: false
- name: mean_rowwise_mae
label: Mean Rowwise MAE
summary: The mean of the mean absolute error (MAE) of each row in the matrix.
@@ -35,6 +35,67 @@
min: 0
max: "+inf"
maximize: false
- name: mean_rowwise_mae_clipped_05
label: Mean Rowwise MAE clipped at 0.05
summary: The mean of the mean absolute error (MAE) of each row in the matrix, computed after clipping values to the range corresponding to an adjusted p-value of 0.05.
description: |
We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
$$
\textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
$$
where $R$ is the number of scored rows, $y_{ij}$ and $\widehat{y}_{ij}$ are the actual and predicted values, respectively, for row $i$ and column $j$, and $n$ is the number of columns. All values are first clipped to $[-t, t]$ with $t = -\log_{10}(0.05)$.
repository_url: null
documentation_url: null
min: 0
max: "+inf"
maximize: false
- name: mean_rowwise_mae_clipped_01
label: Mean Rowwise MAE clipped at 0.01
summary: The mean of the mean absolute error (MAE) of each row in the matrix, computed after clipping values to the range corresponding to an adjusted p-value of 0.01.
description: |
We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
$$
\textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
$$
where $R$ is the number of scored rows, $y_{ij}$ and $\widehat{y}_{ij}$ are the actual and predicted values, respectively, for row $i$ and column $j$, and $n$ is the number of columns. All values are first clipped to $[-t, t]$ with $t = -\log_{10}(0.01)$.
repository_url: null
documentation_url: null
min: 0
max: "+inf"
maximize: false
resources:
- type: python_script
path: script.py
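Both formulas above amount to a couple of NumPy reductions per matrix. A minimal sketch (ours; the function name is illustrative and the inputs are assumed to be dense arrays of equal shape):

```python
import numpy as np

def mean_rowwise_errors(y_true, y_pred):
    """Return (MRRMSE, MRMAE): per-row RMSE and MAE, averaged over rows."""
    diff = y_true - y_pred
    mrrmse = float(np.mean(np.sqrt(np.mean(diff**2, axis=1))))  # sqrt of per-row MSE, then mean over rows
    # With equal-length rows, the mean of per-row MAEs equals the global mean absolute difference.
    mrmae = float(np.mean(np.abs(diff)))
    return mrrmse, mrmae
```

The clipped variants apply `np.clip` to both inputs first, exactly as the script below does.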
34 changes: 31 additions & 3 deletions src/task/metrics/mean_rowwise_error/script.py
@@ -5,7 +5,7 @@
## VIASH START
par = {
"de_test_h5ad": "resources/neurips-2023-data/de_test.h5ad",
"prediction": "resources/neurips-2023-data/output_rf.parquet",
"prediction": "resources/neurips-2023-data/prediction.parquet",
"method_id": "foo",
"output": "resources/neurips-2023-data/score.h5ad",
}
@@ -20,24 +20,52 @@
de_test_X = de_test.layers["sign_log10_pval"]
prediction = prediction[genes]

print("Clipping values", flush=True)
threshold_05 = -np.log10(0.05)
de_test_X_clipped_05 = np.clip(de_test_X, -threshold_05, threshold_05)
prediction_clipped_05 = np.clip(prediction.values, -threshold_05, threshold_05)

threshold_01 = -np.log10(0.01)
de_test_X_clipped_01 = np.clip(de_test_X, -threshold_01, threshold_01)
prediction_clipped_01 = np.clip(prediction.values, -threshold_01, threshold_01)

print("Calculate mean rowwise RMSE", flush=True)
mean_rowwise_rmse = 0
mean_rowwise_rmse_clipped_05 = 0
mean_rowwise_rmse_clipped_01 = 0
mean_rowwise_mae = 0
mean_rowwise_mae_clipped_05 = 0
mean_rowwise_mae_clipped_01 = 0
for i in range(de_test_X.shape[0]):
diff = de_test_X[i,] - prediction.iloc[i]
diff_clipped_05 = de_test_X_clipped_05[i,] - prediction_clipped_05[i]
diff_clipped_01 = de_test_X_clipped_01[i,] - prediction_clipped_01[i]

mean_rowwise_rmse += np.sqrt((diff**2).mean())
mean_rowwise_rmse_clipped_05 += np.sqrt((diff_clipped_05**2).mean())
mean_rowwise_rmse_clipped_01 += np.sqrt((diff_clipped_01**2).mean())
mean_rowwise_mae += np.abs(diff).mean()
mean_rowwise_mae_clipped_05 += np.abs(diff_clipped_05).mean()
mean_rowwise_mae_clipped_01 += np.abs(diff_clipped_01).mean()

mean_rowwise_rmse /= de_test.shape[0]
mean_rowwise_rmse_clipped_05 /= de_test.shape[0]
mean_rowwise_rmse_clipped_01 /= de_test.shape[0]
mean_rowwise_mae /= de_test.shape[0]
mean_rowwise_mae_clipped_05 /= de_test.shape[0]
mean_rowwise_mae_clipped_01 /= de_test.shape[0]

print("Create output", flush=True)
output = ad.AnnData(
uns={
"dataset_id": de_test.uns["dataset_id"],
"method_id": par["method_id"],
"metric_ids": ["mean_rowwise_rmse", "mean_rowwise_mae"],
"metric_values": [mean_rowwise_rmse, mean_rowwise_mae]
"metric_ids": ["mean_rowwise_rmse", "mean_rowwise_mae",
"mean_rowwise_rmse_clipped_05", "mean_rowwise_mae_clipped_05",
"mean_rowwise_rmse_clipped_01", "mean_rowwise_mae_clipped_01"],
"metric_values": [mean_rowwise_rmse, mean_rowwise_mae,
mean_rowwise_rmse_clipped_05, mean_rowwise_mae_clipped_05,
mean_rowwise_rmse_clipped_01, mean_rowwise_mae_clipped_01]
}
)

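For reference, the clipping bounds both scripts derive, in signed log10(adjusted p-value) space:

```python
import numpy as np

print(-np.log10(0.05))  # ~1.30103: the "_05" variants clip values to [-1.30103, 1.30103]
print(-np.log10(0.01))  # exactly 2.0: the "_01" variants clip values to [-2.0, 2.0]
```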