From 9313317ab0269002d3eaf1d325e6bb30db714e77 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 4 Jun 2024 08:28:58 +0200
Subject: [PATCH] fix scripts (#63)

* fix scripts

* fix script

* don't run with the original kaggle dataset by default

* update

* override output filenames
---
 scripts/run_benchmark_test.sh                 | 28 ++++++++++++-------
 scripts/run_benchmark_tw.sh                   | 20 ++++++-------
 scripts/run_stability_tw.sh                   |  4 ++-
 scripts/sync_results.sh                       |  7 +++++
 .../run_stability_analysis/config.vsh.yaml    |  2 +-
 5 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh
index 21d61bee..ec3928b2 100755
--- a/scripts/run_benchmark_test.sh
+++ b/scripts/run_benchmark_test.sh
@@ -2,16 +2,24 @@
 
 export NXF_VER=23.04.2
 
-cat > /tmp/params.yaml << EOF
-id: neurips-2023-data
-de_train_h5ad: resources/neurips-2023-data/de_train.h5ad
-de_test_h5ad: resources/neurips-2023-data/de_test.h5ad
-id_map: resources/neurips-2023-data/id_map.csv
-method_ids: ['ground_truth', 'sample', 'mean_across_celltypes', 'mean_across_compounds']
-layer: t # test a different layer
-publish_dir: "output/test_run_benchmark"
-output_state: state.yaml
-EOF
+resources_dir="resources"
+publish_dir="output/test_run_benchmark"
+
+cat > /tmp/params.yaml << HERE
+param_list:
+  - id: neurips-2023-data
+    de_train_h5ad: "$resources_dir/neurips-2023-data/de_train.h5ad"
+    de_test_h5ad: "$resources_dir/neurips-2023-data/de_test.h5ad"
+    id_map: "$resources_dir/neurips-2023-data/id_map.csv"
+    layer: clipped_sign_log10_pval
+  - id: neurips-2023-kaggle
+    de_train_h5ad: "$resources_dir/neurips-2023-kaggle/de_train.h5ad"
+    de_test_h5ad: "$resources_dir/neurips-2023-kaggle/de_test.h5ad"
+    id_map: "$resources_dir/neurips-2023-kaggle/id_map.csv"
+    layer: sign_log10_pval
+output_state: "state.yaml"
+publish_dir: "$publish_dir"
+HERE
 
 nextflow run . \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
diff --git a/scripts/run_benchmark_tw.sh b/scripts/run_benchmark_tw.sh
index abc836ba..76f46b6e 100755
--- a/scripts/run_benchmark_tw.sh
+++ b/scripts/run_benchmark_tw.sh
@@ -1,30 +1,30 @@
 #!/bin/bash
 
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
+resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources"
 publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"
 
 cat > /tmp/params.yaml << HERE
 param_list:
   - id: neurips-2023-data
-    de_train_h5ad: resources/neurips-2023-data/de_train.h5ad
-    de_test_h5ad: resources/neurips-2023-data/de_test.h5ad
-    id_map: resources/neurips-2023-data/id_map.csv
+    de_train_h5ad: "$resources_dir/neurips-2023-data/de_train.h5ad"
+    de_test_h5ad: "$resources_dir/neurips-2023-data/de_test.h5ad"
+    id_map: "$resources_dir/neurips-2023-data/id_map.csv"
     layer: clipped_sign_log10_pval
-  - id: neurips-2023-kaggle
-    de_train_h5ad: resources/neurips-2023-kaggle/de_train.h5ad
-    de_test_h5ad: resources/neurips-2023-kaggle/de_test.h5ad
-    id_map: resources/neurips-2023-kaggle/id_map.csv
-    layer: sign_log10_pval
+  # - id: neurips-2023-kaggle
+  #   de_train_h5ad: "$resources_dir/neurips-2023-kaggle/de_train.h5ad"
+  #   de_test_h5ad: "$resources_dir/neurips-2023-kaggle/de_test.h5ad"
+  #   id_map: "$resources_dir/neurips-2023-kaggle/id_map.csv"
+  #   layer: sign_log10_pval
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
 tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
-  --revision remove_clipped_build \
+  --revision main_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
   --workspace 53907369739130 \
   --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
   --params-file /tmp/params.yaml \
-  --entry-name auto \
   --config src/common/nextflow_helpers/labels_tw.config
diff --git a/scripts/run_stability_tw.sh b/scripts/run_stability_tw.sh
index 4b5c7ef6..3890fdf9 100755
--- a/scripts/run_stability_tw.sh
+++ b/scripts/run_stability_tw.sh
@@ -6,11 +6,13 @@ publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/result
 cat > /tmp/params.yaml << HERE
 id: neurips-2023-data
 sc_counts: s3://openproblems-bio/public/neurips-2023-competition/sc_counts_reannotated_with_counts.h5ad
+scores: stability_uns.yaml
+output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
 tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
-  --revision remove_clipped_build \
+  --revision main_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_stability_analysis/main.nf \
   --workspace 53907369739130 \
diff --git a/scripts/sync_results.sh b/scripts/sync_results.sh
index 04405114..a46623cb 100755
--- a/scripts/sync_results.sh
+++ b/scripts/sync_results.sh
@@ -5,7 +5,14 @@ aws s3 sync \
   output/benchmark_results/ \
   --delete --dryrun
 
+# sync locally modified results back to S3 (dry run; drop --dryrun to apply)
 aws s3 sync \
   output/benchmark_results/ \
   s3://openproblems-data/resources/dge_perturbation_prediction/results/ \
+  --delete --dryrun
+
+# sync a single run (selected by runid) back to S3 (dry run; drop --dryrun to apply)
+runid=run_2024-06-01_00-03-09; aws s3 sync \
+  output/benchmark_results/${runid}/ \
+  s3://openproblems-data/resources/dge_perturbation_prediction/results/${runid}/ \
   --delete --dryrun
\ No newline at end of file
diff --git a/src/task/workflows/run_stability_analysis/config.vsh.yaml b/src/task/workflows/run_stability_analysis/config.vsh.yaml
index e530efcb..05a7d6fa 100644
--- a/src/task/workflows/run_stability_analysis/config.vsh.yaml
+++ b/src/task/workflows/run_stability_analysis/config.vsh.yaml
@@ -55,7 +55,7 @@ functionality:
           required: true
           direction: output
           description: A yaml file containing the scores of each of the methods
-          default: score_uns.yaml
+          default: stability_uns.yaml
     - name: Arguments
       arguments:
         - name: "--method_ids"