From 9313317ab0269002d3eaf1d325e6bb30db714e77 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 4 Jun 2024 08:28:58 +0200 Subject: [PATCH] fix scripts (#63) * fix scripts * fix script * don't run with the original kaggle dataset by default * update * override output filenames --- scripts/run_benchmark_test.sh | 28 ++++++++++++------- scripts/run_benchmark_tw.sh | 20 ++++++------- scripts/run_stability_tw.sh | 4 ++- scripts/sync_results.sh | 7 +++++ .../run_stability_analysis/config.vsh.yaml | 2 +- 5 files changed, 39 insertions(+), 22 deletions(-) diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh index 21d61bee..ec3928b2 100755 --- a/scripts/run_benchmark_test.sh +++ b/scripts/run_benchmark_test.sh @@ -2,16 +2,24 @@ export NXF_VER=23.04.2 -cat > /tmp/params.yaml << EOF -id: neurips-2023-data -de_train_h5ad: resources/neurips-2023-data/de_train.h5ad -de_test_h5ad: resources/neurips-2023-data/de_test.h5ad -id_map: resources/neurips-2023-data/id_map.csv -method_ids: ['ground_truth', 'sample', 'mean_across_celltypes', 'mean_across_compounds'] -layer: t # test a different layer -publish_dir: "output/test_run_benchmark" -output_state: state.yaml -EOF +resources_dir="resources" +publish_dir="output/test_run_benchmark" + +cat > /tmp/params.yaml << HERE +param_list: + - id: neurips-2023-data + de_train_h5ad: "$resources_dir/neurips-2023-data/de_train.h5ad" + de_test_h5ad: "$resources_dir/neurips-2023-data/de_test.h5ad" + id_map: "$resources_dir/neurips-2023-data/id_map.csv" + layer: clipped_sign_log10_pval + - id: neurips-2023-kaggle + de_train_h5ad: "$resources_dir/neurips-2023-kaggle/de_train.h5ad" + de_test_h5ad: "$resources_dir/neurips-2023-kaggle/de_test.h5ad" + id_map: "$resources_dir/neurips-2023-kaggle/id_map.csv" + layer: sign_log10_pval +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE nextflow run . \ -main-script target/nextflow/workflows/run_benchmark/main.nf \ diff --git a/scripts/run_benchmark_tw.sh b/scripts/run_benchmark_tw.sh index abc836ba..76f46b6e 100755 --- a/scripts/run_benchmark_tw.sh +++ b/scripts/run_benchmark_tw.sh @@ -1,30 +1,30 @@ #!/bin/bash RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" +resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources" publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}" cat > /tmp/params.yaml << HERE param_list: - id: neurips-2023-data - de_train_h5ad: resources/neurips-2023-data/de_train.h5ad - de_test_h5ad: resources/neurips-2023-data/de_test.h5ad - id_map: resources/neurips-2023-data/id_map.csv + de_train_h5ad: "$resources_dir/neurips-2023-data/de_train.h5ad" + de_test_h5ad: "$resources_dir/neurips-2023-data/de_test.h5ad" + id_map: "$resources_dir/neurips-2023-data/id_map.csv" layer: clipped_sign_log10_pval - - id: neurips-2023-kaggle - de_train_h5ad: resources/neurips-2023-kaggle/de_train.h5ad - de_test_h5ad: resources/neurips-2023-kaggle/de_test.h5ad - id_map: resources/neurips-2023-kaggle/id_map.csv - layer: sign_log10_pval + # - id: neurips-2023-kaggle + # de_train_h5ad: "$resources_dir/neurips-2023-kaggle/de_train.h5ad" + # de_test_h5ad: "$resources_dir/neurips-2023-kaggle/de_test.h5ad" + # id_map: "$resources_dir/neurips-2023-kaggle/id_map.csv" + # layer: sign_log10_pval output_state: "state.yaml" publish_dir: "$publish_dir" HERE tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ - --revision remove_clipped_build \ + --revision main_build \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ --workspace 53907369739130 \ --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ --params-file /tmp/params.yaml \ - --entry-name auto \ --config src/common/nextflow_helpers/labels_tw.config diff --git a/scripts/run_stability_tw.sh b/scripts/run_stability_tw.sh index 4b5c7ef6..3890fdf9 100755 --- a/scripts/run_stability_tw.sh +++ b/scripts/run_stability_tw.sh @@ -6,11 +6,13 @@ publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/result cat > /tmp/params.yaml << HERE id: neurips-2023-data sc_counts: s3://openproblems-bio/public/neurips-2023-competition/sc_counts_reannotated_with_counts.h5ad +scores: stability_uns.yaml +output_state: "state.yaml" publish_dir: "$publish_dir" HERE tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ - --revision remove_clipped_build \ + --revision main_build \ --pull-latest \ --main-script target/nextflow/workflows/run_stability_analysis/main.nf \ --workspace 53907369739130 \ diff --git a/scripts/sync_results.sh b/scripts/sync_results.sh index 04405114..a46623cb 100755 --- a/scripts/sync_results.sh +++ b/scripts/sync_results.sh @@ -5,7 +5,14 @@ aws s3 sync \ output/benchmark_results/ \ --delete --dryrun +# sync back modified results aws s3 sync \ output/benchmark_results/ \ s3://openproblems-data/resources/dge_perturbation_prediction/results/ \ + --delete --dryrun + +# sync one run +runid=run_2024-06-01_00-03-09; aws s3 sync \ + output/benchmark_results/${runid}/ \ + s3://openproblems-data/resources/dge_perturbation_prediction/results/${runid}/ \ --delete --dryrun \ No newline at end of file diff --git a/src/task/workflows/run_stability_analysis/config.vsh.yaml b/src/task/workflows/run_stability_analysis/config.vsh.yaml index e530efcb..05a7d6fa 100644 --- a/src/task/workflows/run_stability_analysis/config.vsh.yaml +++ b/src/task/workflows/run_stability_analysis/config.vsh.yaml @@ -55,7 +55,7 @@ functionality: required: true direction: output description: A yaml file containing the scores of each of the methods - default: score_uns.yaml + default: stability_uns.yaml - name: Arguments arguments: - name: "--method_ids"