Skip to content

Commit

Permalink
add cell obs to uns (openproblems-bio#51)
Browse files Browse the repository at this point in the history
* update jn_ap_op2

* add cell obs to uns

* fix script

* refactor transformer_ensemble and convert_h5ad_to_parquet

* update scape

* update pyboost

* fix methods

* fix control methods

* fix test

* fix baselines

* fix api and wfs

* fix methods and metrics

* fix components

* add layer argument to metrics as well

* add layer argument to benchmark wf

* remove outdated input

* add t statistic to output table

* fixes

* demote lstm gpu usage

* fix code

* fix previous commit

* fix workflow

* move workflow

* switch to midgpu

* simplify description

* fix api

* fix wf

* add more helpful messages

* fix prediction extension

* remove references to parquet

* allow specifying different test and prediction layers

* add gene resolution strategy

* try to parallelize with nn.DataParallel

* fix args

* create nextflow workflow for lgc ensemble

* fix model names

* fix config

* enable lazy loading for the models

* revert `load_trained_models` function
  • Loading branch information
rcannood authored May 29, 2024
1 parent 5934a85 commit 1f6afe2
Show file tree
Hide file tree
Showing 73 changed files with 1,999 additions and 1,166 deletions.
6 changes: 3 additions & 3 deletions scripts/add_a_method.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@ viash run src/task/methods/$method_id/config.vsh.yaml -- \
viash run src/task/methods/$method_id/config.vsh.yaml -- \
--de_train "resources/neurips-2023-kaggle/de_train.parquet" \
--id_map "resources/neurips-2023-kaggle/id_map.csv" \
--output "output/prediction.parquet"
--output "output/prediction.h5ad"

# run the method (using h5ad as input)
viash run src/task/methods/$method_id/config.vsh.yaml -- \
--de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \
--id_map "resources/neurips-2023-kaggle/id_map.csv" \
--output "output/prediction.parquet"
--output "output/prediction.h5ad"

# run evaluation metric
viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
--de_test "resources/neurips-2023-kaggle/de_test.parquet" \
--prediction "output/prediction.parquet" \
--prediction "output/prediction.h5ad" \
--output "output/score.h5ad"

# print score on kaggle test dataset
Expand Down
14 changes: 5 additions & 9 deletions scripts/generate_kaggle_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ python -c \
viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml -- \
--input_train "$OUT/2023-09-12_de_by_cell_type_train.h5ad" \
--input_test "$OUT/2023-09-12_de_by_cell_type_test.h5ad" \
--output_train "$OUT/de_train.parquet" \
--input_single_cell_h5ad "resources/neurips-2023-raw/sc_counts.h5ad" \
--output_train_h5ad "$OUT/de_train.h5ad" \
--output_test "$OUT/de_test.parquet" \
--output_test_h5ad "$OUT/de_test.h5ad" \
--output_id_map "$OUT/id_map.csv" \
--dataset_id neurips-2023-kaggle \
Expand All @@ -33,22 +32,19 @@ viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yam

echo ">> Run method"
viash run src/task/control_methods/sample/config.vsh.yaml -- \
--de_train "$OUT/de_train.parquet" \
--de_test "$OUT/de_test.parquet" \
--de_train_h5ad "$OUT/de_train.h5ad" \
--de_test_h5ad "$OUT/de_test.h5ad" \
--id_map "$OUT/id_map.csv" \
--output "$OUT/prediction.parquet"
--output "$OUT/prediction.h5ad"

echo ">> Run metric"
viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
--prediction "$OUT/prediction.parquet" \
--method_id "sample" \
--prediction "$OUT/prediction.h5ad" \
--de_test_h5ad "$OUT/de_test.h5ad" \
--output "$OUT/score.h5ad"

cat > "$OUT/state.yaml" <<'EOF'
id: neurips-2023-kaggle
de_train: !file de_train.parquet
de_test: !file de_test.parquet
de_train_h5ad: !file de_train.h5ad
de_test_h5ad: !file de_test.h5ad
id_map: !file id_map.csv
Expand Down
11 changes: 5 additions & 6 deletions scripts/generate_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fi

echo ">> Running 'process_dataset' workflow"
nextflow run \
target/nextflow/process_dataset/workflow/main.nf \
target/nextflow/workflows/process_dataset/main.nf \
-profile docker \
-resume \
--id neurips-2023-data \
Expand All @@ -33,15 +33,14 @@ nextflow run \

echo ">> Run method"
viash run src/task/control_methods/sample/config.vsh.yaml -- \
--de_train "$OUT/de_train.parquet" \
--de_test "$OUT/de_test.parquet" \
--de_train_h5ad "$OUT/de_train.h5ad" \
--de_test_h5ad "$OUT/de_test.h5ad" \
--id_map "$OUT/id_map.csv" \
--output "$OUT/prediction.parquet"
--output "$OUT/prediction.h5ad"

echo ">> Run metric"
viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
--prediction "$OUT/prediction.parquet" \
--method_id "sample" \
--prediction "$OUT/prediction.h5ad" \
--de_test_h5ad "$OUT/de_test.h5ad" \
--output "$OUT/score.h5ad"

Expand Down
11 changes: 6 additions & 5 deletions src/common/nextflow_helpers/labels_tw.config
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,20 @@ process {
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}
withLabel: midgpu {
cpus = 32
withLabel: midgpu { // aiming for g4dn.12xlarge
memory = 150.GB
cpus = 40
accelerator = 4
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}
withLabel: highgpu {
cpus = 64
withLabel: highgpu { // aiming for g4dn.metal
memory = 300.GB
cpus = 80
accelerator = 8
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}

// make sure publishstates gets enough disk space and memory
withName:'.*publishStatesProc' {
memory = '16GB'
Expand Down
15 changes: 10 additions & 5 deletions src/task/api/comp_control_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,23 @@ functionality:
description: |
A control method to predict perturbation effects.
arguments:
- name: --de_train
__merge__: file_de_train_parquet.yaml
required: true
- name: --de_train_h5ad
__merge__: file_de_train_h5ad.yaml
required: false
direction: input
- name: --de_test
__merge__: file_de_test_parquet.yaml
- name: --de_test_h5ad
__merge__: file_de_test_h5ad.yaml
required: true
direction: input
- name: --id_map
__merge__: file_id_map.yaml
required: true
direction: input
- name: --layer
type: string
direction: input
default: sign_log10_pval
description: Which layer to use for prediction.
- name: --output
__merge__: file_prediction.yaml
required: true
Expand Down
32 changes: 1 addition & 31 deletions src/task/api/comp_method.yaml
Original file line number Diff line number Diff line change
@@ -1,35 +1,5 @@
__merge__: comp_method_notest.yaml
functionality:
namespace: "methods"
info:
type: method
type_info:
label: Method
summary: A regression method.
description: |
A regression method to predict the expression of one modality from another.
arguments:
- name: --de_train
__merge__: file_de_train_parquet.yaml
required: false
direction: input
- name: --de_train_h5ad
__merge__: file_de_train_h5ad.yaml
required: false
direction: input
- name: --id_map
__merge__: file_id_map.yaml
required: true
direction: input
- name: --output
__merge__: file_prediction.yaml
required: true
direction: output
- name: "--output_model"
type: "file"
description: "Optional model output. If no value is passed, the model will be removed at the end of the run."
direction: output
required: false
must_exist: false
test_resources:
- type: python_script
path: /src/common/component_tests/run_and_check_output.py
Expand Down
33 changes: 33 additions & 0 deletions src/task/api/comp_method_notest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
functionality:
namespace: "methods"
info:
type: method
type_info:
label: Method
summary: A regression method.
description: |
A regression method to predict the expression of one modality from another.
arguments:
- name: --de_train_h5ad
__merge__: file_de_train_h5ad.yaml
required: false
direction: input
- name: --id_map
__merge__: file_id_map.yaml
required: true
direction: input
- name: --layer
type: string
direction: input
default: sign_log10_pval
description: Which layer to use for prediction.
- name: --output
__merge__: file_prediction.yaml
required: true
direction: output
- name: "--output_model"
type: "file"
description: "Optional model output. If no value is passed, the model will be removed at the end of the run."
direction: output
required: false
must_exist: false
36 changes: 0 additions & 36 deletions src/task/api/comp_method_run.yaml

This file was deleted.

32 changes: 0 additions & 32 deletions src/task/api/comp_method_train.yaml

This file was deleted.

19 changes: 15 additions & 4 deletions src/task/api/comp_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,31 @@ functionality:
__merge__: file_de_test_h5ad.yaml
required: true
direction: input
- name: --de_test_layer
type: string
direction: input
default: sign_log10_pval
description: In which layer to find the DE data.
- name: --prediction
__merge__: file_prediction.yaml
required: true
direction: input
- name: --method_id
- name: --prediction_layer
type: string
required: true
direction: input
info:
test_default: test
default: prediction
description: In which layer to find the predicted DE data.
- name: --output
__merge__: file_score.yaml
direction: output
required: true
- name: --resolve_genes
type: string
direction: input
default: de_test
choices: [de_test, intersection]
description: |
How to resolve difference in genes between the two datasets.
test_resources:
- type: python_script
path: /src/common/component_tests/run_and_check_output.py
Expand Down
11 changes: 0 additions & 11 deletions src/task/api/comp_process_dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
functionality:
namespace: "process_dataset"
info:
type: process_dataset
type_info:
Expand All @@ -12,16 +11,6 @@ functionality:
__merge__: file_sc_counts.yaml
required: true
direction: input
- name: --de_train
__merge__: file_de_train_parquet.yaml
required: true
direction: output
default: de_train.parquet
- name: --de_test
__merge__: file_de_test_parquet.yaml
required: true
direction: output
default: de_test.parquet
- name: --de_train_h5ad
__merge__: file_de_train_h5ad.yaml
required: true
Expand Down
25 changes: 21 additions & 4 deletions src/task/api/file_de_test_h5ad.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ info:
description: "Boolean indicating whether this instance was used as a control."
required: true
layers:
- name: logFC
type: double
description: "Log fold change of the differential expression test"
required: true
- name: AveExpr
type: double
description: "Average expression of the differential expression test"
required: false
- name: t
type: double
description: "T-statistic of the differential expression test"
required: false
- name: P.Value
type: double
description: "P-value of the differential expression test"
Expand All @@ -49,6 +61,10 @@ info:
type: double
description: "Adjusted P-value of the differential expression test"
required: true
- name: B
type: double
description: "B-statistic of the differential expression test"
required: false
- name: is_de
type: boolean
description: "Whether the gene is differentially expressed"
Expand All @@ -57,10 +73,6 @@ info:
type: boolean
description: "Whether the gene is differentially expressed after adjustment"
required: true
- name: logFC
type: double
description: "Log fold change of the differential expression test"
required: true
- name: sign_log10_pval
type: double
description: |
Expand Down Expand Up @@ -100,3 +112,8 @@ info:
description: The organism of the sample in the dataset.
required: false
multiple: true
- name: single_cell_obs
type: dataframe
description: |
A dataframe with the cell-level metadata.
required: true
Loading

0 comments on commit 1f6afe2

Please sign in to comment.