Skip to content

Commit

Permalink
Add clipping to layer (openproblems-bio#57)
Browse files Browse the repository at this point in the history
* add a clipped layer to the limma component

* add scripts

* fix labels
  • Loading branch information
rcannood authored May 31, 2024
1 parent 3addfba commit e8451aa
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 7 deletions.
21 changes: 21 additions & 0 deletions scripts/run_benchmark_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash

# Runs the run_benchmark workflow from the current checkout with the docker
# profile, reading datasets from ./resources and publishing to ./output.

# Stop at the first failing command.
set -e

# Nextflow version used for this run.
export NXF_VER=23.04.2

input_dir="resources"
output_dir="output"

# mkdir -p succeeds silently when the directory is already there,
# so no explicit existence check is needed.
mkdir -p "$output_dir"

# Workflow arguments.
# 'input_states' looks for state.yaml files corresponding to datasets; the
# glob is quoted so it reaches nextflow unexpanded.
nextflow_args=(
  -main-script target/nextflow/workflows/run_benchmark/main.nf
  -profile docker
  -resume
  --publish_dir "$output_dir"
  --output_state "state.yaml"
  -entry auto
  --input_states "$input_dir/**/state.yaml"
)

# run benchmark
nextflow run . "${nextflow_args[@]}"
23 changes: 23 additions & 0 deletions scripts/run_layerclip_tw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash

# Launches the run_benchmark workflow through the `tw` CLI with the
# "clipped_sign_log10_pval" layer selected, publishing results to a
# timestamped directory on S3.

# Abort on the first failure (and on unset variables) so that `tw launch`
# never runs with a missing or stale parameter file.
set -euo pipefail

RUN_ID="layerclip_$(date +%Y-%m-%d_%H-%M-%S)"
publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"

# Write the parameters into a private scratch directory instead of a fixed
# /tmp/params.yaml, so concurrent launches cannot overwrite each other's
# parameters. The directory is removed when the script exits.
tmp_dir="$(mktemp -d)"
trap 'rm -rf "$tmp_dir"' EXIT
params_file="$tmp_dir/params.yaml"

# The heredoc delimiter is intentionally unquoted: $publish_dir is expanded
# by the shell before the file is written.
cat > "$params_file" << HERE
id: dge_perturbation_task
input_states: s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/neurips-2023-data/state.yaml
output_state: "state.yaml"
publish_dir: "$publish_dir"
rename_keys: "de_train_h5ad:de_train_h5ad,de_test_h5ad:de_test_h5ad,id_map:id_map"
settings: '{"layer": "clipped_sign_log10_pval"}'
HERE

tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
  --revision main_build \
  --pull-latest \
  --main-script target/nextflow/workflows/run_benchmark/main.nf \
  --workspace 53907369739130 \
  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
  --params-file "$params_file" \
  --entry-name auto \
  --config src/common/nextflow_helpers/labels_tw.config
4 changes: 2 additions & 2 deletions scripts/run_tval_tw.sh → scripts/run_layert_tw.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

RUN_ID="tval_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="layert_$(date +%Y-%m-%d_%H-%M-%S)"
publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"

cat > /tmp/params.yaml << HERE
Expand All @@ -13,7 +13,7 @@ settings: '{"layer": "t"}'
HERE

tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
--revision add_cell_obs_to_uns_build \
--revision main_build \
--pull-latest \
--main-script target/nextflow/workflows/run_benchmark/main.nf \
--workspace 53907369739130 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ __merge__: ../../api/comp_control_method.yaml
functionality:
name: mean_across_celltypes
info:
label: Mean outcome for the cell type
label: Mean per cell type and gene
summary: Baseline method that returns mean of cell type's outcomes
description: |
Baseline method that predicts for a cell type the mean of its outcomes of all compounds.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ __merge__: ../../api/comp_control_method.yaml
functionality:
name: mean_across_compounds
info:
label: Mean outcome for the compound
label: Mean per compound and gene
summary: Baseline method that returns mean of compound's outcomes
description: |
Baseline method that predicts for a compound the mean of its outcomes of all samples.
Expand Down
2 changes: 1 addition & 1 deletion src/task/control_methods/mean_outcome/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ __merge__: ../../api/comp_control_method.yaml
functionality:
name: mean_outcome
info:
label: Mean outcome for a gene
label: Mean per gene
summary: Baseline method that returns mean of gene's outcomes
description: |
Baseline method that predicts for a gene the mean of its outcomes of all samples.
Expand Down
5 changes: 5 additions & 0 deletions src/task/process_dataset/run_limma/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ functionality:
type: double
required: false
default: 0.05
- name: --clipping_cutoff
type: double
required: false
default: 0.0001
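description: P-values below this cutoff are raised to the cutoff before computing the clipped layer, so the signed log10 p-values lie between log10(clipping_cutoff) and -log10(clipping_cutoff)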
- name: --control_compound
type: string
required: false
Expand Down
6 changes: 4 additions & 2 deletions src/task/process_dataset/run_limma/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ de_df2 <- de_df %>%
sign_log10_adj_pval = sign(logFC) * -log10(ifelse(adj.P.Value == 0, .Machine$double.eps, adj.P.Value)),
# determine if gene is DE
is_de = P.Value < par$de_sig_cutoff,
is_de_adj = adj.P.Value < par$de_sig_cutoff
is_de_adj = adj.P.Value < par$de_sig_cutoff,
# compute clipped sign fc × log10 p-values
clipped_sign_log10_pval = sign(logFC) * -log10(pmax(par$clipping_cutoff, P.Value)),
) %>%
as_tibble()

Expand All @@ -122,7 +124,7 @@ rownames(new_obs) <- paste0(new_obs$cell_type, ", ", new_obs$sm_name)
new_var <- data.frame(row.names = levels(de_df2$gene))

# create layers from de_df
layer_names <- c("is_de", "is_de_adj", "logFC", "AveExpr", "t", "P.Value", "adj.P.Value", "B", "sign_log10_adj_pval", "sign_log10_pval")
layer_names <- c("is_de", "is_de_adj", "logFC", "AveExpr", "t", "P.Value", "adj.P.Value", "B", "sign_log10_adj_pval", "sign_log10_pval", "clipped_sign_log10_pval")
layers <- map(setNames(layer_names, layer_names), function(layer_name) {
de_df2 %>%
select(gene, row_i, !!layer_name) %>%
Expand Down

0 comments on commit e8451aa

Please sign in to comment.