-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement alternate stability analysis (#58)
* add sc_counts bootstrap method * remove stability wf * add novel stability wf * fix name * also passthrough layer argument * turn r component into python component * fix for list bug * simplify wf * fix wf
- Loading branch information
Showing
7 changed files
with
259 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
src/task/process_dataset/bootstrap_sc_counts/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
functionality: | ||
name: bootstrap_sc_counts | ||
namespace: process_dataset | ||
info: | ||
type: process_dataset | ||
type_info: | ||
label: Bootstrap | ||
summary: A component to generate bootstraps of a dataset. | ||
description: | | ||
This component generates a bootstrap of a dataset. It takes an h5ad file as input, samples a fraction of its obs and var, and saves the resulting bootstrap as an h5ad file. | ||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --input | ||
type: file | ||
required: true | ||
direction: input | ||
example: resources/neurips-2023-raw/sc_counts_reannotated_with_counts.h5ad | ||
- name: Outputs | ||
arguments: | ||
- name: --output | ||
type: file | ||
required: true | ||
direction: output | ||
example: sc_counts_bootstrap.h5ad | ||
- name: Arguments | ||
arguments: | ||
- name: --obs_fraction | ||
type: double | ||
required: true | ||
default: 0.95 | ||
description: Fraction of the obs of the sc_counts to include in each bootstrap. | ||
- name: --var_fraction | ||
type: double | ||
required: true | ||
default: 0.95 | ||
description: Fraction of the var of the sc_counts to include in each bootstrap. | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
platforms: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_python:1.0.4 | ||
- type: nextflow | ||
directives: | ||
label: [ midtime, highmem, midcpu ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import anndata as ad | ||
import numpy as np | ||
|
||
# VIASH START
par = {
    "input": "resources/neurips-2023-raw/sc_counts_reannotated_with_counts.h5ad",
    "output": "output/sc_counts_bootstrapped_*.h5ad",
    "obs_fraction": 0.95,
    "var_fraction": 1
}
# VIASH END

# Read the full AnnData object from the h5ad file given by par["input"].
adata = ad.read_h5ad(par["input"])

# Number of obs / var names to keep; int() truncates towards zero.
n_obs_keep = int(adata.n_obs * par["obs_fraction"])
n_vars_keep = int(adata.n_vars * par["var_fraction"])

# Draw names without replacement. obs is drawn before var so the global
# NumPy random state is consumed in the same order on every run.
kept_obs = np.random.choice(adata.obs_names, n_obs_keep, replace=False)
kept_vars = np.random.choice(adata.var_names, n_vars_keep, replace=False)

# Materialise the selection as an independent object rather than a view.
subset = adata[kept_obs, kept_vars].copy()

# Store the result as a gzip-compressed h5ad file at par["output"].
subset.write_h5ad(par["output"], compression="gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
functionality: | ||
name: "run_stability_analysis" | ||
namespace: "workflows" | ||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --sc_counts | ||
__merge__: ../../api/file_sc_counts.yaml | ||
required: true | ||
direction: input | ||
- name: "--id" | ||
type: string | ||
description: Unique identifier of the dataset. | ||
required: true | ||
- name: --layer | ||
type: string | ||
direction: input | ||
default: sign_log10_pval | ||
description: Which layer to use for prediction and evaluation. | ||
- name: Bootstrapping arguments | ||
description: Define the sampling strategy for the stability analysis. | ||
arguments: | ||
- name: --bootstrap_num_replicates | ||
type: integer | ||
required: true | ||
default: 10 | ||
description: Number of bootstraps to generate. | ||
- name: --bootstrap_obs_fraction | ||
type: double | ||
required: true | ||
default: 0.95 | ||
description: Fraction of the obs of the sc_counts to include in each bootstrap. | ||
- name: --bootstrap_var_fraction | ||
type: double | ||
required: true | ||
default: 1 | ||
description: Fraction of the var of the sc_counts to include in each bootstrap. | ||
- name: Outputs | ||
arguments: | ||
- name: "--scores" | ||
type: file | ||
required: true | ||
direction: output | ||
description: A yaml file containing the scores of each of the methods | ||
default: score_uns.yaml | ||
- name: Arguments | ||
arguments: | ||
- name: "--method_ids" | ||
type: string | ||
multiple: true | ||
description: A list of method ids to run. If not specified, all methods will be run. | ||
- name: "--metric_ids" | ||
type: string | ||
multiple: true | ||
description: A list of metric ids to run. If not specified, all metrics will be run. | ||
resources: | ||
- type: nextflow_script | ||
path: main.nf | ||
entrypoint: run_wf | ||
- type: file | ||
path: "../../api/task_info.yaml" | ||
dependencies: | ||
- name: process_dataset/bootstrap_sc_counts | ||
- name: workflows/process_dataset | ||
- name: workflows/run_benchmark | ||
repositories: | ||
- name: openproblemsv2 | ||
type: github | ||
repo: openproblems-bio/openproblems-v2 | ||
tag: main_build | ||
platforms: | ||
- type: nextflow | ||
config: | ||
script: | | ||
process.errorStrategy = 'ignore' | ||
trace { | ||
enabled = true | ||
overwrite = true | ||
file = "${params.publish_dir}/trace.txt" | ||
} |
Oops, something went wrong.