From 8a613812c05774713df0e96cd9d58f8afd0e0717 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 5 Jun 2024 00:52:11 +0200 Subject: [PATCH] Rename `task-dge-perturbation-prediction` to `task_perturbation_prediction` (#66) * update task info * update project config * rename task * move files * add namespace to api * update readme * update spec * simplify component --- CHANGELOG.md | 12 +- README.md | 287 ++++++++++-------- _viash.yaml | 4 +- scripts/add_a_method.sh | 12 +- scripts/generate_kaggle_resources.sh | 6 +- scripts/generate_resources.sh | 4 +- scripts/render_readme.sh | 6 +- scripts/run_benchmark_tw.sh | 4 +- scripts/run_benchmark_tw_traens.sh | 6 +- scripts/run_layert_tw.sh | 4 +- scripts/run_stability_tw.sh | 4 +- scripts/sync_results.sh | 8 +- src/{task => }/api/comp_control_method.yaml | 0 src/{task => }/api/comp_method.yaml | 0 src/{task => }/api/comp_method_notest.yaml | 3 +- src/{task => }/api/comp_metric.yaml | 0 src/{task => }/api/comp_process_dataset.yaml | 4 +- src/{task => }/api/file_de_test_h5ad.yaml | 0 src/{task => }/api/file_de_train_h5ad.yaml | 0 src/{task => }/api/file_id_map.yaml | 0 src/api/file_model.yaml | 6 + src/{task => }/api/file_prediction.yaml | 0 src/{task => }/api/file_sc_counts.yaml | 0 src/{task => }/api/file_score.yaml | 0 src/{task => }/api/task_info.yaml | 48 ++- src/common/create_component/config.vsh.yaml | 4 +- src/common/create_component/script.py | 6 +- .../ground_truth/config.vsh.yaml | 0 .../control_methods/ground_truth/script.R | 0 .../mean_across_celltypes/config.vsh.yaml | 0 .../mean_across_celltypes/script.py | 0 .../mean_across_compounds/config.vsh.yaml | 0 .../mean_across_compounds/script.py | 0 .../mean_outcome/config.vsh.yaml | 0 .../control_methods/mean_outcome/script.py | 0 .../control_methods/sample/config.vsh.yaml | 0 .../control_methods/sample/script.R | 0 .../control_methods/zeros/config.vsh.yaml | 0 .../control_methods/zeros/script.py | 0 .../methods/jn_ap_op2/config.vsh.yaml | 1 - src/{task 
=> }/methods/jn_ap_op2/helper.py | 0 src/{task => }/methods/jn_ap_op2/script.py | 10 +- .../methods/lgc_ensemble/config.vsh.yaml | 0 src/{task => }/methods/lgc_ensemble/main.nf | 0 src/{task => }/methods/lgc_ensemble/test.sh | 0 .../lgc_ensemble_direct/config.vsh.yaml | 0 .../methods/lgc_ensemble_direct/script.py | 2 +- .../lgc_ensemble_helpers/divisor_finder.py | 0 .../lgc_ensemble_helpers/helper_classes.py | 0 .../lgc_ensemble_helpers/helper_functions.py | 0 .../methods/lgc_ensemble_helpers/models.py | 0 .../methods/lgc_ensemble_helpers/predict.py | 0 .../lgc_ensemble_helpers/prepare_data.py | 0 .../methods/lgc_ensemble_helpers/train.py | 0 .../lgc_ensemble_predict/config.vsh.yaml | 0 .../methods/lgc_ensemble_predict/script.py | 0 .../lgc_ensemble_prepare/config.vsh.yaml | 0 .../methods/lgc_ensemble_prepare/script.py | 6 +- .../lgc_ensemble_train/config.vsh.yaml | 0 .../methods/lgc_ensemble_train/script.py | 4 +- .../config.vsh.yaml | 0 .../notebook_264.py | 0 .../notebook_266.py | 0 .../nn_retraining_with_pseudolabels/script.py | 2 +- .../methods/pyboost/config.vsh.yaml | 0 src/{task => }/methods/pyboost/helper.py | 0 src/{task => }/methods/pyboost/script.py | 2 +- src/{task => }/methods/scape/config.vsh.yaml | 0 src/{task => }/methods/scape/script.py | 0 .../transformer_ensemble/config.vsh.yaml | 0 .../methods/transformer_ensemble/models.py | 0 .../methods/transformer_ensemble/script.py | 2 +- .../methods/transformer_ensemble/train.py | 0 .../methods/transformer_ensemble/utils.py | 0 .../mean_rowwise_correlation/config.vsh.yaml | 0 .../metrics/mean_rowwise_correlation/script.R | 0 .../mean_rowwise_error/config.vsh.yaml | 0 .../metrics/mean_rowwise_error/script.R | 0 .../add_uns_metadata/config.vsh.yaml | 3 +- .../add_uns_metadata/script.py | 0 .../process_dataset/bootstrap/config.vsh.yaml | 1 - .../process_dataset/bootstrap/script.py | 0 .../clean_pseudobulk/config.vsh.yaml | 1 - .../process_dataset/clean_pseudobulk/script.R | 0 
.../compute_pseudobulk/config.vsh.yaml | 3 +- .../compute_pseudobulk/script.py | 0 .../convert_h5ad_to_parquet/config.vsh.yaml | 1 - .../convert_h5ad_to_parquet/script.py | 0 .../config.vsh.yaml | 1 - .../convert_kaggle_h5ad_to_parquet/script.py | 0 .../filter_obs/config.vsh.yaml | 1 - .../process_dataset/filter_obs/script.R | 0 .../generate_id_map/config.vsh.yaml | 1 - .../process_dataset/generate_id_map/script.py | 0 .../process_dataset/run_limma/config.vsh.yaml | 3 +- .../process_dataset/run_limma/script.R | 0 src/{task => }/utils/anndata_to_dataframe.py | 0 .../workflows/process_dataset/config.vsh.yaml | 0 .../workflows/process_dataset/main.nf | 0 .../workflows/run_benchmark/config.vsh.yaml | 0 .../workflows/run_benchmark/main.nf | 0 .../run_stability_analysis/config.vsh.yaml | 0 .../workflows/run_stability_analysis/main.nf | 0 103 files changed, 272 insertions(+), 200 deletions(-) rename src/{task => }/api/comp_control_method.yaml (100%) rename src/{task => }/api/comp_method.yaml (100%) rename src/{task => }/api/comp_method_notest.yaml (85%) rename src/{task => }/api/comp_metric.yaml (100%) rename src/{task => }/api/comp_process_dataset.yaml (92%) rename src/{task => }/api/file_de_test_h5ad.yaml (100%) rename src/{task => }/api/file_de_train_h5ad.yaml (100%) rename src/{task => }/api/file_id_map.yaml (100%) create mode 100644 src/api/file_model.yaml rename src/{task => }/api/file_prediction.yaml (100%) rename src/{task => }/api/file_sc_counts.yaml (100%) rename src/{task => }/api/file_score.yaml (100%) rename src/{task => }/api/task_info.yaml (81%) rename src/{task => }/control_methods/ground_truth/config.vsh.yaml (100%) rename src/{task => }/control_methods/ground_truth/script.R (100%) rename src/{task => }/control_methods/mean_across_celltypes/config.vsh.yaml (100%) rename src/{task => }/control_methods/mean_across_celltypes/script.py (100%) rename src/{task => }/control_methods/mean_across_compounds/config.vsh.yaml (100%) rename src/{task => 
}/control_methods/mean_across_compounds/script.py (100%) rename src/{task => }/control_methods/mean_outcome/config.vsh.yaml (100%) rename src/{task => }/control_methods/mean_outcome/script.py (100%) rename src/{task => }/control_methods/sample/config.vsh.yaml (100%) rename src/{task => }/control_methods/sample/script.R (100%) rename src/{task => }/control_methods/zeros/config.vsh.yaml (100%) rename src/{task => }/control_methods/zeros/script.py (100%) rename src/{task => }/methods/jn_ap_op2/config.vsh.yaml (97%) rename src/{task => }/methods/jn_ap_op2/helper.py (100%) rename src/{task => }/methods/jn_ap_op2/script.py (90%) rename src/{task => }/methods/lgc_ensemble/config.vsh.yaml (100%) rename src/{task => }/methods/lgc_ensemble/main.nf (100%) rename src/{task => }/methods/lgc_ensemble/test.sh (100%) rename src/{task => }/methods/lgc_ensemble_direct/config.vsh.yaml (100%) rename src/{task => }/methods/lgc_ensemble_direct/script.py (96%) rename src/{task => }/methods/lgc_ensemble_helpers/divisor_finder.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/helper_classes.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/helper_functions.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/models.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/predict.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/prepare_data.py (100%) rename src/{task => }/methods/lgc_ensemble_helpers/train.py (100%) rename src/{task => }/methods/lgc_ensemble_predict/config.vsh.yaml (100%) rename src/{task => }/methods/lgc_ensemble_predict/script.py (100%) rename src/{task => }/methods/lgc_ensemble_prepare/config.vsh.yaml (100%) rename src/{task => }/methods/lgc_ensemble_prepare/script.py (97%) rename src/{task => }/methods/lgc_ensemble_train/config.vsh.yaml (100%) rename src/{task => }/methods/lgc_ensemble_train/script.py (94%) rename src/{task => }/methods/nn_retraining_with_pseudolabels/config.vsh.yaml (100%) rename src/{task => 
}/methods/nn_retraining_with_pseudolabels/notebook_264.py (100%) rename src/{task => }/methods/nn_retraining_with_pseudolabels/notebook_266.py (100%) rename src/{task => }/methods/nn_retraining_with_pseudolabels/script.py (96%) rename src/{task => }/methods/pyboost/config.vsh.yaml (100%) rename src/{task => }/methods/pyboost/helper.py (100%) rename src/{task => }/methods/pyboost/script.py (98%) rename src/{task => }/methods/scape/config.vsh.yaml (100%) rename src/{task => }/methods/scape/script.py (100%) rename src/{task => }/methods/transformer_ensemble/config.vsh.yaml (100%) rename src/{task => }/methods/transformer_ensemble/models.py (100%) rename src/{task => }/methods/transformer_ensemble/script.py (98%) rename src/{task => }/methods/transformer_ensemble/train.py (100%) rename src/{task => }/methods/transformer_ensemble/utils.py (100%) rename src/{task => }/metrics/mean_rowwise_correlation/config.vsh.yaml (100%) rename src/{task => }/metrics/mean_rowwise_correlation/script.R (100%) rename src/{task => }/metrics/mean_rowwise_error/config.vsh.yaml (100%) rename src/{task => }/metrics/mean_rowwise_error/script.R (100%) rename src/{task => }/process_dataset/add_uns_metadata/config.vsh.yaml (96%) rename src/{task => }/process_dataset/add_uns_metadata/script.py (100%) rename src/{task => }/process_dataset/bootstrap/config.vsh.yaml (98%) rename src/{task => }/process_dataset/bootstrap/script.py (100%) rename src/{task => }/process_dataset/clean_pseudobulk/config.vsh.yaml (96%) rename src/{task => }/process_dataset/clean_pseudobulk/script.R (100%) rename src/{task => }/process_dataset/compute_pseudobulk/config.vsh.yaml (89%) rename src/{task => }/process_dataset/compute_pseudobulk/script.py (100%) rename src/{task => }/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml (97%) rename src/{task => }/process_dataset/convert_h5ad_to_parquet/script.py (100%) rename src/{task => }/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml (98%) rename src/{task => 
}/process_dataset/convert_kaggle_h5ad_to_parquet/script.py (100%) rename src/{task => }/process_dataset/filter_obs/config.vsh.yaml (96%) rename src/{task => }/process_dataset/filter_obs/script.R (100%) rename src/{task => }/process_dataset/generate_id_map/config.vsh.yaml (96%) rename src/{task => }/process_dataset/generate_id_map/script.py (100%) rename src/{task => }/process_dataset/run_limma/config.vsh.yaml (94%) rename src/{task => }/process_dataset/run_limma/script.R (100%) rename src/{task => }/utils/anndata_to_dataframe.py (100%) rename src/{task => }/workflows/process_dataset/config.vsh.yaml (100%) rename src/{task => }/workflows/process_dataset/main.nf (100%) rename src/{task => }/workflows/run_benchmark/config.vsh.yaml (100%) rename src/{task => }/workflows/run_benchmark/main.nf (100%) rename src/{task => }/workflows/run_stability_analysis/config.vsh.yaml (100%) rename src/{task => }/workflows/run_stability_analysis/main.nf (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a61bee..437bb9b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,10 @@ -# task-dge-perturbation-prediction 0.1.0 +# task_perturbation_prediction 1.0.0 -Initial release of the DGE Perturbation Prediction task. Initial components: +Initial release of the Perturbation Prediction task. Initial components: -* `src/task/process_dataset`: Compute the DGE data from the raw single-cell counts using Limma. -* `src/task/control_methods`: Baseline control methods: sample, ground_truth, zeros, mean_across_celltypes, mean_across_compounds, mean_outcome. -* `src/task/methods`: DGE perturbation prediction methods: random_forest. -* `src/task/metrics`: Evaluation metrics: mean_rowwise_error. +* `src/process_dataset`: Compute the DGE data from the raw single-cell counts using Limma. +* `src/control_methods`: Baseline control methods: sample, ground_truth, zeros, mean_across_celltypes, mean_across_compounds, mean_outcome. 
+* `src/methods`: Perturbation prediction methods: jn_ap_op2, lgc_ensemble, nn_retraining_with_pseudolabels, pyboost, scape, transformer_ensemble. +* `src/metrics`: Evaluation metrics: mean_rowwise_error, mean_rowwise_correlation. diff --git a/README.md b/README.md index 42eaf4be..596c4f44 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DGE Perturbation Prediction +# Perturbation Prediction file_de_train - comp_process_dataset-->file_de_test + comp_process_dataset-->file_de_train_h5ad + comp_process_dataset-->file_de_test_h5ad comp_process_dataset-->file_id_map - file_de_train---comp_control_method - file_de_train---comp_method - file_de_test---comp_control_method - file_de_test---comp_metric + file_de_train_h5ad---comp_control_method + file_de_train_h5ad---comp_method_notest + file_de_train_h5ad---comp_method + file_de_test_h5ad---comp_control_method + file_de_test_h5ad---comp_metric file_id_map---comp_control_method + file_id_map---comp_method_notest file_id_map---comp_method comp_control_method-->file_prediction comp_method-->file_prediction + comp_method-->file_model comp_metric-->file_score file_prediction---comp_metric - file_lincs_id_compound_mapping---comp_process_dataset ``` ## File format: Single Cell Counts @@ -196,24 +212,23 @@ Slot description: -## Component type: Data processor +## Component type: Process dataset Path: [`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset) -A DGE regression dataset processor +Process the raw dataset Arguments:
-| Name | Type | Description | -|:------------------------------|:-------|:---------------------------------------------------------------------------------------------| -| `--sc_counts` | `file` | Anndata with the counts of the whole dataset. | -| `--lincs_id_compound_mapping` | `file` | Parquet file mapping compound names to lincs ids and smiles. | -| `--de_train` | `file` | (*Output*) Differential expression results for training. | -| `--de_test` | `file` | (*Output*) Differential expression results for testing. | -| `--id_map` | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. | +| Name | Type | Description | +|:------------------|:-------|:--------------------------------------------------------------------------------------------------------------------| +| `--sc_counts` | `file` | Anndata with the counts of the whole dataset. | +| `--de_train_h5ad` | `file` | (*Output*) Differential expression results for training. Default: `de_train.h5ad`. | +| `--de_test_h5ad` | `file` | (*Output*) Differential expression results for testing. Default: `de_test.h5ad`. | +| `--id_map` | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. Default: `id_map.csv`. |
@@ -229,7 +244,8 @@ Format: AnnData object obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control' - layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval' + layers: 'logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Value', 'B', 'is_de', 'is_de_adj', 'sign_log10_pval', 'clipped_sign_log10_pval' + uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'single_cell_obs' @@ -237,20 +253,32 @@ Slot description:
-| Slot | Type | Description | -|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | -| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | -| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | -| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | -| `layers["P.Value"]` | `double` | P-value of the differential expression test. | -| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | -| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | -| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | -| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | -| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. 
Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | +| Slot | Type | Description | +|:------------------------------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | +| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | +| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | +| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | +| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | +| `layers["AveExpr"]` | `double` | (*Optional*) Average expression of the differential expression test. | +| `layers["t"]` | `double` | (*Optional*) T-statistic of the differential expression test. 
| +| `layers["P.Value"]` | `double` | P-value of the differential expression test. | +| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | +| `layers["B"]` | `double` | (*Optional*) B-statistic of the differential expression test. | +| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | +| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | +| `layers["sign_log10_pval"]` | `double` | Differential expression value (`-log10(p-value) * sign(LFC)`) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | +| `layers["clipped_sign_log10_pval"]` | `double` | A clipped version of the sign_log10_pval layer. Values are clipped to be between -4 and 4 (i.e. `log10(0.0001)` and `-log10(0.0001)`). | +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived. | +| `uns["dataset_name"]` | `string` | A human-readable name for the dataset. | +| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. | +| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. | +| `uns["dataset_summary"]` | `string` | Short description of the dataset. | +| `uns["dataset_description"]` | `string` | Long description of the dataset. | +| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. | +| `uns["single_cell_obs"]` | `dataframe` | A dataframe with the cell-level metadata for the training set. |
@@ -265,8 +293,9 @@ Format:
AnnData object - obs: 'id', 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control' - layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval' + obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control' + layers: 'logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Value', 'B', 'is_de', 'is_de_adj', 'sign_log10_pval', 'clipped_sign_log10_pval' + uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'single_cell_obs'
@@ -274,21 +303,32 @@ Slot description:
-| Slot | Type | Description | -|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | -| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | -| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | -| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | -| `layers["P.Value"]` | `double` | P-value of the differential expression test. | -| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | -| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | -| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | -| `layers["logFC"]` | `double` | Log fold change of the differential expression test. 
| -| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | +| Slot | Type | Description | +|:------------------------------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | +| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | +| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | +| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | +| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | +| `layers["AveExpr"]` | `double` | (*Optional*) Average expression of the differential expression test. 
| +| `layers["t"]` | `double` | (*Optional*) T-statistic of the differential expression test. | +| `layers["P.Value"]` | `double` | P-value of the differential expression test. | +| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | +| `layers["B"]` | `double` | (*Optional*) B-statistic of the differential expression test. | +| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | +| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | +| `layers["sign_log10_pval"]` | `double` | Differential expression value (`-log10(p-value) * sign(LFC)`) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | +| `layers["clipped_sign_log10_pval"]` | `double` | A clipped version of the sign_log10_pval layer. Values are clipped to be between -4 and 4 (i.e. `log10(0.0001)` and `-log10(0.0001)`). | +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived. | +| `uns["dataset_name"]` | `string` | A human-readable name for the dataset. | +| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. | +| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. | +| `uns["dataset_summary"]` | `string` | Short description of the dataset. | +| `uns["dataset_description"]` | `string` | Long description of the dataset. | +| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. | +| `uns["single_cell_obs"]` | `dataframe` | A dataframe with the cell-level metadata. |
@@ -303,8 +343,8 @@ Format:
- AnnData object - obs: 'id', 'cell_type', 'sm_name' + Tabular data + 'id', 'cell_type', 'sm_name'
@@ -312,11 +352,11 @@ Slot description:
-| Slot | Type | Description | -|:-------------------|:----------|:-------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `obs["cell_type"]` | `string` | Cell type name. | -| `obs["sm_name"]` | `string` | Small molecule name. | +| Column | Type | Description | +|:------------|:----------|:-------------------------------| +| `id` | `integer` | Index of the test observation. | +| `cell_type` | `string` | Cell type name. | +| `sm_name` | `string` | Small molecule name. |
@@ -331,15 +371,17 @@ Arguments:
-| Name | Type | Description | -|:-------------|:-------|:----------------------------------------------------------------------------------| -| `--de_train` | `file` | Differential expression results for training. | -| `--de_test` | `file` | Differential expression results for testing. | -| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | -| `--output` | `file` | (*Output*) Differential Gene Expression prediction. | +| Name | Type | Description | +|:------------------|:---------|:------------------------------------------------------------------------------------| +| `--de_train_h5ad` | `file` | (*Optional*) Differential expression results for training. | +| `--de_test_h5ad` | `file` | Differential expression results for testing. | +| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | +| `--layer` | `string` | (*Optional*) Which layer to use for prediction. Default: `clipped_sign_log10_pval`. | +| `--output` | `file` | (*Output*) Differential Gene Expression prediction. |
+ ## Component type: Method Path: @@ -351,11 +393,13 @@ Arguments:
-| Name | Type | Description | -|:-------------|:-------|:----------------------------------------------------------------------------------| -| `--de_train` | `file` | Differential expression results for training. | -| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | -| `--output` | `file` | (*Output*) Differential Gene Expression prediction. | +| Name | Type | Description | +|:------------------|:---------|:--------------------------------------------------------------------------------------------------------------------| +| `--de_train_h5ad` | `file` | (*Optional*) Differential expression results for training. | +| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | +| `--layer` | `string` | (*Optional*) Which layer to use for prediction. Default: `clipped_sign_log10_pval`. | +| `--output` | `file` | (*Output*) Differential Gene Expression prediction. | +| `--output_model` | `file` | (*Optional, Output*) Optional model output. If no value is passed, the model will be removed at the end of the run. |
@@ -370,11 +414,15 @@ Arguments:
-| Name | Type | Description | -|:---------------|:-------|:--------------------------------------------------| -| `--de_test` | `file` | Differential expression results for testing. | -| `--prediction` | `file` | Differential Gene Expression prediction. | -| `--output` | `file` | (*Output*) File indicating the score of a metric. | +| Name | Type | Description | +|:---------------------|:---------|:----------------------------------------------------------------------------------------------| +| `--de_test_h5ad` | `file` | Differential expression results for testing. | +| `--de_test_layer` | `string` | (*Optional*) In which layer to find the DE data. Default: `clipped_sign_log10_pval`. | +| `--prediction` | `file` | Differential Gene Expression prediction. | +| `--prediction_layer` | `string` | (*Optional*) In which layer to find the predicted DE data. Default: `prediction`. | +| `--output` | `file` | (*Output*) File indicating the score of a metric. | +| `--resolve_genes` | `string` | (*Optional*) How to resolve difference in genes between the two datasets. Default: `de_test`. | +| `--resolve_genes` | `string` | (*Optional*) How to resolve difference in genes between the two datasets. Default: `de_test`. |
@@ -382,15 +430,15 @@ Arguments: Differential Gene Expression prediction -Example file: `resources/neurips-2023-data/output_rf.parquet` +Example file: `resources/neurips-2023-data/prediction.h5ad` Format:
AnnData object - obs: 'id' - layers: 'sign_log10_pval' + layers: 'prediction' + uns: 'dataset_id', 'method_id'
@@ -398,54 +446,45 @@ Slot description:
-| Slot | Type | Description | -|:----------------------------|:----------|:---------------------------------------------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `layers["sign_log10_pval"]` | `double` | Predicted sign of the logFC times the log10 of the adjusted p-value. | +| Slot | Type | Description | +|:-----------------------|:---------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `layers["prediction"]` | `double` | Predicted differential gene expression. | +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived. | +| `uns["method_id"]` | `string` | A unique identifier for the method used to generate the prediction. |
-## File format: Score +## File format: Model -File indicating the score of a metric. +Optional model output. If no value is passed, the model will be removed +at the end of the run. -Example file: `resources/neurips-2023-data/score_rf.json` +Example file: `resources/neurips-2023-data/model/` Format:
- AnnData object - uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values' -
Slot description:
-| Slot | Type | Description | -|:-----------------------|:---------|:---------------------------------------------------------------------------------------------| -| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | -| `uns["method_id"]` | `string` | A unique identifier for the method. | -| `uns["metric_ids"]` | `string` | One or more unique metric identifiers. | -| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. | -
-## File format: Mapping compound names to lincs ids and smiles +## File format: Score -Parquet file mapping compound names to lincs ids and smiles. +File indicating the score of a metric. -Example file: -`resources/neurips-2023-raw/lincs_id_compound_mapping.parquet` +Example file: `resources/neurips-2023-data/score.h5ad` Format:
AnnData object - obs: 'compound_id', 'sm_lincs_id', 'sm_name', 'smiles' + uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values'
@@ -453,12 +492,12 @@ Slot description:
-| Slot | Type | Description | -|:---------------------|:---------|:------------------------------------------------------| -| `obs["compound_id"]` | `string` | Unique identifier for the compound. | -| `obs["sm_lincs_id"]` | `string` | LINCS identifier for the compound. | -| `obs["sm_name"]` | `string` | Name of the compound. | -| `obs["smiles"]` | `string` | SMILES notation representing the molecular structure. | +| Slot | Type | Description | +|:-----------------------|:---------|:---------------------------------------------------------------------------------------------| +| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | +| `uns["method_id"]` | `string` | A unique identifier for the method. | +| `uns["metric_ids"]` | `string` | One or more unique metric identifiers. | +| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. |
diff --git a/_viash.yaml b/_viash.yaml index 42118b5f..5106f43c 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -7,8 +7,8 @@ config_mods: | .functionality.version := 'dev' .functionality.arguments[.multiple == true].multiple_sep := ';' .platforms[.type == 'docker'].target_registry := 'ghcr.io' - .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task-dge-perturbation-prediction' - .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task-dge-perturbation-prediction' + .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task_perturbation_prediction' + .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_perturbation_prediction' .platforms[.type == "nextflow"].directives.tag := "$id" .platforms[.type == "nextflow"].auto.simplifyOutput := false .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } diff --git a/scripts/add_a_method.sh b/scripts/add_a_method.sh index fe07dfdc..aada69a1 100755 --- a/scripts/add_a_method.sh +++ b/scripts/add_a_method.sh @@ -15,25 +15,25 @@ viash run src/common/create_component/config.vsh.yaml -- \ --language "$method_lang" \ --name "$method_id" -# TODO: fill in required fields in src/task/methods/foo/config.vsh.yaml -# TODO: edit src/task/methods/foo/script.py/R +# TODO: fill in required fields in src/methods/foo/config.vsh.yaml +# TODO: edit src/methods/foo/script.py/R # test the component -viash test src/task/methods/$method_id/config.vsh.yaml +viash test src/methods/$method_id/config.vsh.yaml # rebuild the container (only if you change something to the docker platform) # You can reduce the memory and cpu allotted to jobs in _viash.yaml by modifying .platforms[.type == 
"nextflow"].config.labels -viash run src/task/methods/$method_id/config.vsh.yaml -- \ +viash run src/methods/$method_id/config.vsh.yaml -- \ ---setup cachedbuild ---verbose # run the method (using h5ad as input) -viash run src/task/methods/$method_id/config.vsh.yaml -- \ +viash run src/methods/$method_id/config.vsh.yaml -- \ --de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \ --id_map "resources/neurips-2023-kaggle/id_map.csv" \ --output "output/prediction.h5ad" # run evaluation metric -viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \ +viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \ --de_test_h5ad "resources/neurips-2023-kaggle/de_test.h5ad" \ --prediction "output/prediction.h5ad" \ --output "output/score.h5ad" diff --git a/scripts/generate_kaggle_resources.sh b/scripts/generate_kaggle_resources.sh index 54aa237f..13a9d005 100755 --- a/scripts/generate_kaggle_resources.sh +++ b/scripts/generate_kaggle_resources.sh @@ -18,7 +18,7 @@ if [[ ! 
-f "$OUT/2023-09-12_de_by_cell_type_test.h5ad" ]]; then "import anndata as ad; ad.read_h5ad('$OUT/2023-09-12_de_by_cell_type_train.h5ad').write_h5ad('$OUT/2023-09-12_de_by_cell_type_train.h5ad', compression='gzip')" fi -viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml -- \ +viash run src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml -- \ --input_train "$OUT/2023-09-12_de_by_cell_type_train.h5ad" \ --input_test "$OUT/2023-09-12_de_by_cell_type_test.h5ad" \ --input_single_cell_h5ad "resources/neurips-2023-raw/sc_counts.h5ad" \ @@ -34,14 +34,14 @@ viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yam --dataset_organism homo_sapiens echo ">> Run method" -viash run src/task/control_methods/mean_across_compounds/config.vsh.yaml -- \ +viash run src/control_methods/mean_across_compounds/config.vsh.yaml -- \ --de_train_h5ad "$OUT/de_train.h5ad" \ --de_test_h5ad "$OUT/de_test.h5ad" \ --id_map "$OUT/id_map.csv" \ --output "$OUT/prediction.h5ad" echo ">> Run metric" -viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \ +viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \ --prediction "$OUT/prediction.h5ad" \ --de_test_h5ad "$OUT/de_test.h5ad" \ --output "$OUT/score.h5ad" diff --git a/scripts/generate_resources.sh b/scripts/generate_resources.sh index 72137f85..13d4a10a 100755 --- a/scripts/generate_resources.sh +++ b/scripts/generate_resources.sh @@ -32,14 +32,14 @@ nextflow run \ --publish_dir "$OUT" echo ">> Run method" -viash run src/task/control_methods/mean_across_compounds/config.vsh.yaml -- \ +viash run src/control_methods/mean_across_compounds/config.vsh.yaml -- \ --de_train_h5ad "$OUT/de_train.h5ad" \ --de_test_h5ad "$OUT/de_test.h5ad" \ --id_map "$OUT/id_map.csv" \ --output "$OUT/prediction.h5ad" echo ">> Run metric" -viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \ +viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \ --prediction 
"$OUT/prediction.h5ad" \ --de_test_h5ad "$OUT/de_test.h5ad" \ --output "$OUT/score.h5ad" diff --git a/scripts/render_readme.sh b/scripts/render_readme.sh index b805cbea..21908bb5 100755 --- a/scripts/render_readme.sh +++ b/scripts/render_readme.sh @@ -5,7 +5,7 @@ set -e [[ ! -d ../openproblems-v2 ]] && echo "You need to clone the openproblems-v2 repository next to this repository" && exit 1 ../openproblems-v2/target/docker/common/create_task_readme/create_task_readme \ - --task "dge_perturbation_prediction" \ - --task_dir "src/task" \ - --github_url "https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/" \ + --task "perturbation_prediction" \ + --task_dir "src" \ + --github_url "https://github.com/openproblems-bio/task_perturbation_prediction/tree/main/" \ --output "README.md" diff --git a/scripts/run_benchmark_tw.sh b/scripts/run_benchmark_tw.sh index 76f46b6e..86472d44 100755 --- a/scripts/run_benchmark_tw.sh +++ b/scripts/run_benchmark_tw.sh @@ -2,7 +2,7 @@ RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources" -publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}" +publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}" cat > /tmp/params.yaml << HERE param_list: @@ -20,7 +20,7 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ +tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \ --revision main_build \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ diff --git a/scripts/run_benchmark_tw_traens.sh b/scripts/run_benchmark_tw_traens.sh index 908a150f..f5f10639 100755 --- a/scripts/run_benchmark_tw_traens.sh +++ b/scripts/run_benchmark_tw_traens.sh @@ -4,7 +4,7 @@ RUN_ID="traens_$(date +%Y-%m-%d_%H-%M-%S)" 
resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources" -publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}" +publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}" cat > /tmp/params.yaml << HERE param_list: @@ -18,8 +18,8 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ - --revision fix_trafo_ens_build \ +tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \ + --revision suggestions_elior_build \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ --workspace 53907369739130 \ diff --git a/scripts/run_layert_tw.sh b/scripts/run_layert_tw.sh index a05880fa..c56e49f5 100755 --- a/scripts/run_layert_tw.sh +++ b/scripts/run_layert_tw.sh @@ -1,7 +1,7 @@ #!/bin/bash RUN_ID="layert_$(date +%Y-%m-%d_%H-%M-%S)" -publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}" +publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}" cat > /tmp/params.yaml << HERE id: dge_perturbation_task @@ -12,7 +12,7 @@ rename_keys: "de_train_h5ad:de_train_h5ad,de_test_h5ad:de_test_h5ad,id_map:id_ma settings: '{"layer": "t"}' HERE -tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ +tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \ --revision main_build \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ diff --git a/scripts/run_stability_tw.sh b/scripts/run_stability_tw.sh index 3890fdf9..c752af8a 100755 --- a/scripts/run_stability_tw.sh +++ b/scripts/run_stability_tw.sh @@ -1,7 +1,7 @@ #!/bin/bash RUN_ID="stability_$(date +%Y-%m-%d_%H-%M-%S)" -publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}" 
+publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}" cat > /tmp/params.yaml << HERE id: neurips-2023-data @@ -11,7 +11,7 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \ +tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \ --revision main_build \ --pull-latest \ --main-script target/nextflow/workflows/run_stability_analysis/main.nf \ diff --git a/scripts/sync_results.sh b/scripts/sync_results.sh index a46623cb..ed4acd26 100755 --- a/scripts/sync_results.sh +++ b/scripts/sync_results.sh @@ -1,18 +1,18 @@ #!/bin/bash aws s3 sync \ - s3://openproblems-data/resources/dge_perturbation_prediction/results/ \ + s3://openproblems-data/resources/perturbation_prediction/results/ \ output/benchmark_results/ \ --delete --dryrun # sync back modified results aws s3 sync \ output/benchmark_results/ \ - s3://openproblems-data/resources/dge_perturbation_prediction/results/ \ + s3://openproblems-data/resources/perturbation_prediction/results/ \ --delete --dryrun # sync one run runid=run_2024-06-01_00-03-09; aws s3 sync \ output/benchmark_results/${runid}/ \ - s3://openproblems-data/resources/dge_perturbation_prediction/results/${runid}/ \ - --delete --dryrun \ No newline at end of file + s3://openproblems-data/resources/perturbation_prediction/results/${runid}/ \ + --delete --dryrun diff --git a/src/task/api/comp_control_method.yaml b/src/api/comp_control_method.yaml similarity index 100% rename from src/task/api/comp_control_method.yaml rename to src/api/comp_control_method.yaml diff --git a/src/task/api/comp_method.yaml b/src/api/comp_method.yaml similarity index 100% rename from src/task/api/comp_method.yaml rename to src/api/comp_method.yaml diff --git a/src/task/api/comp_method_notest.yaml b/src/api/comp_method_notest.yaml similarity index 85% rename from src/task/api/comp_method_notest.yaml rename to 
src/api/comp_method_notest.yaml index 5e2e0477..b1e68ca9 100644 --- a/src/task/api/comp_method_notest.yaml +++ b/src/api/comp_method_notest.yaml @@ -26,8 +26,7 @@ functionality: required: true direction: output - name: "--output_model" - type: "file" - description: "Optional model output. If no value is passed, the model will be removed at the end of the run." + __merge__: file_model.yaml direction: output required: false must_exist: false diff --git a/src/task/api/comp_metric.yaml b/src/api/comp_metric.yaml similarity index 100% rename from src/task/api/comp_metric.yaml rename to src/api/comp_metric.yaml diff --git a/src/task/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml similarity index 92% rename from src/task/api/comp_process_dataset.yaml rename to src/api/comp_process_dataset.yaml index b1e4d6b5..95544a99 100644 --- a/src/task/api/comp_process_dataset.yaml +++ b/src/api/comp_process_dataset.yaml @@ -1,4 +1,5 @@ functionality: + namespace: process_dataset info: type: process_dataset type_info: @@ -25,5 +26,4 @@ functionality: __merge__: file_id_map.yaml required: true direction: output - default: id_map.csv - test_resources: [] \ No newline at end of file + default: id_map.csv \ No newline at end of file diff --git a/src/task/api/file_de_test_h5ad.yaml b/src/api/file_de_test_h5ad.yaml similarity index 100% rename from src/task/api/file_de_test_h5ad.yaml rename to src/api/file_de_test_h5ad.yaml diff --git a/src/task/api/file_de_train_h5ad.yaml b/src/api/file_de_train_h5ad.yaml similarity index 100% rename from src/task/api/file_de_train_h5ad.yaml rename to src/api/file_de_train_h5ad.yaml diff --git a/src/task/api/file_id_map.yaml b/src/api/file_id_map.yaml similarity index 100% rename from src/task/api/file_id_map.yaml rename to src/api/file_id_map.yaml diff --git a/src/api/file_model.yaml b/src/api/file_model.yaml new file mode 100644 index 00000000..cb6cbc63 --- /dev/null +++ b/src/api/file_model.yaml @@ -0,0 +1,6 @@ +type: file +example: 
resources/neurips-2023-data/model/ +info: + label: Model + summary: "Optional model output. If no value is passed, the model will be removed at the end of the run." + file_type: directory \ No newline at end of file diff --git a/src/task/api/file_prediction.yaml b/src/api/file_prediction.yaml similarity index 100% rename from src/task/api/file_prediction.yaml rename to src/api/file_prediction.yaml diff --git a/src/task/api/file_sc_counts.yaml b/src/api/file_sc_counts.yaml similarity index 100% rename from src/task/api/file_sc_counts.yaml rename to src/api/file_sc_counts.yaml diff --git a/src/task/api/file_score.yaml b/src/api/file_score.yaml similarity index 100% rename from src/task/api/file_score.yaml rename to src/api/file_score.yaml diff --git a/src/task/api/task_info.yaml b/src/api/task_info.yaml similarity index 81% rename from src/task/api/task_info.yaml rename to src/api/task_info.yaml index 40b3973b..c0dfb93f 100644 --- a/src/task/api/task_info.yaml +++ b/src/api/task_info.yaml @@ -1,5 +1,5 @@ -name: dge_perturbation_prediction -label: DGE Perturbation Prediction +name: perturbation_prediction +label: Perturbation Prediction summary: Predicting how small molecules change gene expression in different cell types. readme: | ## Installation @@ -17,9 +17,9 @@ readme: | To get started, you can run the following commands: ```bash - git clone git@github.com:openproblems-bio/task-dge-perturbation-prediction.git + git clone git@github.com:openproblems-bio/task_perturbation_prediction.git - cd task-dge-perturbation-prediction + cd task_perturbation_prediction # download resources scripts/download_resources.sh @@ -99,3 +99,43 @@ authors: info: github: rcannood orcid: "0000-0003-3641-729X" + - name: Daniel Burkhardt + roles: [ author ] + info: + github: dburkhardt + orcid: 0000-0001-7744-1363 + - name: Malte D. Luecken + roles: [ author ] + info: + github: LuckyMD + orcid: 0000-0001-7464-7921 + - name: Tin M. 
Tunjic + roles: [ contributor ] + info: + github: ttunja + orcid: 0000-0001-8842-6548 + - name: Mengbo Wang + roles: [ contributor ] + info: + github: wangmengbo + orcid: 0000-0002-0266-9993 + - name: Andrew Benz + roles: [ author ] + info: + github: andrew-benz + orcid: 0009-0002-8118-1861 + - name: Tianyu Liu + roles: [ contributor ] + info: + github: HelloWorldLTY + orcid: 0000-0002-9412-6573 + - name: Jalil Nourisa + roles: [ contributor ] + info: + github: janursa + orcid: 0000-0002-7539-4396 + - name: Rico Meinl + roles: [ contributor ] + info: + github: ricomnl + orcid: 0000-0003-4356-6058 diff --git a/src/common/create_component/config.vsh.yaml b/src/common/create_component/config.vsh.yaml index 91bcc633..9a214832 100644 --- a/src/common/create_component/config.vsh.yaml +++ b/src/common/create_component/config.vsh.yaml @@ -24,7 +24,7 @@ functionality: direction: output # required: true description: Path to the component directory. Suggested location is `src/<TASK>/<TYPE>s/<NAME>`. - default: src/task/methods/${VIASH_PAR_NAME} + default: src/methods/${VIASH_PAR_NAME} - type: file name: --api_file description: | @@ -33,7 +33,7 @@ functionality: to manually specify a different API file to inherit from. 
must_exist: false # required: true - default: src/task/api/comp_method.yaml + default: src/api/comp_method.yaml - type: file name: --viash_yaml description: | diff --git a/src/common/create_component/script.py b/src/common/create_component/script.py index b7fef5e8..65aaad9a 100644 --- a/src/common/create_component/script.py +++ b/src/common/create_component/script.py @@ -6,12 +6,12 @@ ## VIASH START par = { - "task": "DGE Perturbation Prediction", + "task": "Perturbation Prediction", "type": "method", "language": "python", "name": "new_comp", - "output": "src/task/method/new_comp", - "api_file": "src/task/api/comp_method.yaml", + "output": "src/method/new_comp", + "api_file": "src/api/comp_method.yaml", "viash_yaml": "_viash.yaml" } ## VIASH END diff --git a/src/task/control_methods/ground_truth/config.vsh.yaml b/src/control_methods/ground_truth/config.vsh.yaml similarity index 100% rename from src/task/control_methods/ground_truth/config.vsh.yaml rename to src/control_methods/ground_truth/config.vsh.yaml diff --git a/src/task/control_methods/ground_truth/script.R b/src/control_methods/ground_truth/script.R similarity index 100% rename from src/task/control_methods/ground_truth/script.R rename to src/control_methods/ground_truth/script.R diff --git a/src/task/control_methods/mean_across_celltypes/config.vsh.yaml b/src/control_methods/mean_across_celltypes/config.vsh.yaml similarity index 100% rename from src/task/control_methods/mean_across_celltypes/config.vsh.yaml rename to src/control_methods/mean_across_celltypes/config.vsh.yaml diff --git a/src/task/control_methods/mean_across_celltypes/script.py b/src/control_methods/mean_across_celltypes/script.py similarity index 100% rename from src/task/control_methods/mean_across_celltypes/script.py rename to src/control_methods/mean_across_celltypes/script.py diff --git a/src/task/control_methods/mean_across_compounds/config.vsh.yaml b/src/control_methods/mean_across_compounds/config.vsh.yaml similarity index 100% 
rename from src/task/control_methods/mean_across_compounds/config.vsh.yaml rename to src/control_methods/mean_across_compounds/config.vsh.yaml diff --git a/src/task/control_methods/mean_across_compounds/script.py b/src/control_methods/mean_across_compounds/script.py similarity index 100% rename from src/task/control_methods/mean_across_compounds/script.py rename to src/control_methods/mean_across_compounds/script.py diff --git a/src/task/control_methods/mean_outcome/config.vsh.yaml b/src/control_methods/mean_outcome/config.vsh.yaml similarity index 100% rename from src/task/control_methods/mean_outcome/config.vsh.yaml rename to src/control_methods/mean_outcome/config.vsh.yaml diff --git a/src/task/control_methods/mean_outcome/script.py b/src/control_methods/mean_outcome/script.py similarity index 100% rename from src/task/control_methods/mean_outcome/script.py rename to src/control_methods/mean_outcome/script.py diff --git a/src/task/control_methods/sample/config.vsh.yaml b/src/control_methods/sample/config.vsh.yaml similarity index 100% rename from src/task/control_methods/sample/config.vsh.yaml rename to src/control_methods/sample/config.vsh.yaml diff --git a/src/task/control_methods/sample/script.R b/src/control_methods/sample/script.R similarity index 100% rename from src/task/control_methods/sample/script.R rename to src/control_methods/sample/script.R diff --git a/src/task/control_methods/zeros/config.vsh.yaml b/src/control_methods/zeros/config.vsh.yaml similarity index 100% rename from src/task/control_methods/zeros/config.vsh.yaml rename to src/control_methods/zeros/config.vsh.yaml diff --git a/src/task/control_methods/zeros/script.py b/src/control_methods/zeros/script.py similarity index 100% rename from src/task/control_methods/zeros/script.py rename to src/control_methods/zeros/script.py diff --git a/src/task/methods/jn_ap_op2/config.vsh.yaml b/src/methods/jn_ap_op2/config.vsh.yaml similarity index 97% rename from 
src/task/methods/jn_ap_op2/config.vsh.yaml rename to src/methods/jn_ap_op2/config.vsh.yaml index edbfa55f..1da6eb5b 100644 --- a/src/task/methods/jn_ap_op2/config.vsh.yaml +++ b/src/methods/jn_ap_op2/config.vsh.yaml @@ -30,7 +30,6 @@ functionality: - type: python_script path: script.py - path: helper.py - - path: ../../utils/anndata_to_dataframe.py platforms: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 diff --git a/src/task/methods/jn_ap_op2/helper.py b/src/methods/jn_ap_op2/helper.py similarity index 100% rename from src/task/methods/jn_ap_op2/helper.py rename to src/methods/jn_ap_op2/helper.py diff --git a/src/task/methods/jn_ap_op2/script.py b/src/methods/jn_ap_op2/script.py similarity index 90% rename from src/task/methods/jn_ap_op2/script.py rename to src/methods/jn_ap_op2/script.py index 753da0ee..1529cbef 100644 --- a/src/task/methods/jn_ap_op2/script.py +++ b/src/methods/jn_ap_op2/script.py @@ -20,18 +20,16 @@ "submission_names": ["dl40"] } meta = { - "resources_dir": "src/task/methods/jn_ap_op2", + "resources_dir": "src/methods/jn_ap_op2", } ## VIASH END sys.path.append(meta["resources_dir"]) -from anndata_to_dataframe import anndata_to_dataframe from helper import plant_seed, MultiOutputTargetEncoder, train print('Reading input files', flush=True) de_train_h5ad = ad.read_h5ad(par["de_train_h5ad"]) -de_train = anndata_to_dataframe(de_train_h5ad, par["layer"]) id_map = pd.read_csv(par["id_map"]) gene_names = list(de_train_h5ad.var_names) @@ -58,10 +56,10 @@ print('Data location', flush=True) # Data location -cell_types = de_train['cell_type'] -sm_names = de_train['sm_name'] +cell_types = de_train_h5ad.obs['cell_type'].astype(str) +sm_names = de_train_h5ad.obs['sm_name'].astype(str) -data = de_train.drop(columns=["cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control"]).to_numpy(dtype=float) +data = de_train_h5ad.layers[par["layer"]] print('Train model', flush=True) # ... train model ... 
diff --git a/src/task/methods/lgc_ensemble/config.vsh.yaml b/src/methods/lgc_ensemble/config.vsh.yaml similarity index 100% rename from src/task/methods/lgc_ensemble/config.vsh.yaml rename to src/methods/lgc_ensemble/config.vsh.yaml diff --git a/src/task/methods/lgc_ensemble/main.nf b/src/methods/lgc_ensemble/main.nf similarity index 100% rename from src/task/methods/lgc_ensemble/main.nf rename to src/methods/lgc_ensemble/main.nf diff --git a/src/task/methods/lgc_ensemble/test.sh b/src/methods/lgc_ensemble/test.sh similarity index 100% rename from src/task/methods/lgc_ensemble/test.sh rename to src/methods/lgc_ensemble/test.sh diff --git a/src/task/methods/lgc_ensemble_direct/config.vsh.yaml b/src/methods/lgc_ensemble_direct/config.vsh.yaml similarity index 100% rename from src/task/methods/lgc_ensemble_direct/config.vsh.yaml rename to src/methods/lgc_ensemble_direct/config.vsh.yaml diff --git a/src/task/methods/lgc_ensemble_direct/script.py b/src/methods/lgc_ensemble_direct/script.py similarity index 96% rename from src/task/methods/lgc_ensemble_direct/script.py rename to src/methods/lgc_ensemble_direct/script.py index fdf90516..6aeba5c6 100644 --- a/src/task/methods/lgc_ensemble_direct/script.py +++ b/src/methods/lgc_ensemble_direct/script.py @@ -19,7 +19,7 @@ "output_model": None } meta = { - "resources_dir": "src/task/methods/lgc_ensemble", + "resources_dir": "src/methods/lgc_ensemble", "temp_dir": "/tmp" } ## VIASH END diff --git a/src/task/methods/lgc_ensemble_helpers/divisor_finder.py b/src/methods/lgc_ensemble_helpers/divisor_finder.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/divisor_finder.py rename to src/methods/lgc_ensemble_helpers/divisor_finder.py diff --git a/src/task/methods/lgc_ensemble_helpers/helper_classes.py b/src/methods/lgc_ensemble_helpers/helper_classes.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/helper_classes.py rename to src/methods/lgc_ensemble_helpers/helper_classes.py diff 
--git a/src/task/methods/lgc_ensemble_helpers/helper_functions.py b/src/methods/lgc_ensemble_helpers/helper_functions.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/helper_functions.py rename to src/methods/lgc_ensemble_helpers/helper_functions.py diff --git a/src/task/methods/lgc_ensemble_helpers/models.py b/src/methods/lgc_ensemble_helpers/models.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/models.py rename to src/methods/lgc_ensemble_helpers/models.py diff --git a/src/task/methods/lgc_ensemble_helpers/predict.py b/src/methods/lgc_ensemble_helpers/predict.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/predict.py rename to src/methods/lgc_ensemble_helpers/predict.py diff --git a/src/task/methods/lgc_ensemble_helpers/prepare_data.py b/src/methods/lgc_ensemble_helpers/prepare_data.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/prepare_data.py rename to src/methods/lgc_ensemble_helpers/prepare_data.py diff --git a/src/task/methods/lgc_ensemble_helpers/train.py b/src/methods/lgc_ensemble_helpers/train.py similarity index 100% rename from src/task/methods/lgc_ensemble_helpers/train.py rename to src/methods/lgc_ensemble_helpers/train.py diff --git a/src/task/methods/lgc_ensemble_predict/config.vsh.yaml b/src/methods/lgc_ensemble_predict/config.vsh.yaml similarity index 100% rename from src/task/methods/lgc_ensemble_predict/config.vsh.yaml rename to src/methods/lgc_ensemble_predict/config.vsh.yaml diff --git a/src/task/methods/lgc_ensemble_predict/script.py b/src/methods/lgc_ensemble_predict/script.py similarity index 100% rename from src/task/methods/lgc_ensemble_predict/script.py rename to src/methods/lgc_ensemble_predict/script.py diff --git a/src/task/methods/lgc_ensemble_prepare/config.vsh.yaml b/src/methods/lgc_ensemble_prepare/config.vsh.yaml similarity index 100% rename from src/task/methods/lgc_ensemble_prepare/config.vsh.yaml rename to 
src/methods/lgc_ensemble_prepare/config.vsh.yaml diff --git a/src/task/methods/lgc_ensemble_prepare/script.py b/src/methods/lgc_ensemble_prepare/script.py similarity index 97% rename from src/task/methods/lgc_ensemble_prepare/script.py rename to src/methods/lgc_ensemble_prepare/script.py index aaa91682..562291ac 100644 --- a/src/task/methods/lgc_ensemble_prepare/script.py +++ b/src/methods/lgc_ensemble_prepare/script.py @@ -22,7 +22,7 @@ "train_data_aug_dir": "output/train_data_aug_dir", } meta = { - "resources_dir": "src/task/methods/lgc_ensemble", + "resources_dir": "src/methods/lgc_ensemble", "temp_dir": "/tmp" } ## VIASH END @@ -37,7 +37,7 @@ ################################################################### -# interpreted from src/task/methods/lgc_ensemble/prepare_data.py +# interpreted from src/methods/lgc_ensemble/prepare_data.py # prepare data seed_everything() @@ -91,7 +91,7 @@ _, _ = save_ChemBERTa_features(test_smiles, out_dir=par["train_data_aug_dir"], on_train_data=False) ################################################################### -# interpreted from src/task/methods/lgc_ensemble/train.py +# interpreted from src/methods/lgc_ensemble/train.py ## Prepare cross-validation cell_types_sm_names = de_train[['cell_type', 'sm_name']] diff --git a/src/task/methods/lgc_ensemble_train/config.vsh.yaml b/src/methods/lgc_ensemble_train/config.vsh.yaml similarity index 100% rename from src/task/methods/lgc_ensemble_train/config.vsh.yaml rename to src/methods/lgc_ensemble_train/config.vsh.yaml diff --git a/src/task/methods/lgc_ensemble_train/script.py b/src/methods/lgc_ensemble_train/script.py similarity index 94% rename from src/task/methods/lgc_ensemble_train/script.py rename to src/methods/lgc_ensemble_train/script.py index fa557221..a50e0083 100644 --- a/src/task/methods/lgc_ensemble_train/script.py +++ b/src/methods/lgc_ensemble_train/script.py @@ -18,7 +18,7 @@ "log_file": "output/log.json", } meta = { - "resources_dir": "src/task/methods/lgc_ensemble", 
+ "resources_dir": "src/methods/lgc_ensemble", "temp_dir": "/tmp" } ## VIASH END @@ -32,7 +32,7 @@ from helper_functions import train_function ################################################################### -# Interpretation from src/task/methods/lgc_ensemble/helper_functions.py +# Interpretation from src/methods/lgc_ensemble/helper_functions.py print("Load data...", flush=True) # read kf_cv_initial from json diff --git a/src/task/methods/nn_retraining_with_pseudolabels/config.vsh.yaml b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml similarity index 100% rename from src/task/methods/nn_retraining_with_pseudolabels/config.vsh.yaml rename to src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml diff --git a/src/task/methods/nn_retraining_with_pseudolabels/notebook_264.py b/src/methods/nn_retraining_with_pseudolabels/notebook_264.py similarity index 100% rename from src/task/methods/nn_retraining_with_pseudolabels/notebook_264.py rename to src/methods/nn_retraining_with_pseudolabels/notebook_264.py diff --git a/src/task/methods/nn_retraining_with_pseudolabels/notebook_266.py b/src/methods/nn_retraining_with_pseudolabels/notebook_266.py similarity index 100% rename from src/task/methods/nn_retraining_with_pseudolabels/notebook_266.py rename to src/methods/nn_retraining_with_pseudolabels/notebook_266.py diff --git a/src/task/methods/nn_retraining_with_pseudolabels/script.py b/src/methods/nn_retraining_with_pseudolabels/script.py similarity index 96% rename from src/task/methods/nn_retraining_with_pseudolabels/script.py rename to src/methods/nn_retraining_with_pseudolabels/script.py index d9b83461..13fcb880 100644 --- a/src/task/methods/nn_retraining_with_pseudolabels/script.py +++ b/src/methods/nn_retraining_with_pseudolabels/script.py @@ -26,7 +26,7 @@ "output": "output.h5ad", "reps": 2, } -meta = {"resources_dir": "src/task/methods/nn_retraining_with_pseudolabels"} +meta = {"resources_dir": "src/methods/nn_retraining_with_pseudolabels"} ## 
VIASH END # load helper functions in notebooks diff --git a/src/task/methods/pyboost/config.vsh.yaml b/src/methods/pyboost/config.vsh.yaml similarity index 100% rename from src/task/methods/pyboost/config.vsh.yaml rename to src/methods/pyboost/config.vsh.yaml diff --git a/src/task/methods/pyboost/helper.py b/src/methods/pyboost/helper.py similarity index 100% rename from src/task/methods/pyboost/helper.py rename to src/methods/pyboost/helper.py diff --git a/src/task/methods/pyboost/script.py b/src/methods/pyboost/script.py similarity index 98% rename from src/task/methods/pyboost/script.py rename to src/methods/pyboost/script.py index 95e190b7..ef6f878c 100644 --- a/src/task/methods/pyboost/script.py +++ b/src/methods/pyboost/script.py @@ -20,7 +20,7 @@ output = "output.h5ad", ) meta = dict( - resources_dir = "src/task/methods/pyboost" + resources_dir = "src/methods/pyboost" ) ## VIASH END diff --git a/src/task/methods/scape/config.vsh.yaml b/src/methods/scape/config.vsh.yaml similarity index 100% rename from src/task/methods/scape/config.vsh.yaml rename to src/methods/scape/config.vsh.yaml diff --git a/src/task/methods/scape/script.py b/src/methods/scape/script.py similarity index 100% rename from src/task/methods/scape/script.py rename to src/methods/scape/script.py diff --git a/src/task/methods/transformer_ensemble/config.vsh.yaml b/src/methods/transformer_ensemble/config.vsh.yaml similarity index 100% rename from src/task/methods/transformer_ensemble/config.vsh.yaml rename to src/methods/transformer_ensemble/config.vsh.yaml diff --git a/src/task/methods/transformer_ensemble/models.py b/src/methods/transformer_ensemble/models.py similarity index 100% rename from src/task/methods/transformer_ensemble/models.py rename to src/methods/transformer_ensemble/models.py diff --git a/src/task/methods/transformer_ensemble/script.py b/src/methods/transformer_ensemble/script.py similarity index 98% rename from src/task/methods/transformer_ensemble/script.py rename to 
src/methods/transformer_ensemble/script.py index 79764953..f6368f10 100644 --- a/src/task/methods/transformer_ensemble/script.py +++ b/src/methods/transformer_ensemble/script.py @@ -19,7 +19,7 @@ "layer": "sign_log10_pval" } meta = { - "resources_dir": "src/task/methods/transformer_ensemble", + "resources_dir": "src/methods/transformer_ensemble", } ## VIASH END diff --git a/src/task/methods/transformer_ensemble/train.py b/src/methods/transformer_ensemble/train.py similarity index 100% rename from src/task/methods/transformer_ensemble/train.py rename to src/methods/transformer_ensemble/train.py diff --git a/src/task/methods/transformer_ensemble/utils.py b/src/methods/transformer_ensemble/utils.py similarity index 100% rename from src/task/methods/transformer_ensemble/utils.py rename to src/methods/transformer_ensemble/utils.py diff --git a/src/task/metrics/mean_rowwise_correlation/config.vsh.yaml b/src/metrics/mean_rowwise_correlation/config.vsh.yaml similarity index 100% rename from src/task/metrics/mean_rowwise_correlation/config.vsh.yaml rename to src/metrics/mean_rowwise_correlation/config.vsh.yaml diff --git a/src/task/metrics/mean_rowwise_correlation/script.R b/src/metrics/mean_rowwise_correlation/script.R similarity index 100% rename from src/task/metrics/mean_rowwise_correlation/script.R rename to src/metrics/mean_rowwise_correlation/script.R diff --git a/src/task/metrics/mean_rowwise_error/config.vsh.yaml b/src/metrics/mean_rowwise_error/config.vsh.yaml similarity index 100% rename from src/task/metrics/mean_rowwise_error/config.vsh.yaml rename to src/metrics/mean_rowwise_error/config.vsh.yaml diff --git a/src/task/metrics/mean_rowwise_error/script.R b/src/metrics/mean_rowwise_error/script.R similarity index 100% rename from src/task/metrics/mean_rowwise_error/script.R rename to src/metrics/mean_rowwise_error/script.R diff --git a/src/task/process_dataset/add_uns_metadata/config.vsh.yaml b/src/process_dataset/add_uns_metadata/config.vsh.yaml similarity 
index 96% rename from src/task/process_dataset/add_uns_metadata/config.vsh.yaml rename to src/process_dataset/add_uns_metadata/config.vsh.yaml index febcc84a..fbb2047e 100644 --- a/src/task/process_dataset/add_uns_metadata/config.vsh.yaml +++ b/src/process_dataset/add_uns_metadata/config.vsh.yaml @@ -1,13 +1,12 @@ functionality: name: add_uns_metadata - namespace: process_dataset info: type: process_dataset type_info: label: Add metadata summary: Add metadata to the pseudobulked data description: | - Add metadata to the pseudobulked single-cell dataset for the DGE regression task. + Add metadata to the pseudobulked single-cell dataset for the perturbation regression task. arguments: - name: --input type: file diff --git a/src/task/process_dataset/add_uns_metadata/script.py b/src/process_dataset/add_uns_metadata/script.py similarity index 100% rename from src/task/process_dataset/add_uns_metadata/script.py rename to src/process_dataset/add_uns_metadata/script.py diff --git a/src/task/process_dataset/bootstrap/config.vsh.yaml b/src/process_dataset/bootstrap/config.vsh.yaml similarity index 98% rename from src/task/process_dataset/bootstrap/config.vsh.yaml rename to src/process_dataset/bootstrap/config.vsh.yaml index a8fb69ed..c23b3583 100644 --- a/src/task/process_dataset/bootstrap/config.vsh.yaml +++ b/src/process_dataset/bootstrap/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: bootstrap - namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/bootstrap/script.py b/src/process_dataset/bootstrap/script.py similarity index 100% rename from src/task/process_dataset/bootstrap/script.py rename to src/process_dataset/bootstrap/script.py diff --git a/src/task/process_dataset/clean_pseudobulk/config.vsh.yaml b/src/process_dataset/clean_pseudobulk/config.vsh.yaml similarity index 96% rename from src/task/process_dataset/clean_pseudobulk/config.vsh.yaml rename to src/process_dataset/clean_pseudobulk/config.vsh.yaml index 
a7a63730..d983a446 100644 --- a/src/task/process_dataset/clean_pseudobulk/config.vsh.yaml +++ b/src/process_dataset/clean_pseudobulk/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: filter_vars - namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/clean_pseudobulk/script.R b/src/process_dataset/clean_pseudobulk/script.R similarity index 100% rename from src/task/process_dataset/clean_pseudobulk/script.R rename to src/process_dataset/clean_pseudobulk/script.R diff --git a/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml b/src/process_dataset/compute_pseudobulk/config.vsh.yaml similarity index 89% rename from src/task/process_dataset/compute_pseudobulk/config.vsh.yaml rename to src/process_dataset/compute_pseudobulk/config.vsh.yaml index 30bbd5c7..c63af091 100644 --- a/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml +++ b/src/process_dataset/compute_pseudobulk/config.vsh.yaml @@ -1,13 +1,12 @@ functionality: name: compute_pseudobulk - namespace: process_dataset info: type: process_dataset type_info: label: Pseudobulk summary: Compute pseudobulk data description: | - Compute pseudobulk data for the DGE regression task. + Compute pseudobulk data for the perturbation regression task. 
arguments: - name: --input type: file diff --git a/src/task/process_dataset/compute_pseudobulk/script.py b/src/process_dataset/compute_pseudobulk/script.py similarity index 100% rename from src/task/process_dataset/compute_pseudobulk/script.py rename to src/process_dataset/compute_pseudobulk/script.py diff --git a/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml similarity index 97% rename from src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml rename to src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml index e6d6c209..77670fa6 100644 --- a/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: convert_h5ad_to_parquet - namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/convert_h5ad_to_parquet/script.py b/src/process_dataset/convert_h5ad_to_parquet/script.py similarity index 100% rename from src/task/process_dataset/convert_h5ad_to_parquet/script.py rename to src/process_dataset/convert_h5ad_to_parquet/script.py diff --git a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml similarity index 98% rename from src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml rename to src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml index 95c4e83c..4b8cda82 100644 --- a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: convert_kaggle_h5ad_to_parquet - namespace: task/process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/script.py b/src/process_dataset/convert_kaggle_h5ad_to_parquet/script.py 
similarity index 100% rename from src/task/process_dataset/convert_kaggle_h5ad_to_parquet/script.py rename to src/process_dataset/convert_kaggle_h5ad_to_parquet/script.py diff --git a/src/task/process_dataset/filter_obs/config.vsh.yaml b/src/process_dataset/filter_obs/config.vsh.yaml similarity index 96% rename from src/task/process_dataset/filter_obs/config.vsh.yaml rename to src/process_dataset/filter_obs/config.vsh.yaml index 24f19e76..bc950f9d 100644 --- a/src/task/process_dataset/filter_obs/config.vsh.yaml +++ b/src/process_dataset/filter_obs/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: filter_obs - namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/filter_obs/script.R b/src/process_dataset/filter_obs/script.R similarity index 100% rename from src/task/process_dataset/filter_obs/script.R rename to src/process_dataset/filter_obs/script.R diff --git a/src/task/process_dataset/generate_id_map/config.vsh.yaml b/src/process_dataset/generate_id_map/config.vsh.yaml similarity index 96% rename from src/task/process_dataset/generate_id_map/config.vsh.yaml rename to src/process_dataset/generate_id_map/config.vsh.yaml index 648634c8..97696a7e 100644 --- a/src/task/process_dataset/generate_id_map/config.vsh.yaml +++ b/src/process_dataset/generate_id_map/config.vsh.yaml @@ -1,6 +1,5 @@ functionality: name: generate_id_map - namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/task/process_dataset/generate_id_map/script.py b/src/process_dataset/generate_id_map/script.py similarity index 100% rename from src/task/process_dataset/generate_id_map/script.py rename to src/process_dataset/generate_id_map/script.py diff --git a/src/task/process_dataset/run_limma/config.vsh.yaml b/src/process_dataset/run_limma/config.vsh.yaml similarity index 94% rename from src/task/process_dataset/run_limma/config.vsh.yaml rename to src/process_dataset/run_limma/config.vsh.yaml index e513d5ea..b50845d1 
100644 --- a/src/task/process_dataset/run_limma/config.vsh.yaml +++ b/src/process_dataset/run_limma/config.vsh.yaml @@ -1,13 +1,12 @@ functionality: name: run_limma - namespace: process_dataset info: type: process_dataset type_info: label: Limma summary: Run limma description: | - Run limma for the DGE regression task. + Run limma for the perturbation regression task. arguments: - name: --input type: file diff --git a/src/task/process_dataset/run_limma/script.R b/src/process_dataset/run_limma/script.R similarity index 100% rename from src/task/process_dataset/run_limma/script.R rename to src/process_dataset/run_limma/script.R diff --git a/src/task/utils/anndata_to_dataframe.py b/src/utils/anndata_to_dataframe.py similarity index 100% rename from src/task/utils/anndata_to_dataframe.py rename to src/utils/anndata_to_dataframe.py diff --git a/src/task/workflows/process_dataset/config.vsh.yaml b/src/workflows/process_dataset/config.vsh.yaml similarity index 100% rename from src/task/workflows/process_dataset/config.vsh.yaml rename to src/workflows/process_dataset/config.vsh.yaml diff --git a/src/task/workflows/process_dataset/main.nf b/src/workflows/process_dataset/main.nf similarity index 100% rename from src/task/workflows/process_dataset/main.nf rename to src/workflows/process_dataset/main.nf diff --git a/src/task/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml similarity index 100% rename from src/task/workflows/run_benchmark/config.vsh.yaml rename to src/workflows/run_benchmark/config.vsh.yaml diff --git a/src/task/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf similarity index 100% rename from src/task/workflows/run_benchmark/main.nf rename to src/workflows/run_benchmark/main.nf diff --git a/src/task/workflows/run_stability_analysis/config.vsh.yaml b/src/workflows/run_stability_analysis/config.vsh.yaml similarity index 100% rename from src/task/workflows/run_stability_analysis/config.vsh.yaml rename to
src/workflows/run_stability_analysis/config.vsh.yaml diff --git a/src/task/workflows/run_stability_analysis/main.nf b/src/workflows/run_stability_analysis/main.nf similarity index 100% rename from src/task/workflows/run_stability_analysis/main.nf rename to src/workflows/run_stability_analysis/main.nf