From 8a613812c05774713df0e96cd9d58f8afd0e0717 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 5 Jun 2024 00:52:11 +0200
Subject: [PATCH] Rename `task-dge-perturbation-prediction` to
 `task_perturbation_prediction` (#66)

* update task info

* update project config

* rename task

* move files

* add namespace to api

* update readme

* update spec

* simplify component
---
 CHANGELOG.md                                  |  12 +-
 README.md                                     | 287 ++++++++++--------
 _viash.yaml                                   |   4 +-
 scripts/add_a_method.sh                       |  12 +-
 scripts/generate_kaggle_resources.sh          |   6 +-
 scripts/generate_resources.sh                 |   4 +-
 scripts/render_readme.sh                      |   6 +-
 scripts/run_benchmark_tw.sh                   |   4 +-
 scripts/run_benchmark_tw_traens.sh            |   6 +-
 scripts/run_layert_tw.sh                      |   4 +-
 scripts/run_stability_tw.sh                   |   4 +-
 scripts/sync_results.sh                       |   8 +-
 src/{task => }/api/comp_control_method.yaml   |   0
 src/{task => }/api/comp_method.yaml           |   0
 src/{task => }/api/comp_method_notest.yaml    |   3 +-
 src/{task => }/api/comp_metric.yaml           |   0
 src/{task => }/api/comp_process_dataset.yaml  |   4 +-
 src/{task => }/api/file_de_test_h5ad.yaml     |   0
 src/{task => }/api/file_de_train_h5ad.yaml    |   0
 src/{task => }/api/file_id_map.yaml           |   0
 src/api/file_model.yaml                       |   6 +
 src/{task => }/api/file_prediction.yaml       |   0
 src/{task => }/api/file_sc_counts.yaml        |   0
 src/{task => }/api/file_score.yaml            |   0
 src/{task => }/api/task_info.yaml             |  48 ++-
 src/common/create_component/config.vsh.yaml   |   4 +-
 src/common/create_component/script.py         |   6 +-
 .../ground_truth/config.vsh.yaml              |   0
 .../control_methods/ground_truth/script.R     |   0
 .../mean_across_celltypes/config.vsh.yaml     |   0
 .../mean_across_celltypes/script.py           |   0
 .../mean_across_compounds/config.vsh.yaml     |   0
 .../mean_across_compounds/script.py           |   0
 .../mean_outcome/config.vsh.yaml              |   0
 .../control_methods/mean_outcome/script.py    |   0
 .../control_methods/sample/config.vsh.yaml    |   0
 .../control_methods/sample/script.R           |   0
 .../control_methods/zeros/config.vsh.yaml     |   0
 .../control_methods/zeros/script.py           |   0
 .../methods/jn_ap_op2/config.vsh.yaml         |   1 -
 src/{task => }/methods/jn_ap_op2/helper.py    |   0
 src/{task => }/methods/jn_ap_op2/script.py    |  10 +-
 .../methods/lgc_ensemble/config.vsh.yaml      |   0
 src/{task => }/methods/lgc_ensemble/main.nf   |   0
 src/{task => }/methods/lgc_ensemble/test.sh   |   0
 .../lgc_ensemble_direct/config.vsh.yaml       |   0
 .../methods/lgc_ensemble_direct/script.py     |   2 +-
 .../lgc_ensemble_helpers/divisor_finder.py    |   0
 .../lgc_ensemble_helpers/helper_classes.py    |   0
 .../lgc_ensemble_helpers/helper_functions.py  |   0
 .../methods/lgc_ensemble_helpers/models.py    |   0
 .../methods/lgc_ensemble_helpers/predict.py   |   0
 .../lgc_ensemble_helpers/prepare_data.py      |   0
 .../methods/lgc_ensemble_helpers/train.py     |   0
 .../lgc_ensemble_predict/config.vsh.yaml      |   0
 .../methods/lgc_ensemble_predict/script.py    |   0
 .../lgc_ensemble_prepare/config.vsh.yaml      |   0
 .../methods/lgc_ensemble_prepare/script.py    |   6 +-
 .../lgc_ensemble_train/config.vsh.yaml        |   0
 .../methods/lgc_ensemble_train/script.py      |   4 +-
 .../config.vsh.yaml                           |   0
 .../notebook_264.py                           |   0
 .../notebook_266.py                           |   0
 .../nn_retraining_with_pseudolabels/script.py |   2 +-
 .../methods/pyboost/config.vsh.yaml           |   0
 src/{task => }/methods/pyboost/helper.py      |   0
 src/{task => }/methods/pyboost/script.py      |   2 +-
 src/{task => }/methods/scape/config.vsh.yaml  |   0
 src/{task => }/methods/scape/script.py        |   0
 .../transformer_ensemble/config.vsh.yaml      |   0
 .../methods/transformer_ensemble/models.py    |   0
 .../methods/transformer_ensemble/script.py    |   2 +-
 .../methods/transformer_ensemble/train.py     |   0
 .../methods/transformer_ensemble/utils.py     |   0
 .../mean_rowwise_correlation/config.vsh.yaml  |   0
 .../metrics/mean_rowwise_correlation/script.R |   0
 .../mean_rowwise_error/config.vsh.yaml        |   0
 .../metrics/mean_rowwise_error/script.R       |   0
 .../add_uns_metadata/config.vsh.yaml          |   3 +-
 .../add_uns_metadata/script.py                |   0
 .../process_dataset/bootstrap/config.vsh.yaml |   1 -
 .../process_dataset/bootstrap/script.py       |   0
 .../clean_pseudobulk/config.vsh.yaml          |   1 -
 .../process_dataset/clean_pseudobulk/script.R |   0
 .../compute_pseudobulk/config.vsh.yaml        |   3 +-
 .../compute_pseudobulk/script.py              |   0
 .../convert_h5ad_to_parquet/config.vsh.yaml   |   1 -
 .../convert_h5ad_to_parquet/script.py         |   0
 .../config.vsh.yaml                           |   1 -
 .../convert_kaggle_h5ad_to_parquet/script.py  |   0
 .../filter_obs/config.vsh.yaml                |   1 -
 .../process_dataset/filter_obs/script.R       |   0
 .../generate_id_map/config.vsh.yaml           |   1 -
 .../process_dataset/generate_id_map/script.py |   0
 .../process_dataset/run_limma/config.vsh.yaml |   3 +-
 .../process_dataset/run_limma/script.R        |   0
 src/{task => }/utils/anndata_to_dataframe.py  |   0
 .../workflows/process_dataset/config.vsh.yaml |   0
 .../workflows/process_dataset/main.nf         |   0
 .../workflows/run_benchmark/config.vsh.yaml   |   0
 .../workflows/run_benchmark/main.nf           |   0
 .../run_stability_analysis/config.vsh.yaml    |   0
 .../workflows/run_stability_analysis/main.nf  |   0
 103 files changed, 272 insertions(+), 200 deletions(-)
 rename src/{task => }/api/comp_control_method.yaml (100%)
 rename src/{task => }/api/comp_method.yaml (100%)
 rename src/{task => }/api/comp_method_notest.yaml (85%)
 rename src/{task => }/api/comp_metric.yaml (100%)
 rename src/{task => }/api/comp_process_dataset.yaml (92%)
 rename src/{task => }/api/file_de_test_h5ad.yaml (100%)
 rename src/{task => }/api/file_de_train_h5ad.yaml (100%)
 rename src/{task => }/api/file_id_map.yaml (100%)
 create mode 100644 src/api/file_model.yaml
 rename src/{task => }/api/file_prediction.yaml (100%)
 rename src/{task => }/api/file_sc_counts.yaml (100%)
 rename src/{task => }/api/file_score.yaml (100%)
 rename src/{task => }/api/task_info.yaml (81%)
 rename src/{task => }/control_methods/ground_truth/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/ground_truth/script.R (100%)
 rename src/{task => }/control_methods/mean_across_celltypes/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/mean_across_celltypes/script.py (100%)
 rename src/{task => }/control_methods/mean_across_compounds/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/mean_across_compounds/script.py (100%)
 rename src/{task => }/control_methods/mean_outcome/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/mean_outcome/script.py (100%)
 rename src/{task => }/control_methods/sample/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/sample/script.R (100%)
 rename src/{task => }/control_methods/zeros/config.vsh.yaml (100%)
 rename src/{task => }/control_methods/zeros/script.py (100%)
 rename src/{task => }/methods/jn_ap_op2/config.vsh.yaml (97%)
 rename src/{task => }/methods/jn_ap_op2/helper.py (100%)
 rename src/{task => }/methods/jn_ap_op2/script.py (90%)
 rename src/{task => }/methods/lgc_ensemble/config.vsh.yaml (100%)
 rename src/{task => }/methods/lgc_ensemble/main.nf (100%)
 rename src/{task => }/methods/lgc_ensemble/test.sh (100%)
 rename src/{task => }/methods/lgc_ensemble_direct/config.vsh.yaml (100%)
 rename src/{task => }/methods/lgc_ensemble_direct/script.py (96%)
 rename src/{task => }/methods/lgc_ensemble_helpers/divisor_finder.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/helper_classes.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/helper_functions.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/models.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/predict.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/prepare_data.py (100%)
 rename src/{task => }/methods/lgc_ensemble_helpers/train.py (100%)
 rename src/{task => }/methods/lgc_ensemble_predict/config.vsh.yaml (100%)
 rename src/{task => }/methods/lgc_ensemble_predict/script.py (100%)
 rename src/{task => }/methods/lgc_ensemble_prepare/config.vsh.yaml (100%)
 rename src/{task => }/methods/lgc_ensemble_prepare/script.py (97%)
 rename src/{task => }/methods/lgc_ensemble_train/config.vsh.yaml (100%)
 rename src/{task => }/methods/lgc_ensemble_train/script.py (94%)
 rename src/{task => }/methods/nn_retraining_with_pseudolabels/config.vsh.yaml (100%)
 rename src/{task => }/methods/nn_retraining_with_pseudolabels/notebook_264.py (100%)
 rename src/{task => }/methods/nn_retraining_with_pseudolabels/notebook_266.py (100%)
 rename src/{task => }/methods/nn_retraining_with_pseudolabels/script.py (96%)
 rename src/{task => }/methods/pyboost/config.vsh.yaml (100%)
 rename src/{task => }/methods/pyboost/helper.py (100%)
 rename src/{task => }/methods/pyboost/script.py (98%)
 rename src/{task => }/methods/scape/config.vsh.yaml (100%)
 rename src/{task => }/methods/scape/script.py (100%)
 rename src/{task => }/methods/transformer_ensemble/config.vsh.yaml (100%)
 rename src/{task => }/methods/transformer_ensemble/models.py (100%)
 rename src/{task => }/methods/transformer_ensemble/script.py (98%)
 rename src/{task => }/methods/transformer_ensemble/train.py (100%)
 rename src/{task => }/methods/transformer_ensemble/utils.py (100%)
 rename src/{task => }/metrics/mean_rowwise_correlation/config.vsh.yaml (100%)
 rename src/{task => }/metrics/mean_rowwise_correlation/script.R (100%)
 rename src/{task => }/metrics/mean_rowwise_error/config.vsh.yaml (100%)
 rename src/{task => }/metrics/mean_rowwise_error/script.R (100%)
 rename src/{task => }/process_dataset/add_uns_metadata/config.vsh.yaml (96%)
 rename src/{task => }/process_dataset/add_uns_metadata/script.py (100%)
 rename src/{task => }/process_dataset/bootstrap/config.vsh.yaml (98%)
 rename src/{task => }/process_dataset/bootstrap/script.py (100%)
 rename src/{task => }/process_dataset/clean_pseudobulk/config.vsh.yaml (96%)
 rename src/{task => }/process_dataset/clean_pseudobulk/script.R (100%)
 rename src/{task => }/process_dataset/compute_pseudobulk/config.vsh.yaml (89%)
 rename src/{task => }/process_dataset/compute_pseudobulk/script.py (100%)
 rename src/{task => }/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml (97%)
 rename src/{task => }/process_dataset/convert_h5ad_to_parquet/script.py (100%)
 rename src/{task => }/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml (98%)
 rename src/{task => }/process_dataset/convert_kaggle_h5ad_to_parquet/script.py (100%)
 rename src/{task => }/process_dataset/filter_obs/config.vsh.yaml (96%)
 rename src/{task => }/process_dataset/filter_obs/script.R (100%)
 rename src/{task => }/process_dataset/generate_id_map/config.vsh.yaml (96%)
 rename src/{task => }/process_dataset/generate_id_map/script.py (100%)
 rename src/{task => }/process_dataset/run_limma/config.vsh.yaml (94%)
 rename src/{task => }/process_dataset/run_limma/script.R (100%)
 rename src/{task => }/utils/anndata_to_dataframe.py (100%)
 rename src/{task => }/workflows/process_dataset/config.vsh.yaml (100%)
 rename src/{task => }/workflows/process_dataset/main.nf (100%)
 rename src/{task => }/workflows/run_benchmark/config.vsh.yaml (100%)
 rename src/{task => }/workflows/run_benchmark/main.nf (100%)
 rename src/{task => }/workflows/run_stability_analysis/config.vsh.yaml (100%)
 rename src/{task => }/workflows/run_stability_analysis/main.nf (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69a61bee..437bb9b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,10 @@
-# task-dge-perturbation-prediction 0.1.0
+# task_perturbation_prediction 1.0.0
 
-Initial release of the DGE Perturbation Prediction task. Initial components:
+Initial release of the Perturbation Prediction task. Initial components:
 
-* `src/task/process_dataset`: Compute the DGE data from the raw single-cell counts using Limma.
-* `src/task/control_methods`: Baseline control methods: sample, ground_truth, zeros, mean_across_celltypes, mean_across_compounds, mean_outcome.
-* `src/task/methods`: DGE perturbation prediction methods: random_forest.
-* `src/task/metrics`: Evaluation metrics: mean_rowwise_error.
+* `src/process_dataset`: Compute the DGE data from the raw single-cell counts using Limma.
+* `src/control_methods`: Baseline control methods: sample, ground_truth, zeros, mean_across_celltypes, mean_across_compounds, mean_outcome.
+* `src/methods`: Perturbation prediction methods: jn_ap_op2, lgc_ensemble, nn_retraining_with_pseudolabels, pyboost, scape, transformer_ensemble.
+* `src/metrics`: Evaluation metrics: mean_rowwise_error, mean_rowwise_correlation.
 
 
diff --git a/README.md b/README.md
index 42eaf4be..596c4f44 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# DGE Perturbation Prediction
+# Perturbation Prediction
 
 
 <!--
@@ -10,48 +10,54 @@ Predicting how small molecules change gene expression in different cell
 types.
 
 Path to source:
-[`src/task`](https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/src/task)
+[`src`](https://github.com/openproblems-bio/task_perturbation_prediction/tree/main/src)
+
+## README
 
 ## Installation
 
-You need to have Docker, Java, and Viash installed. Follow
-[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)
+You need to have Docker, Java, and Viash installed. Follow [these
+instructions](https://openproblems.bio/documentation/fundamentals/requirements)
 to install the required dependencies.
 
 ## Add a method
 
-To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.
+To add a method to the repository, follow the instructions in the
+`scripts/add_a_method.sh` script.
 
 ## Frequently used commands
 
 To get started, you can run the following commands:
 
-```bash
-git clone git@github.com:openproblems-bio/task-dge-perturbation-prediction.git
+``` bash
+git clone git@github.com:openproblems-bio/task_perturbation_prediction.git
 
-cd task-dge-perturbation-prediction
+cd task_perturbation_prediction
 
 # download resources
 scripts/download_resources.sh
 ```
 
-To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark:
+To run the benchmark, you first need to build the components.
+Afterwards, you can run the benchmark:
 
-```bash
+``` bash
 viash ns build --parallel --setup cachedbuild
 
 scripts/run_benchmark.sh
 ```
 
-After adding a component, it is recommended to run the tests to ensure that the component is working correctly:
+After adding a component, it is recommended to run the tests to ensure
+that the component is working correctly:
 
-```bash
+``` bash
 viash ns test --parallel
 ```
 
-Optionally, you can provide the `--query` argument to test only a subset of components:
+Optionally, you can provide the `--query` argument to test only a subset
+of components:
 
-```bash
+``` bash
 viash ns test --parallel --query "component_name"
 ```
 
@@ -114,41 +120,51 @@ perturbation responses in difference biological contexts.
 
 ## Authors & contributors
 
-| name              | roles  |
-|:------------------|:-------|
-| Artur Szałata     | author |
-| Robrecht Cannoodt | author |
+| name              | roles       |
+|:------------------|:------------|
+| Artur Szałata     | author      |
+| Robrecht Cannoodt | author      |
+| Daniel Burkhardt  | author      |
+| Malte D. Luecken  | author      |
+| Tin M. Tunjic     | contributor |
+| Mengbo Wang       | contributor |
+| Andrew Benz       | author      |
+| Tianyu Liu        | contributor |
+| Jalil Nourisa     | contributor |
+| Rico Meinl        | contributor |
 
 ## API
 
 ``` mermaid
 flowchart LR
   file_sc_counts("Single Cell Counts")
-  comp_process_dataset[/"Data processor"/]
-  file_de_train("DE train")
-  file_de_test("DE test")
+  comp_process_dataset[/"Process dataset"/]
+  file_de_train_h5ad("DE train")
+  file_de_test_h5ad("DE test")
   file_id_map("ID Map")
   comp_control_method[/"Control Method"/]
   comp_method[/"Method"/]
   comp_metric[/"Metric"/]
   file_prediction("Prediction")
+  file_model("Model")
   file_score("Score")
-  file_lincs_id_compound_mapping("Mapping compound names to lincs ids and smiles")
   file_sc_counts---comp_process_dataset
-  comp_process_dataset-->file_de_train
-  comp_process_dataset-->file_de_test
+  comp_process_dataset-->file_de_train_h5ad
+  comp_process_dataset-->file_de_test_h5ad
   comp_process_dataset-->file_id_map
-  file_de_train---comp_control_method
-  file_de_train---comp_method
-  file_de_test---comp_control_method
-  file_de_test---comp_metric
+  file_de_train_h5ad---comp_control_method
+  file_de_train_h5ad---comp_method_notest
+  file_de_train_h5ad---comp_method
+  file_de_test_h5ad---comp_control_method
+  file_de_test_h5ad---comp_metric
   file_id_map---comp_control_method
+  file_id_map---comp_method_notest
   file_id_map---comp_method
   comp_control_method-->file_prediction
   comp_method-->file_prediction
+  comp_method-->file_model
   comp_metric-->file_score
   file_prediction---comp_metric
-  file_lincs_id_compound_mapping---comp_process_dataset
 ```
 
 ## File format: Single Cell Counts
@@ -196,24 +212,23 @@ Slot description:
 
 </div>
 
-## Component type: Data processor
+## Component type: Process dataset
 
 Path:
 [`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset)
 
-A DGE regression dataset processor
+Process the raw dataset
 
 Arguments:
 
 <div class="small">
 
-| Name                          | Type   | Description                                                                                  |
-|:------------------------------|:-------|:---------------------------------------------------------------------------------------------|
-| `--sc_counts`                 | `file` | Anndata with the counts of the whole dataset.                                                |
-| `--lincs_id_compound_mapping` | `file` | Parquet file mapping compound names to lincs ids and smiles.                                 |
-| `--de_train`                  | `file` | (*Output*) Differential expression results for training.                                     |
-| `--de_test`                   | `file` | (*Output*) Differential expression results for testing.                                      |
-| `--id_map`                    | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. |
+| Name              | Type   | Description                                                                                                         |
+|:------------------|:-------|:--------------------------------------------------------------------------------------------------------------------|
+| `--sc_counts`     | `file` | Anndata with the counts of the whole dataset.                                                                       |
+| `--de_train_h5ad` | `file` | (*Output*) Differential expression results for training. Default: `de_train.h5ad`.                                  |
+| `--de_test_h5ad`  | `file` | (*Output*) Differential expression results for testing. Default: `de_test.h5ad`.                                    |
+| `--id_map`        | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. Default: `id_map.csv`. |
 
 </div>
 
@@ -229,7 +244,8 @@ Format:
 
     AnnData object
      obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control'
-     layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval'
+     layers: 'logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Value', 'B', 'is_de', 'is_de_adj', 'sign_log10_pval', 'clipped_sign_log10_pval'
+     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'single_cell_obs'
 
 </div>
 
@@ -237,20 +253,32 @@ Slot description:
 
 <div class="small">
 
-| Slot                        | Type      | Description                                                                                                                                                                                                                                                                                                      |
-|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `obs["cell_type"]`          | `string`  | The annotated cell type of each cell based on RNA expression.                                                                                                                                                                                                                                                    |
-| `obs["sm_name"]`            | `string`  | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                        |
-| `obs["sm_lincs_id"]`        | `string`  | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                                                |
-| `obs["SMILES"]`             | `string`  | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment.                                           |
-| `obs["split"]`              | `string`  | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’.                                                                                                                                                                                                                                      |
-| `obs["control"]`            | `boolean` | Boolean indicating whether this instance was used as a control.                                                                                                                                                                                                                                                  |
-| `layers["P.Value"]`         | `double`  | P-value of the differential expression test.                                                                                                                                                                                                                                                                     |
-| `layers["adj.P.Value"]`     | `double`  | Adjusted P-value of the differential expression test.                                                                                                                                                                                                                                                            |
-| `layers["is_de"]`           | `boolean` | Whether the gene is differentially expressed.                                                                                                                                                                                                                                                                    |
-| `layers["is_de_adj"]`       | `boolean` | Whether the gene is differentially expressed after adjustment.                                                                                                                                                                                                                                                   |
-| `layers["logFC"]`           | `double`  | Log fold change of the differential expression test.                                                                                                                                                                                                                                                             |
-| `layers["sign_log10_pval"]` | `double`  | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. |
+| Slot                                | Type        | Description                                                                                                                                                                                                                                                                                                       |
+|:------------------------------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `obs["cell_type"]`                  | `string`    | The annotated cell type of each cell based on RNA expression.                                                                                                                                                                                                                                                     |
+| `obs["sm_name"]`                    | `string`    | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                         |
+| `obs["sm_lincs_id"]`                | `string`    | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                                                 |
+| `obs["SMILES"]`                     | `string`    | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment.                                            |
+| `obs["split"]`                      | `string`    | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’.                                                                                                                                                                                                                                       |
+| `obs["control"]`                    | `boolean`   | Boolean indicating whether this instance was used as a control.                                                                                                                                                                                                                                                   |
+| `layers["logFC"]`                   | `double`    | Log fold change of the differential expression test.                                                                                                                                                                                                                                                              |
+| `layers["AveExpr"]`                 | `double`    | (*Optional*) Average expression of the differential expression test.                                                                                                                                                                                                                                              |
+| `layers["t"]`                       | `double`    | (*Optional*) T-statistic of the differential expression test.                                                                                                                                                                                                                                                     |
+| `layers["P.Value"]`                 | `double`    | P-value of the differential expression test.                                                                                                                                                                                                                                                                      |
+| `layers["adj.P.Value"]`             | `double`    | Adjusted P-value of the differential expression test.                                                                                                                                                                                                                                                             |
+| `layers["B"]`                       | `double`    | (*Optional*) B-statistic of the differential expression test.                                                                                                                                                                                                                                                     |
+| `layers["is_de"]`                   | `boolean`   | Whether the gene is differentially expressed.                                                                                                                                                                                                                                                                     |
+| `layers["is_de_adj"]`               | `boolean`   | Whether the gene is differentially expressed after adjustment.                                                                                                                                                                                                                                                    |
+| `layers["sign_log10_pval"]`         | `double`    | Differential expression value (`-log10(p-value) * sign(LFC)`) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. |
+| `layers["clipped_sign_log10_pval"]` | `double`    | A clipped version of the sign_log10_pval layer. Values are clipped to be between -4 and 4 (i.e. `-log10(0.0001)` and `-log10(0.0001)`).                                                                                                                                                                           |
+| `uns["dataset_id"]`                 | `string`    | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived.                                                                                                                                              |
+| `uns["dataset_name"]`               | `string`    | A human-readable name for the dataset.                                                                                                                                                                                                                                                                            |
+| `uns["dataset_url"]`                | `string`    | (*Optional*) Link to the original source of the dataset.                                                                                                                                                                                                                                                          |
+| `uns["dataset_reference"]`          | `string`    | (*Optional*) Bibtex reference of the paper in which the dataset was published.                                                                                                                                                                                                                                    |
+| `uns["dataset_summary"]`            | `string`    | Short description of the dataset.                                                                                                                                                                                                                                                                                 |
+| `uns["dataset_description"]`        | `string`    | Long description of the dataset.                                                                                                                                                                                                                                                                                  |
+| `uns["dataset_organism"]`           | `string`    | (*Optional*) The organism of the sample in the dataset.                                                                                                                                                                                                                                                           |
+| `uns["single_cell_obs"]`            | `dataframe` | A dataframe with the cell-level metadata for the training set.                                                                                                                                                                                                                                                    |
 
 </div>
 
@@ -265,8 +293,9 @@ Format:
 <div class="small">
 
     AnnData object
-     obs: 'id', 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control'
-     layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval'
+     obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control'
+     layers: 'logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Value', 'B', 'is_de', 'is_de_adj', 'sign_log10_pval', 'clipped_sign_log10_pval'
+     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'single_cell_obs'
 
 </div>
 
@@ -274,21 +303,32 @@ Slot description:
 
 <div class="small">
 
-| Slot                        | Type      | Description                                                                                                                                                                                                                                                                                                      |
-|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `obs["id"]`                 | `integer` | Index of the test observation.                                                                                                                                                                                                                                                                                   |
-| `obs["cell_type"]`          | `string`  | The annotated cell type of each cell based on RNA expression.                                                                                                                                                                                                                                                    |
-| `obs["sm_name"]`            | `string`  | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                        |
-| `obs["sm_lincs_id"]`        | `string`  | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                                                |
-| `obs["SMILES"]`             | `string`  | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment.                                           |
-| `obs["split"]`              | `string`  | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’.                                                                                                                                                                                                                                      |
-| `obs["control"]`            | `boolean` | Boolean indicating whether this instance was used as a control.                                                                                                                                                                                                                                                  |
-| `layers["P.Value"]`         | `double`  | P-value of the differential expression test.                                                                                                                                                                                                                                                                     |
-| `layers["adj.P.Value"]`     | `double`  | Adjusted P-value of the differential expression test.                                                                                                                                                                                                                                                            |
-| `layers["is_de"]`           | `boolean` | Whether the gene is differentially expressed.                                                                                                                                                                                                                                                                    |
-| `layers["is_de_adj"]`       | `boolean` | Whether the gene is differentially expressed after adjustment.                                                                                                                                                                                                                                                   |
-| `layers["logFC"]`           | `double`  | Log fold change of the differential expression test.                                                                                                                                                                                                                                                             |
-| `layers["sign_log10_pval"]` | `double`  | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. |
+| Slot                                | Type        | Description                                                                                                                                                                                                                                                                                                       |
+|:------------------------------------|:------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `obs["cell_type"]`                  | `string`    | The annotated cell type of each cell based on RNA expression.                                                                                                                                                                                                                                                     |
+| `obs["sm_name"]`                    | `string`    | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                         |
+| `obs["sm_lincs_id"]`                | `string`    | The global LINCS ID of the (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data.                                                                                                                                          |
+| `obs["SMILES"]`                     | `string`    | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment.                                            |
+| `obs["split"]`                      | `string`    | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’.                                                                                                                                                                                                                                       |
+| `obs["control"]`                    | `boolean`   | Boolean indicating whether this instance was used as a control.                                                                                                                                                                                                                                                   |
+| `layers["logFC"]`                   | `double`    | Log fold change of the differential expression test.                                                                                                                                                                                                                                                              |
+| `layers["AveExpr"]`                 | `double`    | (*Optional*) Average expression of the differential expression test.                                                                                                                                                                                                                                              |
+| `layers["t"]`                       | `double`    | (*Optional*) T-statistic of the differential expression test.                                                                                                                                                                                                                                                     |
+| `layers["P.Value"]`                 | `double`    | P-value of the differential expression test.                                                                                                                                                                                                                                                                      |
+| `layers["adj.P.Value"]`             | `double`    | Adjusted P-value of the differential expression test.                                                                                                                                                                                                                                                             |
+| `layers["B"]`                       | `double`    | (*Optional*) B-statistic of the differential expression test.                                                                                                                                                                                                                                                     |
+| `layers["is_de"]`                   | `boolean`   | Whether the gene is differentially expressed.                                                                                                                                                                                                                                                                     |
+| `layers["is_de_adj"]`               | `boolean`   | Whether the gene is differentially expressed after adjustment.                                                                                                                                                                                                                                                    |
+| `layers["sign_log10_pval"]`         | `double`    | Differential expression value (`-log10(p-value) * sign(LFC)`) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. |
+| `layers["clipped_sign_log10_pval"]` | `double`    | A clipped version of the sign_log10_pval layer. Values are clipped to be between -4 and 4 (i.e. `-log10(0.0001)` and `-log10(0.0001)`).                                                                                                                                                                           |
+| `uns["dataset_id"]`                 | `string`    | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived.                                                                                                                                              |
+| `uns["dataset_name"]`               | `string`    | A human-readable name for the dataset.                                                                                                                                                                                                                                                                            |
+| `uns["dataset_url"]`                | `string`    | (*Optional*) Link to the original source of the dataset.                                                                                                                                                                                                                                                          |
+| `uns["dataset_reference"]`          | `string`    | (*Optional*) Bibtex reference of the paper in which the dataset was published.                                                                                                                                                                                                                                    |
+| `uns["dataset_summary"]`            | `string`    | Short description of the dataset.                                                                                                                                                                                                                                                                                 |
+| `uns["dataset_description"]`        | `string`    | Long description of the dataset.                                                                                                                                                                                                                                                                                  |
+| `uns["dataset_organism"]`           | `string`    | (*Optional*) The organism of the sample in the dataset.                                                                                                                                                                                                                                                           |
+| `uns["single_cell_obs"]`            | `dataframe` | A dataframe with the cell-level metadata.                                                                                                                                                                                                                                                                         |
 
 </div>
 
@@ -303,8 +343,8 @@ Format:
 
 <div class="small">
 
-    AnnData object
-     obs: 'id', 'cell_type', 'sm_name'
+    Tabular data
+     'id', 'cell_type', 'sm_name'
 
 </div>
 
@@ -312,11 +352,11 @@ Slot description:
 
 <div class="small">
 
-| Slot               | Type      | Description                    |
-|:-------------------|:----------|:-------------------------------|
-| `obs["id"]`        | `integer` | Index of the test observation. |
-| `obs["cell_type"]` | `string`  | Cell type name.                |
-| `obs["sm_name"]`   | `string`  | Small molecule name.           |
+| Column      | Type      | Description                    |
+|:------------|:----------|:-------------------------------|
+| `id`        | `integer` | Index of the test observation. |
+| `cell_type` | `string`  | Cell type name.                |
+| `sm_name`   | `string`  | Small molecule name.           |
 
 </div>
 
@@ -331,15 +371,17 @@ Arguments:
 
 <div class="small">
 
-| Name         | Type   | Description                                                                       |
-|:-------------|:-------|:----------------------------------------------------------------------------------|
-| `--de_train` | `file` | Differential expression results for training.                                     |
-| `--de_test`  | `file` | Differential expression results for testing.                                      |
-| `--id_map`   | `file` | File indicates the order of de_test, the cell types and the small molecule names. |
-| `--output`   | `file` | (*Output*) Differential Gene Expression prediction.                               |
+| Name              | Type     | Description                                                                         |
+|:------------------|:---------|:------------------------------------------------------------------------------------|
+| `--de_train_h5ad` | `file`   | (*Optional*) Differential expression results for training.                          |
+| `--de_test_h5ad`  | `file`   | Differential expression results for testing.                                        |
+| `--id_map`        | `file`   | File indicating the order of de_test, the cell types and the small molecule names.  |
+| `--layer`         | `string` | (*Optional*) Which layer to use for prediction. Default: `clipped_sign_log10_pval`. |
+| `--output`        | `file`   | (*Output*) Differential Gene Expression prediction.                                 |
 
 </div>
 
+
 ## Component type: Method
 
 Path:
@@ -351,11 +393,13 @@ Arguments:
 
 <div class="small">
 
-| Name         | Type   | Description                                                                       |
-|:-------------|:-------|:----------------------------------------------------------------------------------|
-| `--de_train` | `file` | Differential expression results for training.                                     |
-| `--id_map`   | `file` | File indicates the order of de_test, the cell types and the small molecule names. |
-| `--output`   | `file` | (*Output*) Differential Gene Expression prediction.                               |
+| Name              | Type     | Description                                                                                                         |
+|:------------------|:---------|:--------------------------------------------------------------------------------------------------------------------|
+| `--de_train_h5ad` | `file`   | (*Optional*) Differential expression results for training.                                                          |
+| `--id_map`        | `file`   | File indicating the order of de_test, the cell types and the small molecule names.                                  |
+| `--layer`         | `string` | (*Optional*) Which layer to use for prediction. Default: `clipped_sign_log10_pval`.                                 |
+| `--output`        | `file`   | (*Output*) Differential Gene Expression prediction.                                                                 |
+| `--output_model`  | `file`   | (*Optional, Output*) Optional model output. If no value is passed, the model will be removed at the end of the run. |
 
 </div>
 
@@ -370,11 +414,15 @@ Arguments:
 
 <div class="small">
 
-| Name           | Type   | Description                                       |
-|:---------------|:-------|:--------------------------------------------------|
-| `--de_test`    | `file` | Differential expression results for testing.      |
-| `--prediction` | `file` | Differential Gene Expression prediction.          |
-| `--output`     | `file` | (*Output*) File indicating the score of a metric. |
+| Name                 | Type     | Description                                                                                   |
+|:---------------------|:---------|:----------------------------------------------------------------------------------------------|
+| `--de_test_h5ad`     | `file`   | Differential expression results for testing.                                                  |
+| `--de_test_layer`    | `string` | (*Optional*) In which layer to find the DE data. Default: `clipped_sign_log10_pval`.          |
+| `--prediction`       | `file`   | Differential Gene Expression prediction.                                                      |
+| `--prediction_layer` | `string` | (*Optional*) In which layer to find the predicted DE data. Default: `prediction`.             |
+| `--output`           | `file`   | (*Output*) File indicating the score of a metric.                                             |
+| `--resolve_genes`    | `string` | (*Optional*) How to resolve difference in genes between the two datasets. Default: `de_test`. |
+| `--resolve_genes`    | `string` | (*Optional*) How to resolve difference in genes between the two datasets. Default: `de_test`. |
 
 </div>
 
@@ -382,15 +430,15 @@ Arguments:
 
 Differential Gene Expression prediction
 
-Example file: `resources/neurips-2023-data/output_rf.parquet`
+Example file: `resources/neurips-2023-data/prediction.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
-     obs: 'id'
-     layers: 'sign_log10_pval'
+     layers: 'prediction'
+     uns: 'dataset_id', 'method_id'
 
 </div>
 
@@ -398,54 +446,45 @@ Slot description:
 
 <div class="small">
 
-| Slot                        | Type      | Description                                                          |
-|:----------------------------|:----------|:---------------------------------------------------------------------|
-| `obs["id"]`                 | `integer` | Index of the test observation.                                       |
-| `layers["sign_log10_pval"]` | `double`  | Predicted sign of the logFC times the log10 of the adjusted p-value. |
+| Slot                   | Type     | Description                                                                                                                                                          |
+|:-----------------------|:---------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `layers["prediction"]` | `double` | Predicted differential gene expression.                                                                                                                              |
+| `uns["dataset_id"]`    | `string` | A unique identifier for the dataset. This is different from the `obs.dataset_id` field, which is the identifier for the dataset from which the cell data is derived. |
+| `uns["method_id"]`     | `string` | A unique identifier for the method used to generate the prediction.                                                                                                  |
 
 </div>
 
-## File format: Score
+## File format: Model
 
-File indicating the score of a metric.
+Optional model output. If no value is passed, the model will be removed
+at the end of the run.
 
-Example file: `resources/neurips-2023-data/score_rf.json`
+Example file: `resources/neurips-2023-data/model/`
 
 Format:
 
 <div class="small">
 
-    AnnData object
-     uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values'
-
 </div>
 
 Slot description:
 
 <div class="small">
 
-| Slot                   | Type     | Description                                                                                  |
-|:-----------------------|:---------|:---------------------------------------------------------------------------------------------|
-| `uns["dataset_id"]`    | `string` | A unique identifier for the dataset.                                                         |
-| `uns["method_id"]`     | `string` | A unique identifier for the method.                                                          |
-| `uns["metric_ids"]`    | `string` | One or more unique metric identifiers.                                                       |
-| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. |
-
 </div>
 
-## File format: Mapping compound names to lincs ids and smiles
+## File format: Score
 
-Parquet file mapping compound names to lincs ids and smiles.
+File indicating the score of a metric.
 
-Example file:
-`resources/neurips-2023-raw/lincs_id_compound_mapping.parquet`
+Example file: `resources/neurips-2023-data/score.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
-     obs: 'compound_id', 'sm_lincs_id', 'sm_name', 'smiles'
+     uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values'
 
 </div>
 
@@ -453,12 +492,12 @@ Slot description:
 
 <div class="small">
 
-| Slot                 | Type     | Description                                           |
-|:---------------------|:---------|:------------------------------------------------------|
-| `obs["compound_id"]` | `string` | Unique identifier for the compound.                   |
-| `obs["sm_lincs_id"]` | `string` | LINCS identifier for the compound.                    |
-| `obs["sm_name"]`     | `string` | Name of the compound.                                 |
-| `obs["smiles"]`      | `string` | SMILES notation representing the molecular structure. |
+| Slot                   | Type     | Description                                                                                  |
+|:-----------------------|:---------|:---------------------------------------------------------------------------------------------|
+| `uns["dataset_id"]`    | `string` | A unique identifier for the dataset.                                                         |
+| `uns["method_id"]`     | `string` | A unique identifier for the method.                                                          |
+| `uns["metric_ids"]`    | `string` | One or more unique metric identifiers.                                                       |
+| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. |
 
 </div>
 
diff --git a/_viash.yaml b/_viash.yaml
index 42118b5f..5106f43c 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -7,8 +7,8 @@ config_mods: |
   .functionality.version := 'dev'
   .functionality.arguments[.multiple == true].multiple_sep := ';'
   .platforms[.type == 'docker'].target_registry := 'ghcr.io'
-  .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task-dge-perturbation-prediction'
-  .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task-dge-perturbation-prediction'
+  .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task_perturbation_prediction'
+  .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_perturbation_prediction'
   .platforms[.type == "nextflow"].directives.tag := "$id"
   .platforms[.type == "nextflow"].auto.simplifyOutput := false
   .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
diff --git a/scripts/add_a_method.sh b/scripts/add_a_method.sh
index fe07dfdc..aada69a1 100755
--- a/scripts/add_a_method.sh
+++ b/scripts/add_a_method.sh
@@ -15,25 +15,25 @@ viash run src/common/create_component/config.vsh.yaml -- \
   --language "$method_lang" \
   --name "$method_id"
 
-# TODO: fill in required fields in src/task/methods/foo/config.vsh.yaml
-# TODO: edit src/task/methods/foo/script.py/R
+# TODO: fill in required fields in src/methods/foo/config.vsh.yaml
+# TODO: edit src/methods/foo/script.py/R
 
 # test the component
-viash test src/task/methods/$method_id/config.vsh.yaml
+viash test src/methods/$method_id/config.vsh.yaml
 
 # rebuild the container (only if you change something to the docker platform)
 # You can reduce the memory and cpu allotted to jobs in _viash.yaml by modifying .platforms[.type == "nextflow"].config.labels
-viash run src/task/methods/$method_id/config.vsh.yaml -- \
+viash run src/methods/$method_id/config.vsh.yaml -- \
   ---setup cachedbuild ---verbose
 
 # run the method (using h5ad as input)
-viash run src/task/methods/$method_id/config.vsh.yaml -- \
+viash run src/methods/$method_id/config.vsh.yaml -- \
   --de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \
   --id_map "resources/neurips-2023-kaggle/id_map.csv" \
   --output "output/prediction.h5ad"
 
 # run evaluation metric
-viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
+viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \
   --de_test_h5ad "resources/neurips-2023-kaggle/de_test.h5ad" \
   --prediction "output/prediction.h5ad" \
   --output "output/score.h5ad"
diff --git a/scripts/generate_kaggle_resources.sh b/scripts/generate_kaggle_resources.sh
index 54aa237f..13a9d005 100755
--- a/scripts/generate_kaggle_resources.sh
+++ b/scripts/generate_kaggle_resources.sh
@@ -18,7 +18,7 @@ if [[ ! -f "$OUT/2023-09-12_de_by_cell_type_test.h5ad" ]]; then
     "import anndata as ad; ad.read_h5ad('$OUT/2023-09-12_de_by_cell_type_train.h5ad').write_h5ad('$OUT/2023-09-12_de_by_cell_type_train.h5ad', compression='gzip')"
 fi
 
-viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml -- \
+viash run src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml -- \
   --input_train "$OUT/2023-09-12_de_by_cell_type_train.h5ad" \
   --input_test "$OUT/2023-09-12_de_by_cell_type_test.h5ad" \
   --input_single_cell_h5ad "resources/neurips-2023-raw/sc_counts.h5ad" \
@@ -34,14 +34,14 @@ viash run src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yam
   --dataset_organism homo_sapiens
 
 echo ">> Run method"
-viash run src/task/control_methods/mean_across_compounds/config.vsh.yaml -- \
+viash run src/control_methods/mean_across_compounds/config.vsh.yaml -- \
   --de_train_h5ad "$OUT/de_train.h5ad" \
   --de_test_h5ad "$OUT/de_test.h5ad" \
   --id_map "$OUT/id_map.csv" \
   --output "$OUT/prediction.h5ad"
 
 echo ">> Run metric"
-viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
+viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \
   --prediction "$OUT/prediction.h5ad" \
   --de_test_h5ad "$OUT/de_test.h5ad" \
   --output "$OUT/score.h5ad"
diff --git a/scripts/generate_resources.sh b/scripts/generate_resources.sh
index 72137f85..13d4a10a 100755
--- a/scripts/generate_resources.sh
+++ b/scripts/generate_resources.sh
@@ -32,14 +32,14 @@ nextflow run \
   --publish_dir "$OUT"
 
 echo ">> Run method"
-viash run src/task/control_methods/mean_across_compounds/config.vsh.yaml -- \
+viash run src/control_methods/mean_across_compounds/config.vsh.yaml -- \
   --de_train_h5ad "$OUT/de_train.h5ad" \
   --de_test_h5ad "$OUT/de_test.h5ad" \
   --id_map "$OUT/id_map.csv" \
   --output "$OUT/prediction.h5ad"
 
 echo ">> Run metric"
-viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
+viash run src/metrics/mean_rowwise_error/config.vsh.yaml -- \
   --prediction "$OUT/prediction.h5ad" \
   --de_test_h5ad "$OUT/de_test.h5ad" \
   --output "$OUT/score.h5ad"
diff --git a/scripts/render_readme.sh b/scripts/render_readme.sh
index b805cbea..21908bb5 100755
--- a/scripts/render_readme.sh
+++ b/scripts/render_readme.sh
@@ -5,7 +5,7 @@ set -e
 [[ ! -d ../openproblems-v2 ]] && echo "You need to clone the openproblems-v2 repository next to this repository" && exit 1
 
 ../openproblems-v2/target/docker/common/create_task_readme/create_task_readme \
-  --task "dge_perturbation_prediction" \
-  --task_dir "src/task" \
-  --github_url "https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/" \
+  --task "perturbation_prediction" \
+  --task_dir "src" \
+  --github_url "https://github.com/openproblems-bio/task_perturbation_prediction/tree/main/" \
   --output "README.md"
diff --git a/scripts/run_benchmark_tw.sh b/scripts/run_benchmark_tw.sh
index 76f46b6e..86472d44 100755
--- a/scripts/run_benchmark_tw.sh
+++ b/scripts/run_benchmark_tw.sh
@@ -2,7 +2,7 @@
 
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources"
-publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}"
 
 cat > /tmp/params.yaml << HERE
 param_list:
@@ -20,7 +20,7 @@ output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
+tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \
   --revision main_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
diff --git a/scripts/run_benchmark_tw_traens.sh b/scripts/run_benchmark_tw_traens.sh
index 908a150f..f5f10639 100755
--- a/scripts/run_benchmark_tw_traens.sh
+++ b/scripts/run_benchmark_tw_traens.sh
@@ -4,7 +4,7 @@
 
 RUN_ID="traens_$(date +%Y-%m-%d_%H-%M-%S)"
 resources_dir="s3://openproblems-bio/public/neurips-2023-competition/workflow-resources"
-publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}"
 
 cat > /tmp/params.yaml << HERE
 param_list:
@@ -18,8 +18,8 @@ output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
-  --revision fix_trafo_ens_build \
+tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \
+  --revision suggestions_elior_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
   --workspace 53907369739130 \
diff --git a/scripts/run_layert_tw.sh b/scripts/run_layert_tw.sh
index a05880fa..c56e49f5 100755
--- a/scripts/run_layert_tw.sh
+++ b/scripts/run_layert_tw.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 RUN_ID="layert_$(date +%Y-%m-%d_%H-%M-%S)"
-publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}"
 
 cat > /tmp/params.yaml << HERE
 id: dge_perturbation_task
@@ -12,7 +12,7 @@ rename_keys: "de_train_h5ad:de_train_h5ad,de_test_h5ad:de_test_h5ad,id_map:id_ma
 settings: '{"layer": "t"}'
 HERE
 
-tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
+tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \
   --revision main_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
diff --git a/scripts/run_stability_tw.sh b/scripts/run_stability_tw.sh
index 3890fdf9..c752af8a 100755
--- a/scripts/run_stability_tw.sh
+++ b/scripts/run_stability_tw.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 RUN_ID="stability_$(date +%Y-%m-%d_%H-%M-%S)"
-publish_dir="s3://openproblems-data/resources/dge_perturbation_prediction/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}"
 
 cat > /tmp/params.yaml << HERE
 id: neurips-2023-data
@@ -11,7 +11,7 @@ output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task-dge-perturbation-prediction.git \
+tw launch https://github.com/openproblems-bio/task_perturbation_prediction.git \
   --revision main_build \
   --pull-latest \
   --main-script target/nextflow/workflows/run_stability_analysis/main.nf \
diff --git a/scripts/sync_results.sh b/scripts/sync_results.sh
index a46623cb..ed4acd26 100755
--- a/scripts/sync_results.sh
+++ b/scripts/sync_results.sh
@@ -1,18 +1,18 @@
 #!/bin/bash
 
 aws s3 sync \
-  s3://openproblems-data/resources/dge_perturbation_prediction/results/ \
+  s3://openproblems-data/resources/perturbation_prediction/results/ \
   output/benchmark_results/ \
   --delete --dryrun
 
 # sync back modified results
 aws s3 sync \
   output/benchmark_results/ \
-  s3://openproblems-data/resources/dge_perturbation_prediction/results/ \
+  s3://openproblems-data/resources/perturbation_prediction/results/ \
   --delete --dryrun
 
 # sync one run
 runid=run_2024-06-01_00-03-09; aws s3 sync \
   output/benchmark_results/${runid}/ \
-  s3://openproblems-data/resources/dge_perturbation_prediction/results/${runid}/ \
-  --delete --dryrun
\ No newline at end of file
+  s3://openproblems-data/resources/perturbation_prediction/results/${runid}/ \
+  --delete --dryrun
diff --git a/src/task/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
similarity index 100%
rename from src/task/api/comp_control_method.yaml
rename to src/api/comp_control_method.yaml
diff --git a/src/task/api/comp_method.yaml b/src/api/comp_method.yaml
similarity index 100%
rename from src/task/api/comp_method.yaml
rename to src/api/comp_method.yaml
diff --git a/src/task/api/comp_method_notest.yaml b/src/api/comp_method_notest.yaml
similarity index 85%
rename from src/task/api/comp_method_notest.yaml
rename to src/api/comp_method_notest.yaml
index 5e2e0477..b1e68ca9 100644
--- a/src/task/api/comp_method_notest.yaml
+++ b/src/api/comp_method_notest.yaml
@@ -26,8 +26,7 @@ functionality:
       required: true
       direction: output
     - name: "--output_model"
-      type: "file"
-      description: "Optional model output. If no value is passed, the model will be removed at the end of the run."
+      __merge__: file_model.yaml
       direction: output
       required: false
       must_exist: false
diff --git a/src/task/api/comp_metric.yaml b/src/api/comp_metric.yaml
similarity index 100%
rename from src/task/api/comp_metric.yaml
rename to src/api/comp_metric.yaml
diff --git a/src/task/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml
similarity index 92%
rename from src/task/api/comp_process_dataset.yaml
rename to src/api/comp_process_dataset.yaml
index b1e4d6b5..95544a99 100644
--- a/src/task/api/comp_process_dataset.yaml
+++ b/src/api/comp_process_dataset.yaml
@@ -1,4 +1,5 @@
 functionality:
+  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
@@ -25,5 +26,4 @@ functionality:
       __merge__: file_id_map.yaml
       required: true
       direction: output
-      default: id_map.csv
-  test_resources: []
\ No newline at end of file
+      default: id_map.csv
\ No newline at end of file
diff --git a/src/task/api/file_de_test_h5ad.yaml b/src/api/file_de_test_h5ad.yaml
similarity index 100%
rename from src/task/api/file_de_test_h5ad.yaml
rename to src/api/file_de_test_h5ad.yaml
diff --git a/src/task/api/file_de_train_h5ad.yaml b/src/api/file_de_train_h5ad.yaml
similarity index 100%
rename from src/task/api/file_de_train_h5ad.yaml
rename to src/api/file_de_train_h5ad.yaml
diff --git a/src/task/api/file_id_map.yaml b/src/api/file_id_map.yaml
similarity index 100%
rename from src/task/api/file_id_map.yaml
rename to src/api/file_id_map.yaml
diff --git a/src/api/file_model.yaml b/src/api/file_model.yaml
new file mode 100644
index 00000000..cb6cbc63
--- /dev/null
+++ b/src/api/file_model.yaml
@@ -0,0 +1,6 @@
+type: file
+example: resources/neurips-2023-data/model/
+info:
+  label: Model
+  summary: "Optional model output. If no value is passed, the model will be removed at the end of the run."
+  file_type: directory
\ No newline at end of file
diff --git a/src/task/api/file_prediction.yaml b/src/api/file_prediction.yaml
similarity index 100%
rename from src/task/api/file_prediction.yaml
rename to src/api/file_prediction.yaml
diff --git a/src/task/api/file_sc_counts.yaml b/src/api/file_sc_counts.yaml
similarity index 100%
rename from src/task/api/file_sc_counts.yaml
rename to src/api/file_sc_counts.yaml
diff --git a/src/task/api/file_score.yaml b/src/api/file_score.yaml
similarity index 100%
rename from src/task/api/file_score.yaml
rename to src/api/file_score.yaml
diff --git a/src/task/api/task_info.yaml b/src/api/task_info.yaml
similarity index 81%
rename from src/task/api/task_info.yaml
rename to src/api/task_info.yaml
index 40b3973b..c0dfb93f 100644
--- a/src/task/api/task_info.yaml
+++ b/src/api/task_info.yaml
@@ -1,5 +1,5 @@
-name: dge_perturbation_prediction
-label: DGE Perturbation Prediction
+name: perturbation_prediction
+label: Perturbation Prediction
 summary: Predicting how small molecules change gene expression in different cell types.
 readme: |
   ## Installation
@@ -17,9 +17,9 @@ readme: |
   To get started, you can run the following commands:
 
   ```bash
-  git clone git@github.com:openproblems-bio/task-dge-perturbation-prediction.git
+  git clone git@github.com:openproblems-bio/task_perturbation_prediction.git
 
-  cd task-dge-perturbation-prediction
+  cd task_perturbation_prediction
   
   # download resources
   scripts/download_resources.sh
@@ -99,3 +99,43 @@ authors:
     info:
       github: rcannood
       orcid: "0000-0003-3641-729X"
+  - name: Daniel Burkhardt
+    roles: [ author ]
+    info:
+      github: dburkhardt
+      orcid: 0000-0001-7744-1363
+  - name: Malte D. Luecken
+    roles: [ author ]
+    info:
+      github: LuckyMD
+      orcid: 0000-0001-7464-7921
+  - name: Tin M. Tunjic
+    roles: [ contributor ]
+    info:
+      github: ttunja
+      orcid: 0000-0001-8842-6548
+  - name: Mengbo Wang
+    roles: [ contributor ]
+    info:
+      github: wangmengbo
+      orcid: 0000-0002-0266-9993
+  - name: Andrew Benz
+    roles: [ author ]
+    info:
+      github: andrew-benz
+      orcid: 0009-0002-8118-1861
+  - name: Tianyu Liu
+    roles: [ contributor ]
+    info:
+      github: HelloWorldLTY
+      orcid: 0000-0002-9412-6573
+  - name: Jalil Nourisa
+    roles: [ contributor ]
+    info:
+      github: janursa
+      orcid: 0000-0002-7539-4396
+  - name: Rico Meinl
+    roles: [ contributor ]
+    info:
+      github: ricomnl
+      orcid: 0000-0003-4356-6058
diff --git a/src/common/create_component/config.vsh.yaml b/src/common/create_component/config.vsh.yaml
index 91bcc633..9a214832 100644
--- a/src/common/create_component/config.vsh.yaml
+++ b/src/common/create_component/config.vsh.yaml
@@ -24,7 +24,7 @@ functionality:
       direction: output
       # required: true
       description: Path to the component directory. Suggested location is `src/<TASK>/<TYPE>s/<NAME>`.
-      default: src/task/methods/${VIASH_PAR_NAME}
+      default: src/methods/${VIASH_PAR_NAME}
     - type: file
       name: --api_file
       description: |
@@ -33,7 +33,7 @@ functionality:
         to manually specify a different API file to inherit from.
       must_exist: false
       # required: true
-      default: src/task/api/comp_method.yaml
+      default: src/api/comp_method.yaml
     - type: file
       name: --viash_yaml
       description: |
diff --git a/src/common/create_component/script.py b/src/common/create_component/script.py
index b7fef5e8..65aaad9a 100644
--- a/src/common/create_component/script.py
+++ b/src/common/create_component/script.py
@@ -6,12 +6,12 @@
 
 ## VIASH START
 par = {
-  "task": "DGE Perturbation Prediction",
+  "task": "Perturbation Prediction",
   "type": "method",
   "language": "python",
   "name": "new_comp",
-  "output": "src/task/method/new_comp",
-  "api_file": "src/task/api/comp_method.yaml",
+  "output": "src/method/new_comp",
+  "api_file": "src/api/comp_method.yaml",
   "viash_yaml": "_viash.yaml"
 }
 ## VIASH END
diff --git a/src/task/control_methods/ground_truth/config.vsh.yaml b/src/control_methods/ground_truth/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/ground_truth/config.vsh.yaml
rename to src/control_methods/ground_truth/config.vsh.yaml
diff --git a/src/task/control_methods/ground_truth/script.R b/src/control_methods/ground_truth/script.R
similarity index 100%
rename from src/task/control_methods/ground_truth/script.R
rename to src/control_methods/ground_truth/script.R
diff --git a/src/task/control_methods/mean_across_celltypes/config.vsh.yaml b/src/control_methods/mean_across_celltypes/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/mean_across_celltypes/config.vsh.yaml
rename to src/control_methods/mean_across_celltypes/config.vsh.yaml
diff --git a/src/task/control_methods/mean_across_celltypes/script.py b/src/control_methods/mean_across_celltypes/script.py
similarity index 100%
rename from src/task/control_methods/mean_across_celltypes/script.py
rename to src/control_methods/mean_across_celltypes/script.py
diff --git a/src/task/control_methods/mean_across_compounds/config.vsh.yaml b/src/control_methods/mean_across_compounds/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/mean_across_compounds/config.vsh.yaml
rename to src/control_methods/mean_across_compounds/config.vsh.yaml
diff --git a/src/task/control_methods/mean_across_compounds/script.py b/src/control_methods/mean_across_compounds/script.py
similarity index 100%
rename from src/task/control_methods/mean_across_compounds/script.py
rename to src/control_methods/mean_across_compounds/script.py
diff --git a/src/task/control_methods/mean_outcome/config.vsh.yaml b/src/control_methods/mean_outcome/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/mean_outcome/config.vsh.yaml
rename to src/control_methods/mean_outcome/config.vsh.yaml
diff --git a/src/task/control_methods/mean_outcome/script.py b/src/control_methods/mean_outcome/script.py
similarity index 100%
rename from src/task/control_methods/mean_outcome/script.py
rename to src/control_methods/mean_outcome/script.py
diff --git a/src/task/control_methods/sample/config.vsh.yaml b/src/control_methods/sample/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/sample/config.vsh.yaml
rename to src/control_methods/sample/config.vsh.yaml
diff --git a/src/task/control_methods/sample/script.R b/src/control_methods/sample/script.R
similarity index 100%
rename from src/task/control_methods/sample/script.R
rename to src/control_methods/sample/script.R
diff --git a/src/task/control_methods/zeros/config.vsh.yaml b/src/control_methods/zeros/config.vsh.yaml
similarity index 100%
rename from src/task/control_methods/zeros/config.vsh.yaml
rename to src/control_methods/zeros/config.vsh.yaml
diff --git a/src/task/control_methods/zeros/script.py b/src/control_methods/zeros/script.py
similarity index 100%
rename from src/task/control_methods/zeros/script.py
rename to src/control_methods/zeros/script.py
diff --git a/src/task/methods/jn_ap_op2/config.vsh.yaml b/src/methods/jn_ap_op2/config.vsh.yaml
similarity index 97%
rename from src/task/methods/jn_ap_op2/config.vsh.yaml
rename to src/methods/jn_ap_op2/config.vsh.yaml
index edbfa55f..1da6eb5b 100644
--- a/src/task/methods/jn_ap_op2/config.vsh.yaml
+++ b/src/methods/jn_ap_op2/config.vsh.yaml
@@ -30,7 +30,6 @@ functionality:
     - type: python_script
       path: script.py
     - path: helper.py
-    - path: ../../utils/anndata_to_dataframe.py
 platforms:
   - type: docker
     image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4
diff --git a/src/task/methods/jn_ap_op2/helper.py b/src/methods/jn_ap_op2/helper.py
similarity index 100%
rename from src/task/methods/jn_ap_op2/helper.py
rename to src/methods/jn_ap_op2/helper.py
diff --git a/src/task/methods/jn_ap_op2/script.py b/src/methods/jn_ap_op2/script.py
similarity index 90%
rename from src/task/methods/jn_ap_op2/script.py
rename to src/methods/jn_ap_op2/script.py
index 753da0ee..1529cbef 100644
--- a/src/task/methods/jn_ap_op2/script.py
+++ b/src/methods/jn_ap_op2/script.py
@@ -20,18 +20,16 @@
     "submission_names": ["dl40"]
 }
 meta = {
-    "resources_dir": "src/task/methods/jn_ap_op2",
+    "resources_dir": "src/methods/jn_ap_op2",
 }
 ## VIASH END
 
 sys.path.append(meta["resources_dir"])
 
-from anndata_to_dataframe import anndata_to_dataframe
 from helper import plant_seed, MultiOutputTargetEncoder, train
 
 print('Reading input files', flush=True)
 de_train_h5ad = ad.read_h5ad(par["de_train_h5ad"])
-de_train = anndata_to_dataframe(de_train_h5ad, par["layer"])
 id_map = pd.read_csv(par["id_map"])
 
 gene_names = list(de_train_h5ad.var_names)
@@ -58,10 +56,10 @@
 
 print('Data location', flush=True)
 # Data location
-cell_types = de_train['cell_type']
-sm_names = de_train['sm_name']
+cell_types = de_train_h5ad.obs['cell_type'].astype(str)
+sm_names = de_train_h5ad.obs['sm_name'].astype(str)
 
-data = de_train.drop(columns=["cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control"]).to_numpy(dtype=float)
+data = de_train_h5ad.layers[par["layer"]]
 
 print('Train model', flush=True)
 # ... train model ...
diff --git a/src/task/methods/lgc_ensemble/config.vsh.yaml b/src/methods/lgc_ensemble/config.vsh.yaml
similarity index 100%
rename from src/task/methods/lgc_ensemble/config.vsh.yaml
rename to src/methods/lgc_ensemble/config.vsh.yaml
diff --git a/src/task/methods/lgc_ensemble/main.nf b/src/methods/lgc_ensemble/main.nf
similarity index 100%
rename from src/task/methods/lgc_ensemble/main.nf
rename to src/methods/lgc_ensemble/main.nf
diff --git a/src/task/methods/lgc_ensemble/test.sh b/src/methods/lgc_ensemble/test.sh
similarity index 100%
rename from src/task/methods/lgc_ensemble/test.sh
rename to src/methods/lgc_ensemble/test.sh
diff --git a/src/task/methods/lgc_ensemble_direct/config.vsh.yaml b/src/methods/lgc_ensemble_direct/config.vsh.yaml
similarity index 100%
rename from src/task/methods/lgc_ensemble_direct/config.vsh.yaml
rename to src/methods/lgc_ensemble_direct/config.vsh.yaml
diff --git a/src/task/methods/lgc_ensemble_direct/script.py b/src/methods/lgc_ensemble_direct/script.py
similarity index 96%
rename from src/task/methods/lgc_ensemble_direct/script.py
rename to src/methods/lgc_ensemble_direct/script.py
index fdf90516..6aeba5c6 100644
--- a/src/task/methods/lgc_ensemble_direct/script.py
+++ b/src/methods/lgc_ensemble_direct/script.py
@@ -19,7 +19,7 @@
     "output_model": None
 }
 meta = {
-    "resources_dir": "src/task/methods/lgc_ensemble",
+    "resources_dir": "src/methods/lgc_ensemble",
     "temp_dir": "/tmp"
 }
 ## VIASH END
diff --git a/src/task/methods/lgc_ensemble_helpers/divisor_finder.py b/src/methods/lgc_ensemble_helpers/divisor_finder.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/divisor_finder.py
rename to src/methods/lgc_ensemble_helpers/divisor_finder.py
diff --git a/src/task/methods/lgc_ensemble_helpers/helper_classes.py b/src/methods/lgc_ensemble_helpers/helper_classes.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/helper_classes.py
rename to src/methods/lgc_ensemble_helpers/helper_classes.py
diff --git a/src/task/methods/lgc_ensemble_helpers/helper_functions.py b/src/methods/lgc_ensemble_helpers/helper_functions.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/helper_functions.py
rename to src/methods/lgc_ensemble_helpers/helper_functions.py
diff --git a/src/task/methods/lgc_ensemble_helpers/models.py b/src/methods/lgc_ensemble_helpers/models.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/models.py
rename to src/methods/lgc_ensemble_helpers/models.py
diff --git a/src/task/methods/lgc_ensemble_helpers/predict.py b/src/methods/lgc_ensemble_helpers/predict.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/predict.py
rename to src/methods/lgc_ensemble_helpers/predict.py
diff --git a/src/task/methods/lgc_ensemble_helpers/prepare_data.py b/src/methods/lgc_ensemble_helpers/prepare_data.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/prepare_data.py
rename to src/methods/lgc_ensemble_helpers/prepare_data.py
diff --git a/src/task/methods/lgc_ensemble_helpers/train.py b/src/methods/lgc_ensemble_helpers/train.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_helpers/train.py
rename to src/methods/lgc_ensemble_helpers/train.py
diff --git a/src/task/methods/lgc_ensemble_predict/config.vsh.yaml b/src/methods/lgc_ensemble_predict/config.vsh.yaml
similarity index 100%
rename from src/task/methods/lgc_ensemble_predict/config.vsh.yaml
rename to src/methods/lgc_ensemble_predict/config.vsh.yaml
diff --git a/src/task/methods/lgc_ensemble_predict/script.py b/src/methods/lgc_ensemble_predict/script.py
similarity index 100%
rename from src/task/methods/lgc_ensemble_predict/script.py
rename to src/methods/lgc_ensemble_predict/script.py
diff --git a/src/task/methods/lgc_ensemble_prepare/config.vsh.yaml b/src/methods/lgc_ensemble_prepare/config.vsh.yaml
similarity index 100%
rename from src/task/methods/lgc_ensemble_prepare/config.vsh.yaml
rename to src/methods/lgc_ensemble_prepare/config.vsh.yaml
diff --git a/src/task/methods/lgc_ensemble_prepare/script.py b/src/methods/lgc_ensemble_prepare/script.py
similarity index 97%
rename from src/task/methods/lgc_ensemble_prepare/script.py
rename to src/methods/lgc_ensemble_prepare/script.py
index aaa91682..562291ac 100644
--- a/src/task/methods/lgc_ensemble_prepare/script.py
+++ b/src/methods/lgc_ensemble_prepare/script.py
@@ -22,7 +22,7 @@
     "train_data_aug_dir": "output/train_data_aug_dir",
 }
 meta = {
-    "resources_dir": "src/task/methods/lgc_ensemble",
+    "resources_dir": "src/methods/lgc_ensemble",
     "temp_dir": "/tmp"
 }
 ## VIASH END
@@ -37,7 +37,7 @@
 
 
 ###################################################################
-# interpreted from src/task/methods/lgc_ensemble/prepare_data.py
+# interpreted from src/methods/lgc_ensemble_helpers/prepare_data.py
 # prepare data
 seed_everything()
 
@@ -91,7 +91,7 @@
 _, _ = save_ChemBERTa_features(test_smiles, out_dir=par["train_data_aug_dir"], on_train_data=False)
 
 ###################################################################
-# interpreted from src/task/methods/lgc_ensemble/train.py
+# interpreted from src/methods/lgc_ensemble_helpers/train.py
 
 ## Prepare cross-validation
 cell_types_sm_names = de_train[['cell_type', 'sm_name']]
diff --git a/src/task/methods/lgc_ensemble_train/config.vsh.yaml b/src/methods/lgc_ensemble_train/config.vsh.yaml
similarity index 100%
rename from src/task/methods/lgc_ensemble_train/config.vsh.yaml
rename to src/methods/lgc_ensemble_train/config.vsh.yaml
diff --git a/src/task/methods/lgc_ensemble_train/script.py b/src/methods/lgc_ensemble_train/script.py
similarity index 94%
rename from src/task/methods/lgc_ensemble_train/script.py
rename to src/methods/lgc_ensemble_train/script.py
index fa557221..a50e0083 100644
--- a/src/task/methods/lgc_ensemble_train/script.py
+++ b/src/methods/lgc_ensemble_train/script.py
@@ -18,7 +18,7 @@
     "log_file": "output/log.json",
 }
 meta = {
-    "resources_dir": "src/task/methods/lgc_ensemble",
+    "resources_dir": "src/methods/lgc_ensemble",
     "temp_dir": "/tmp"
 }
 ## VIASH END
@@ -32,7 +32,7 @@
 from helper_functions import train_function
 
 ###################################################################
-# Interpretation from src/task/methods/lgc_ensemble/helper_functions.py
+# Interpretation from src/methods/lgc_ensemble_helpers/helper_functions.py
 
 print("Load data...", flush=True)
 # read kf_cv_initial from json
diff --git a/src/task/methods/nn_retraining_with_pseudolabels/config.vsh.yaml b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml
similarity index 100%
rename from src/task/methods/nn_retraining_with_pseudolabels/config.vsh.yaml
rename to src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml
diff --git a/src/task/methods/nn_retraining_with_pseudolabels/notebook_264.py b/src/methods/nn_retraining_with_pseudolabels/notebook_264.py
similarity index 100%
rename from src/task/methods/nn_retraining_with_pseudolabels/notebook_264.py
rename to src/methods/nn_retraining_with_pseudolabels/notebook_264.py
diff --git a/src/task/methods/nn_retraining_with_pseudolabels/notebook_266.py b/src/methods/nn_retraining_with_pseudolabels/notebook_266.py
similarity index 100%
rename from src/task/methods/nn_retraining_with_pseudolabels/notebook_266.py
rename to src/methods/nn_retraining_with_pseudolabels/notebook_266.py
diff --git a/src/task/methods/nn_retraining_with_pseudolabels/script.py b/src/methods/nn_retraining_with_pseudolabels/script.py
similarity index 96%
rename from src/task/methods/nn_retraining_with_pseudolabels/script.py
rename to src/methods/nn_retraining_with_pseudolabels/script.py
index d9b83461..13fcb880 100644
--- a/src/task/methods/nn_retraining_with_pseudolabels/script.py
+++ b/src/methods/nn_retraining_with_pseudolabels/script.py
@@ -26,7 +26,7 @@
     "output": "output.h5ad",
     "reps": 2,
 }
-meta = {"resources_dir": "src/task/methods/nn_retraining_with_pseudolabels"}
+meta = {"resources_dir": "src/methods/nn_retraining_with_pseudolabels"}
 ## VIASH END
 
 # load helper functions in notebooks
diff --git a/src/task/methods/pyboost/config.vsh.yaml b/src/methods/pyboost/config.vsh.yaml
similarity index 100%
rename from src/task/methods/pyboost/config.vsh.yaml
rename to src/methods/pyboost/config.vsh.yaml
diff --git a/src/task/methods/pyboost/helper.py b/src/methods/pyboost/helper.py
similarity index 100%
rename from src/task/methods/pyboost/helper.py
rename to src/methods/pyboost/helper.py
diff --git a/src/task/methods/pyboost/script.py b/src/methods/pyboost/script.py
similarity index 98%
rename from src/task/methods/pyboost/script.py
rename to src/methods/pyboost/script.py
index 95e190b7..ef6f878c 100644
--- a/src/task/methods/pyboost/script.py
+++ b/src/methods/pyboost/script.py
@@ -20,7 +20,7 @@
     output = "output.h5ad",
 )
 meta = dict(
-    resources_dir = "src/task/methods/pyboost"
+    resources_dir = "src/methods/pyboost"
 )
 ## VIASH END
 
diff --git a/src/task/methods/scape/config.vsh.yaml b/src/methods/scape/config.vsh.yaml
similarity index 100%
rename from src/task/methods/scape/config.vsh.yaml
rename to src/methods/scape/config.vsh.yaml
diff --git a/src/task/methods/scape/script.py b/src/methods/scape/script.py
similarity index 100%
rename from src/task/methods/scape/script.py
rename to src/methods/scape/script.py
diff --git a/src/task/methods/transformer_ensemble/config.vsh.yaml b/src/methods/transformer_ensemble/config.vsh.yaml
similarity index 100%
rename from src/task/methods/transformer_ensemble/config.vsh.yaml
rename to src/methods/transformer_ensemble/config.vsh.yaml
diff --git a/src/task/methods/transformer_ensemble/models.py b/src/methods/transformer_ensemble/models.py
similarity index 100%
rename from src/task/methods/transformer_ensemble/models.py
rename to src/methods/transformer_ensemble/models.py
diff --git a/src/task/methods/transformer_ensemble/script.py b/src/methods/transformer_ensemble/script.py
similarity index 98%
rename from src/task/methods/transformer_ensemble/script.py
rename to src/methods/transformer_ensemble/script.py
index 79764953..f6368f10 100644
--- a/src/task/methods/transformer_ensemble/script.py
+++ b/src/methods/transformer_ensemble/script.py
@@ -19,7 +19,7 @@
     "layer": "sign_log10_pval"
 }
 meta = {
-    "resources_dir": "src/task/methods/transformer_ensemble",
+    "resources_dir": "src/methods/transformer_ensemble",
 }
 ## VIASH END
 
diff --git a/src/task/methods/transformer_ensemble/train.py b/src/methods/transformer_ensemble/train.py
similarity index 100%
rename from src/task/methods/transformer_ensemble/train.py
rename to src/methods/transformer_ensemble/train.py
diff --git a/src/task/methods/transformer_ensemble/utils.py b/src/methods/transformer_ensemble/utils.py
similarity index 100%
rename from src/task/methods/transformer_ensemble/utils.py
rename to src/methods/transformer_ensemble/utils.py
diff --git a/src/task/metrics/mean_rowwise_correlation/config.vsh.yaml b/src/metrics/mean_rowwise_correlation/config.vsh.yaml
similarity index 100%
rename from src/task/metrics/mean_rowwise_correlation/config.vsh.yaml
rename to src/metrics/mean_rowwise_correlation/config.vsh.yaml
diff --git a/src/task/metrics/mean_rowwise_correlation/script.R b/src/metrics/mean_rowwise_correlation/script.R
similarity index 100%
rename from src/task/metrics/mean_rowwise_correlation/script.R
rename to src/metrics/mean_rowwise_correlation/script.R
diff --git a/src/task/metrics/mean_rowwise_error/config.vsh.yaml b/src/metrics/mean_rowwise_error/config.vsh.yaml
similarity index 100%
rename from src/task/metrics/mean_rowwise_error/config.vsh.yaml
rename to src/metrics/mean_rowwise_error/config.vsh.yaml
diff --git a/src/task/metrics/mean_rowwise_error/script.R b/src/metrics/mean_rowwise_error/script.R
similarity index 100%
rename from src/task/metrics/mean_rowwise_error/script.R
rename to src/metrics/mean_rowwise_error/script.R
diff --git a/src/task/process_dataset/add_uns_metadata/config.vsh.yaml b/src/process_dataset/add_uns_metadata/config.vsh.yaml
similarity index 96%
rename from src/task/process_dataset/add_uns_metadata/config.vsh.yaml
rename to src/process_dataset/add_uns_metadata/config.vsh.yaml
index febcc84a..fbb2047e 100644
--- a/src/task/process_dataset/add_uns_metadata/config.vsh.yaml
+++ b/src/process_dataset/add_uns_metadata/config.vsh.yaml
@@ -1,13 +1,12 @@
 functionality:
   name: add_uns_metadata
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
       label: Add metadata
       summary: Add metadata to the pseudobulked data
       description: |
-        Add metadata to the pseudobulked single-cell dataset for the DGE regression task.
+        Add metadata to the pseudobulked single-cell dataset for the perturbation prediction task.
   arguments:
     - name: --input
       type: file
diff --git a/src/task/process_dataset/add_uns_metadata/script.py b/src/process_dataset/add_uns_metadata/script.py
similarity index 100%
rename from src/task/process_dataset/add_uns_metadata/script.py
rename to src/process_dataset/add_uns_metadata/script.py
diff --git a/src/task/process_dataset/bootstrap/config.vsh.yaml b/src/process_dataset/bootstrap/config.vsh.yaml
similarity index 98%
rename from src/task/process_dataset/bootstrap/config.vsh.yaml
rename to src/process_dataset/bootstrap/config.vsh.yaml
index a8fb69ed..c23b3583 100644
--- a/src/task/process_dataset/bootstrap/config.vsh.yaml
+++ b/src/process_dataset/bootstrap/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: bootstrap
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/bootstrap/script.py b/src/process_dataset/bootstrap/script.py
similarity index 100%
rename from src/task/process_dataset/bootstrap/script.py
rename to src/process_dataset/bootstrap/script.py
diff --git a/src/task/process_dataset/clean_pseudobulk/config.vsh.yaml b/src/process_dataset/clean_pseudobulk/config.vsh.yaml
similarity index 96%
rename from src/task/process_dataset/clean_pseudobulk/config.vsh.yaml
rename to src/process_dataset/clean_pseudobulk/config.vsh.yaml
index a7a63730..d983a446 100644
--- a/src/task/process_dataset/clean_pseudobulk/config.vsh.yaml
+++ b/src/process_dataset/clean_pseudobulk/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: filter_vars
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/clean_pseudobulk/script.R b/src/process_dataset/clean_pseudobulk/script.R
similarity index 100%
rename from src/task/process_dataset/clean_pseudobulk/script.R
rename to src/process_dataset/clean_pseudobulk/script.R
diff --git a/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml b/src/process_dataset/compute_pseudobulk/config.vsh.yaml
similarity index 89%
rename from src/task/process_dataset/compute_pseudobulk/config.vsh.yaml
rename to src/process_dataset/compute_pseudobulk/config.vsh.yaml
index 30bbd5c7..c63af091 100644
--- a/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml
+++ b/src/process_dataset/compute_pseudobulk/config.vsh.yaml
@@ -1,13 +1,12 @@
 functionality:
   name: compute_pseudobulk
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
       label: Pseudobulk
       summary: Compute pseudobulk data
       description: |
-        Compute pseudobulk data for the DGE regression task.
+        Compute pseudobulk data for the perturbation regression task.
   arguments:
     - name: --input
       type: file
diff --git a/src/task/process_dataset/compute_pseudobulk/script.py b/src/process_dataset/compute_pseudobulk/script.py
similarity index 100%
rename from src/task/process_dataset/compute_pseudobulk/script.py
rename to src/process_dataset/compute_pseudobulk/script.py
diff --git a/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml
similarity index 97%
rename from src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml
rename to src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml
index e6d6c209..77670fa6 100644
--- a/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml
+++ b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: convert_h5ad_to_parquet
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/convert_h5ad_to_parquet/script.py b/src/process_dataset/convert_h5ad_to_parquet/script.py
similarity index 100%
rename from src/task/process_dataset/convert_h5ad_to_parquet/script.py
rename to src/process_dataset/convert_h5ad_to_parquet/script.py
diff --git a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml
similarity index 98%
rename from src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml
rename to src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml
index 95c4e83c..4b8cda82 100644
--- a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml
+++ b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: convert_kaggle_h5ad_to_parquet
-  namespace: task/process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/convert_kaggle_h5ad_to_parquet/script.py b/src/process_dataset/convert_kaggle_h5ad_to_parquet/script.py
similarity index 100%
rename from src/task/process_dataset/convert_kaggle_h5ad_to_parquet/script.py
rename to src/process_dataset/convert_kaggle_h5ad_to_parquet/script.py
diff --git a/src/task/process_dataset/filter_obs/config.vsh.yaml b/src/process_dataset/filter_obs/config.vsh.yaml
similarity index 96%
rename from src/task/process_dataset/filter_obs/config.vsh.yaml
rename to src/process_dataset/filter_obs/config.vsh.yaml
index 24f19e76..bc950f9d 100644
--- a/src/task/process_dataset/filter_obs/config.vsh.yaml
+++ b/src/process_dataset/filter_obs/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: filter_obs
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/filter_obs/script.R b/src/process_dataset/filter_obs/script.R
similarity index 100%
rename from src/task/process_dataset/filter_obs/script.R
rename to src/process_dataset/filter_obs/script.R
diff --git a/src/task/process_dataset/generate_id_map/config.vsh.yaml b/src/process_dataset/generate_id_map/config.vsh.yaml
similarity index 96%
rename from src/task/process_dataset/generate_id_map/config.vsh.yaml
rename to src/process_dataset/generate_id_map/config.vsh.yaml
index 648634c8..97696a7e 100644
--- a/src/task/process_dataset/generate_id_map/config.vsh.yaml
+++ b/src/process_dataset/generate_id_map/config.vsh.yaml
@@ -1,6 +1,5 @@
 functionality:
   name: generate_id_map
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
diff --git a/src/task/process_dataset/generate_id_map/script.py b/src/process_dataset/generate_id_map/script.py
similarity index 100%
rename from src/task/process_dataset/generate_id_map/script.py
rename to src/process_dataset/generate_id_map/script.py
diff --git a/src/task/process_dataset/run_limma/config.vsh.yaml b/src/process_dataset/run_limma/config.vsh.yaml
similarity index 94%
rename from src/task/process_dataset/run_limma/config.vsh.yaml
rename to src/process_dataset/run_limma/config.vsh.yaml
index e513d5ea..b50845d1 100644
--- a/src/task/process_dataset/run_limma/config.vsh.yaml
+++ b/src/process_dataset/run_limma/config.vsh.yaml
@@ -1,13 +1,12 @@
 functionality:
   name: run_limma
-  namespace: process_dataset
   info:
     type: process_dataset
     type_info:
       label: Limma
       summary: Run limma
       description: |
-        Run limma for the DGE regression task.
+        Run limma for the perturbation regression task.
   arguments:
     - name: --input
       type: file
diff --git a/src/task/process_dataset/run_limma/script.R b/src/process_dataset/run_limma/script.R
similarity index 100%
rename from src/task/process_dataset/run_limma/script.R
rename to src/process_dataset/run_limma/script.R
diff --git a/src/task/utils/anndata_to_dataframe.py b/src/utils/anndata_to_dataframe.py
similarity index 100%
rename from src/task/utils/anndata_to_dataframe.py
rename to src/utils/anndata_to_dataframe.py
diff --git a/src/task/workflows/process_dataset/config.vsh.yaml b/src/workflows/process_dataset/config.vsh.yaml
similarity index 100%
rename from src/task/workflows/process_dataset/config.vsh.yaml
rename to src/workflows/process_dataset/config.vsh.yaml
diff --git a/src/task/workflows/process_dataset/main.nf b/src/workflows/process_dataset/main.nf
similarity index 100%
rename from src/task/workflows/process_dataset/main.nf
rename to src/workflows/process_dataset/main.nf
diff --git a/src/task/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
similarity index 100%
rename from src/task/workflows/run_benchmark/config.vsh.yaml
rename to src/workflows/run_benchmark/config.vsh.yaml
diff --git a/src/task/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
similarity index 100%
rename from src/task/workflows/run_benchmark/main.nf
rename to src/workflows/run_benchmark/main.nf
diff --git a/src/task/workflows/run_stability_analysis/config.vsh.yaml b/src/workflows/run_stability_analysis/config.vsh.yaml
similarity index 100%
rename from src/task/workflows/run_stability_analysis/config.vsh.yaml
rename to src/workflows/run_stability_analysis/config.vsh.yaml
diff --git a/src/task/workflows/run_stability_analysis/main.nf b/src/workflows/run_stability_analysis/main.nf
similarity index 100%
rename from src/task/workflows/run_stability_analysis/main.nf
rename to src/workflows/run_stability_analysis/main.nf