update TMLECli

TARGENE · Aug 23, 2024 · abc5b59 · abc5b59
1 parent d6f09de
commit abc5b59
Show file tree

Hide file tree

Showing 23 changed files with 309 additions and 199 deletions.
diff --git a/docs/src/associated_softwares.md b/docs/src/associated_softwares.md
@@ -3,4 +3,4 @@
 You may either not be interested in population genetics at all, or not willing to run the full pipeline on your project. If you are still eager to leverage the Targeted Learning framework, you can either rely on:
 
 - [TMLE.jl](https://targene.github.io/TMLE.jl/stable/): A Julia package for Targeted Maximum Likelihood Estimation (TMLE).
-- [TargetedEstimation.jl](https://github.com/TARGENE/TargetedEstimation.jl): A command line interface to run TMLE on your data.
+- [TmleCLI.jl](https://github.com/TARGENE/TmleCLI.jl): A command line interface to run TMLE on your data.
diff --git a/docs/src/developer_guide/contribution_guide.md b/docs/src/developer_guide/contribution_guide.md
@@ -35,7 +35,7 @@ Currently, all TarGene building blocks (executables) are provided as docker imag
 | --- | --- |
 | [TargeneCore.jl](https://github.com/TARGENE/TargeneCore.jl) | [tl-core](https://hub.docker.com/r/olivierlabayle/tl-core/tags) |
 | [UKBMain.jl](https://github.com/TARGENE/UKBMain.jl) | [ukbmain](https://hub.docker.com/r/olivierlabayle/ukbmain/tags) |
-| [TargetedEstimation.jl](https://github.com/TARGENE/TargetedEstimation.jl) | [targeted-estimation](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags) |
+| [TmleCLI.jl](https://github.com/TARGENE/TmleCLI.jl) | [targeted-estimation](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags) |
 
 ## Note on the pipeline's tests
 

diff --git a/docs/src/developer_guide/project_organization.md b/docs/src/developer_guide/project_organization.md
@@ -2,7 +2,7 @@
 
 The TarGene project is organized around the [targene-pipeline](https://github.com/TARGENE/targene-pipeline) repository which contains the [Nextflow](https://www.nextflow.io/) workflows. However, this repository does not contain the executables that are used by the Nextflow processes. Those executables originate from complementary repositories:
 
-- [TargetedEstimation.jl](https://github.com/TARGENE/TargetedEstimation.jl)
+- [TmleCLI.jl](https://github.com/TARGENE/TmleCLI.jl)
 - [TargeneCore.jl](https://github.com/TARGENE/TargeneCore.jl)
 - [UKBMain.jl](https://github.com/TARGENE/UKBMain.jl)
 

diff --git a/docs/src/targene/runtime_considerations.md b/docs/src/targene/runtime_considerations.md
@@ -1,3 +1,3 @@
 # Some Runtime considerations
 
-Runtime is discussed in detail [here](https://targene.github.io/TargetedEstimation.jl/stable/tmle_estimation/#Runtime).
+Runtime is discussed in detail [here](https://targene.github.io/TmleCLI.jl/stable/tmle_estimation/#Runtime).
diff --git a/docs/src/targene/tmle.md b/docs/src/targene/tmle.md
@@ -58,6 +58,6 @@ All estimators will learn the nuisance functions `Q` and `G` with the provided `
 - `Q_binary`: A MLJ model used for the estimation of `E[Y|T, W, C]` when the outcome `Y` is binary.
 - `G`: A MLJ model used for the estimation of `p(T|W)`.
 
-For the list of available models and resampling strategies, checkout the [TargetedEstimation documentation](https://targene.github.io/TargetedEstimation.jl/stable/models/).
+For the list of available models and resampling strategies, check out the [TmleCLI documentation](https://targene.github.io/TmleCLI.jl/stable/models/).
 
 For full details, on available estimators and how to specify them, visit the [TMLE.jl documentation](https://targene.github.io/TMLE.jl/stable/).
diff --git a/main.nf b/main.nf
@@ -1,6 +1,8 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 
+import org.yaml.snakeyaml.Yaml
+
 // Misc Parameters
 params.VERBOSITY = 0
 params.RNG = 123
@@ -60,11 +62,17 @@ params.MIN_FACTOR_LEVEL_OCCURENCES = 10
 params.MAX_SAMPLING_ATTEMPTS = 1000
 params.NSAMPLES_FOR_TRUTH = 1000000
 
+include { GWAS } from './workflows/gwas.nf'
 include { TARGENE } from './workflows/targene.nf'
 include { PCA } from './workflows/pca.nf'
 include { MAKE_DATASET } from './workflows/dataset.nf'
 include { NULL_SIMULATION; REALISTIC_SIMULATION } from './workflows/simulations.nf'
 
+def isGWAS(){ // True when the YAML file at params.ESTIMANDS_FILE has a top-level `type: gwas` entry.
+    def config = new File(params.ESTIMANDS_FILE).withReader { reader -> new Yaml().load(reader) } // withReader closes the file; `def` keeps config out of the script binding
+    return config?.type == 'gwas' // null-safe: an empty YAML document loads as null
+}
+
 log.info """\
          ${workflow.manifest.name} v${workflow.manifest.version}
          ==========================
@@ -81,5 +89,10 @@ log.info """\
 
 
 workflow  {
-    TARGENE()
+    if (isGWAS()) {
+        GWAS()
+    }
+    else {
+        TARGENE()
+    }
 }
diff --git a/modules/estimation.nf b/modules/estimation.nf
@@ -14,7 +14,7 @@ process MergeOutputs {
         json_option = params.JSON_OUTPUT != "NO_JSON_OUTPUT" ? "--json-output=${params.JSON_OUTPUT}" : ""
         """
         TEMPD=\$(mktemp -d)
-        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TargetedEstimation.jl/TMLESysimage.so --project=/TargetedEstimation.jl --startup-file=no /TargetedEstimation.jl/tmle.jl merge \
+        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TmleCLI.jl/TMLESysimage.so --project=/TmleCLI.jl --startup-file=no /TmleCLI.jl/tmle.jl merge \
         tmle_result \
         ${json_option} \
         --hdf5-output=${params.HDF5_OUTPUT}
@@ -26,8 +26,7 @@ process TMLE {
     label 'tmle_image'
 
     input:
-        path data
-        path estimands_file
+        tuple path(dataset), path(estimands_file)
         path estimator_file
 
     output:
@@ -40,8 +39,8 @@ process TMLE {
         save_sample_ids = params.SVP == true ? "--save-sample-ids" : ""
         """
         TEMPD=\$(mktemp -d)
-        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TargetedEstimation.jl/TMLESysimage.so --project=/TargetedEstimation.jl --threads=${task.cpus} --startup-file=no /TargetedEstimation.jl/tmle.jl tmle \
-        ${data} \
+        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TmleCLI.jl/TMLESysimage.so --project=/TmleCLI.jl --threads=${task.cpus} --startup-file=no /TmleCLI.jl/tmle.jl tmle \
+        ${dataset} \
         --estimands=${estimands_file} \
         --estimators=${estimator_file} \
         --hdf5-output=${hdf5out} \

diff --git a/modules/estimation_inputs.nf b/modules/estimation_inputs.nf
@@ -12,8 +12,7 @@ process EstimationInputs {
         path config_file
 
     output:
-        path "final.data.arrow", emit: dataset
-        path "final.*.jls", emit: estimands
+        tuple path("${genotypes_id}.data.arrow"), path("${genotypes_id}.*.jls")
 
     script:
         genotypes_prefix = longest_prefix(genotypes)
@@ -26,7 +25,7 @@ process EstimationInputs {
         --genotypes-prefix=${genotypes_prefix} \
         --traits-file=${traits} \
         --pcs-file=${pcs} \
-        --outprefix=final \
+        --outprefix=${genotypes_id} \
         ${batch_size} \
         ${call_threshold} \
         --positivity-constraint=${params.POSITIVITY_CONSTRAINT} \

diff --git a/modules/svp.nf b/modules/svp.nf
@@ -51,7 +51,7 @@ process SVP {
         hdf5_prefix = longest_prefix(hdf5_results)
         """
         TEMPD=\$(mktemp -d)
-        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TargetedEstimation.jl/TMLESysimage.so --project=/TargetedEstimation.jl --startup-file=no /TargetedEstimation.jl/tmle.jl svp \
+        JULIA_DEPOT_PATH=\$TEMPD:/opt julia --sysimage=/TmleCLI.jl/TMLESysimage.so --project=/TmleCLI.jl --startup-file=no /TmleCLI.jl/tmle.jl svp \
         ${hdf5_prefix} \
         --n-estimators=${params.NB_SVP_ESTIMATORS} \
         --max-tau=${params.MAX_SVP_THRESHOLD} \

diff --git a/subworkflows/estimation.nf b/subworkflows/estimation.nf
@@ -3,15 +3,13 @@ include { GenerateSummaryPlots } from '../modules/plot.nf'
 
 workflow EstimationWorkflow {
     take:
-        dataset
-        estimands_configs
+        dataset_and_estimands
         estimators_config
 
     main:
         // Run the estimation process for each estimands configuration
         TMLE(
-            dataset,
-            estimands_configs,
+            dataset_and_estimands,
             estimators_config,
         )
         // Merge results files together