From 770c385ecace28a178dbfae950c4a84e305e4f7b Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 18 Jul 2023 11:37:49 +0100 Subject: [PATCH 01/14] update reports version to 2.1.0 --- egg4_config_v2.0.2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.0.2.py b/egg4_config_v2.0.2.py index f2c210d..79725f1 100644 --- a/egg4_config_v2.0.2.py +++ b/egg4_config_v2.0.2.py @@ -28,7 +28,7 @@ ### Apps and workflows: # dias_reports -# v2.0.4 +# v2.1.0 rpt_workflow_id = "{}:workflow-GBQ985Q433GYJjv0379PJqqg".format(ref_project_id) generate_bed_vep_stage_id = "stage-G9P8p104vyJJGy6y86FQBxkv" From e6838bdf84794841352a3f95b5c32f73f514115b Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 18 Jul 2023 11:40:36 +0100 Subject: [PATCH 02/14] update reports stage names to new verbose forms --- egg4_config_v2.0.2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/egg4_config_v2.0.2.py b/egg4_config_v2.0.2.py index 79725f1..b3bbc6f 100644 --- a/egg4_config_v2.0.2.py +++ b/egg4_config_v2.0.2.py @@ -31,11 +31,11 @@ # v2.1.0 rpt_workflow_id = "{}:workflow-GBQ985Q433GYJjv0379PJqqg".format(ref_project_id) -generate_bed_vep_stage_id = "stage-G9P8p104vyJJGy6y86FQBxkv" -vep_stage_id = "stage-G9Q0jzQ4vyJ3x37X4KBKXZ5v" -generate_workbook_stage_id = "stage-G9P8VQj4vyJBJ0kg50vzVPxY" -generate_bed_athena_stage_id = "stage-Fyq5yy0433GXxz691bKyvjPJ" -athena_stage_id = "stage-Fyq5z18433GfYZbp3vX1KqjB" +generate_bed_vep_stage_id = "stage-rpt_generate_bed_vep" +vep_stage_id = "stage-rpt_vep" +generate_workbook_stage_id = "stage-rpt_generate_workbook" +generate_bed_athena_stage_id = "stage-rpt_generate_bed_athena" +athena_stage_id = "stage-rpt_athena" rpt_dynamic_files = { # inputs for generate bed for vep From cd7383c0646535a7693ba16e1e336ef7bb9ee2e3 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 18 Jul 2023 13:01:29 +0100 Subject: [PATCH 03/14] renamed config file with new version --- egg4_config_v2.0.2.py => egg4_config_v2.1.0.py | 
0 1 file changed, 0 insertions(+), 0 deletions(-) rename egg4_config_v2.0.2.py => egg4_config_v2.1.0.py (100%) diff --git a/egg4_config_v2.0.2.py b/egg4_config_v2.1.0.py similarity index 100% rename from egg4_config_v2.0.2.py rename to egg4_config_v2.1.0.py From 72a2511281575c28a3036aedcbbbc8b2686efe4b Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 18 Jul 2023 13:02:53 +0100 Subject: [PATCH 04/14] updated twe version in config --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index b3bbc6f..edfb3f0 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -1,5 +1,5 @@ assay_name = "TWE" # Twist Whole Exome -assay_version = "v2.0.2" +assay_version = "v2.1.0" ref_project_id = "project-Fkb6Gkj433GVVvj73J7x8KbV" From 2db5fb735ab4e794b94a3c84d7302d0a40fc9e54 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Wed, 19 Jul 2023 14:32:23 +0100 Subject: [PATCH 05/14] moved generate_variant_workbooks static inputs from reports workflow --- egg4_config_v2.1.0.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index edfb3f0..f898087 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -45,9 +45,19 @@ "{}.nirvana_genes2transcripts".format(generate_bed_vep_stage_id): "", "{}.gene_panels ID".format(generate_bed_vep_stage_id): genepanels_file, "{}.gene_panels".format(generate_bed_vep_stage_id): "", - # input for eggd_vep + # inputs for eggd_vep "{}.config_file ID".format(vep_stage_id): vep_config, "{}.config_file".format(vep_stage_id): "", + # inputs for generate_variant_workbook + "{}.exclude_columns".format(generate_workbook_stage_id): "BaseQRankSum ClippingRankSum DB ExcessHet FS MLEAC MLEAF MQ MQRankSum QD ReadPosRankSum SOR PL QUAL ID FILTER CSQ_ClinVar_CLNSIGCONF CSQ_Allele CSQ_HGNC_ID DP AC AF AN CSQ_SpliceAI_pred_DP_AL CSQ_SpliceAI_pred_DP_AG CSQ_SpliceAI_pred_DP_DG 
CSQ_SpliceAI_pred_DP_DL", + "{}.acmg".format(generate_workbook_stage_id): True, + "{}.rename_columns".format(generate_workbook_stage_id): "CSQ_Feature=Transcript DP_FMT=DP", + "{}.add_comment_column".format(generate_workbook_stage_id): True, + "{}.keep_tmp".format(generate_workbook_stage_id): True, + "{}.summary".format(generate_workbook_stage_id): "dias", + "{}.filter".format(generate_workbook_stage_id): "bcftools filter -e '(CSQ_Consequence==\"synonymous_variant\" | CSQ_Consequence==\"intron_variant\" | CSQ_Consequence==\"upstream_gene_variant\" | CSQ_Consequence==\"downstream_gene_variant\" | CSQ_Consequence==\"intergenic_variant\" | CSQ_Consequence==\"5_prime_UTR_variant\" | CSQ_Consequence==\"3_prime_UTR_variant\" | CSQ_gnomADe_AF>0.01 | CSQ_gnomADg_AF>0.01 | CSQ_TWE_AF>0.05) & CSQ_HGMD_CLASS!~ \"DM\" & CSQ_ClinVar_CLNSIG!~ \"pathogenic\\/i\" & CSQ_ClinVar_CLNSIGCONF!~ \"pathogenic\\/i\"'", + "{}.human_filter".format(generate_workbook_stage_id): "excluded gnomAD exomes / genomes > 1%, TWE > 5%, synonymous / intronic / intergenic / upstream / downstream / UTRs EXCEPT pathogenic status in ClinVar OR DM in HGMD Class", + "{}.reorder_columns".format(generate_workbook_stage_id): "CHROM POS REF ALT GT GQ DP_FMT AD CSQ_SYMBOL CSQ_EXON CSQ_INTRON CSQ_HGVSc CSQ_HGVSp CSQ_Consequence CSQ_IMPACT CSQ_VARIANT_CLASS CSQ_gnomADe_AF CSQ_gnomADe_Hom CSQ_gnomADe_AC CSQ_gnomADe_AN CSQ_gnomADg_AF CSQ_gnomADg_AC CSQ_gnomADg_AN CSQ_TWE_AF CSQ_TWE_AC_Hom CSQ_TWE_AC_Het CSQ_TWE_AN CSQ_HGMD CSQ_HGMD_CLASS CSQ_HGMD_RANKSCORE CSQ_HGMD_PHEN CSQ_Existing_variation CSQ_ClinVar CSQ_ClinVar_CLNDN CSQ_ClinVar_CLNSIG CSQ_Mastermind_MMID3 CSQ_CADD_PHRED CSQ_REVEL CSQ_SpliceAI_pred_DS_AG CSQ_SpliceAI_pred_DS_AL CSQ_SpliceAI_pred_DS_DG CSQ_SpliceAI_pred_DS_DL CSQ_HGVS_OFFSET CSQ_STRAND CSQ_Feature", # inputs for generate bed for athena "{}.exons_nirvana ID".format(generate_bed_athena_stage_id): exons_nirvana, "{}.exons_nirvana".format(generate_bed_athena_stage_id): "", From 
b5823205ad55072288cd47e6eebbdce8e6069e11 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Wed, 19 Jul 2023 14:34:54 +0100 Subject: [PATCH 06/14] moved athena static inputs from reports workflow --- egg4_config_v2.1.0.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index f898087..920698b 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -67,7 +67,9 @@ "{}.gene_panels".format(generate_bed_athena_stage_id): "", # inputs for athena "{}.exons_file ID".format(athena_stage_id): exons_file, - "{}.exons_file".format(athena_stage_id): "" + "{}.exons_file".format(athena_stage_id): "", + "{}.limit".format(athena_stage_id): 260, + "{}.summary".format(athena_stage_id): True } # Sample-specific input files and their search patterns From 486b589aa13520836f2cdef708e53d1224797418 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Mon, 24 Jul 2023 16:23:19 +0100 Subject: [PATCH 07/14] changed bools to strings --- egg4_config_v2.1.0.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index 920698b..c5fcbcf 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -50,10 +50,10 @@ "{}.config_file".format(vep_stage_id): "", # inputs for generate_variant_workbook "{}.exclude_columns".format(generate_workbook_stage_id): "BaseQRankSum ClippingRankSum DB ExcessHet FS MLEAC MLEAF MQ MQRankSum QD ReadPosRankSum SOR PL QUAL ID FILTER CSQ_ClinVar_CLNSIGCONF CSQ_Allele CSQ_HGNC_ID DP AC AF AN CSQ_SpliceAI_pred_DP_AL CSQ_SpliceAI_pred_DP_AG CSQ_SpliceAI_pred_DP_DG CSQ_SpliceAI_pred_DP_DL", - "{}.acmg".format(generate_workbook_stage_id): True, + "{}.acmg".format(generate_workbook_stage_id): "true", "{}.rename_columns".format(generate_workbook_stage_id): "CSQ_Feature=Transcript DP_FMT=DP", - "{}.add_comment_column".format(generate_workbook_stage_id): True, - "{}.keep_tmp".format(generate_workbook_stage_id): True, + 
"{}.add_comment_column".format(generate_workbook_stage_id): "true", + "{}.keep_tmp".format(generate_workbook_stage_id): "true", "{}.summary".format(generate_workbook_stage_id): "dias", "{}.filter".format(generate_workbook_stage_id): "bcftools filter -e '(CSQ_Consequence==\"synonymous_variant\" | CSQ_Consequence==\"intron_variant\" | CSQ_Consequence==\"upstream_gene_variant\" | CSQ_Consequence==\"downstream_gene_variant\" | CSQ_Consequence==\"intergenic_variant\" | CSQ_Consequence==\"5_prime_UTR_variant\" | CSQ_Consequence==\"3_prime_UTR_variant\" | CSQ_gnomADe_AF>0.01 | CSQ_gnomADg_AF>0.01 | CSQ_TWE_AF>0.05) & CSQ_HGMD_CLASS!~ \"DM\" & CSQ_ClinVar_CLNSIG!~ \"pathogenic\\/i\" & CSQ_ClinVar_CLNSIGCONF!~ \"pathogenic\\/i\"'", "{}.human_filter".format(generate_workbook_stage_id): "excluded gnomAD exomes / genomes > 1%, TWE > 5%, synonymous / intronic / intergenic / upstream / downstream / UTRs EXCEPT pathogenic status in ClinVar OR DM in HGMD Class", @@ -69,7 +69,7 @@ "{}.exons_file ID".format(athena_stage_id): exons_file, "{}.exons_file".format(athena_stage_id): "", "{}.limit".format(athena_stage_id): 260, - "{}.summary".format(athena_stage_id): True + "{}.summary".format(athena_stage_id): "true" } # Sample-specific input files and their search patterns From 21ba7be2a5d12aeb0aaac2438811aa14fd0899c3 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Mon, 24 Jul 2023 16:31:09 +0100 Subject: [PATCH 08/14] changed int to string --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index c5fcbcf..ad8dff9 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -68,7 +68,7 @@ # inputs for athena "{}.exons_file ID".format(athena_stage_id): exons_file, "{}.exons_file".format(athena_stage_id): "", - "{}.limit".format(athena_stage_id): 260, + "{}.limit".format(athena_stage_id): "260", "{}.summary".format(athena_stage_id): "true" } From e4e981480dff9ad44861a559444b0d6d40760e9c Mon Sep 17 
00:00:00 2001 From: Jay Miles Date: Mon, 24 Jul 2023 16:40:20 +0100 Subject: [PATCH 09/14] updated workflow ID --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index ad8dff9..0412483 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -29,7 +29,7 @@ # dias_reports # v2.1.0 -rpt_workflow_id = "{}:workflow-GBQ985Q433GYJjv0379PJqqg".format(ref_project_id) +rpt_workflow_id = "project-GXvQGY04QPQxjbz9zXVYF3xK:workflow-GXvYQ104QPQqx7pxjYZV5PXQ" generate_bed_vep_stage_id = "stage-rpt_generate_bed_vep" vep_stage_id = "stage-rpt_vep" From f386743568df04d143fb428fa865b90ad42b2134 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 25 Jul 2023 10:54:56 +0100 Subject: [PATCH 10/14] updated workflow id due to changed athena version --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index 0412483..f6880fd 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -29,7 +29,7 @@ # dias_reports # v2.1.0 -rpt_workflow_id = "project-GXvQGY04QPQxjbz9zXVYF3xK:workflow-GXvYQ104QPQqx7pxjYZV5PXQ" +rpt_workflow_id = "project-GXvQGY04QPQxjbz9zXVYF3xK:workflow-GXzkfYj4QPQp9z4Jz4BF09y6" generate_bed_vep_stage_id = "stage-rpt_generate_bed_vep" vep_stage_id = "stage-rpt_vep" From 0b85ba3d264f228a6abba5c3716f37954f79fcd6 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 25 Jul 2023 15:34:30 +0100 Subject: [PATCH 11/14] fixed workflow id format --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index f6880fd..e2aa93c 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -29,7 +29,7 @@ # dias_reports # v2.1.0 -rpt_workflow_id = "project-GXvQGY04QPQxjbz9zXVYF3xK:workflow-GXzkfYj4QPQp9z4Jz4BF09y6" +rpt_workflow_id = "workflow-GXzkfYj4QPQp9z4Jz4BF09y6" generate_bed_vep_stage_id = 
"stage-rpt_generate_bed_vep" vep_stage_id = "stage-rpt_vep" From 12e95028880cc9b2c55389ca707c704b0f4c5d06 Mon Sep 17 00:00:00 2001 From: Jay Miles Date: Tue, 25 Jul 2023 15:40:23 +0100 Subject: [PATCH 12/14] undid workflow id fix --- egg4_config_v2.1.0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egg4_config_v2.1.0.py b/egg4_config_v2.1.0.py index e2aa93c..8a718ab 100644 --- a/egg4_config_v2.1.0.py +++ b/egg4_config_v2.1.0.py @@ -29,7 +29,7 @@ # dias_reports # v2.1.0 -rpt_workflow_id = "workflow-GXzkfYj4QPQp9z4Jz4BF09y6" +rpt_workflow_id = "{}:workflow-GXzkfYj4QPQp9z4Jz4BF09y6".format(ref_project_id) generate_bed_vep_stage_id = "stage-rpt_generate_bed_vep" vep_stage_id = "stage-rpt_vep" From acf7aa082d4fb1f048e704cfa701635f1c92a95b Mon Sep 17 00:00:00 2001 From: kjwinfield Date: Mon, 4 Sep 2023 10:55:21 +0100 Subject: [PATCH 13/14] update README --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ee7e3f..7aaf4c4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,26 @@ # egg4_dias_TWE_config -Config for the Dias TWE assay + +This repo contains a Python config file which is used with dias_batch_running to specify inputs for running the Dias pipeline for TWE data. + +## What does the config do? +dias_batch_running ([https://github.com/eastgenomics/dias_batch_running](https://github.com/eastgenomics/dias_batch_running)) is a Python module that runs the Dias pipeline for germline sequence data analysis on DNAnexus. The egg4_dias_TWE_config specifies the executables and their input files to be used in the Dias pipeline for analysing TWE data. + +New versions of apps and app inputs for use in the Dias pipeline can be updated in the config without needing to update the pipeline itself. + +## Parts of the config +* dias_reports + * specifies the workflow ID, stage IDs (matching those in the workflow), and dynamic files for dias_reports. 
+ +## Versions of workflows and dynamic files in the config +Workflows: +* Dias reports: **dias_reports_v2.1.0** + * DNAnexus workflow ID: `workflow-GXzkfYj4QPQp9z4Jz4BF09y6` + +Dynamic files: +| File | File name | DNAnexus file ID | +| --------- | --------- | ---------------- | +| genepanels | **230602_genepanels.tsv** | `file-GVx0vkQ433Gvq63k1Kj4Y562` | +| genes2transcripts | **230421_g2t.tsv** | `file-GV4P970433Gj6812zGVBZvB4` | +| exons_nirvana | **GCF_000001405.25_GRCh37.p13_genomic.exon_5bp_v2.0.0.tsv** | `file-GF611Z8433Gk7gZ47gypK7ZZ` | +| exons_file for eggd_athena | **GCF_000001405.25_GRCh37.p13_genomic.symbols.exon_5bp_v2.0.0.tsv** | `file-GF611Z8433Gf99pBPbJkV7bq` | +| twe_vep_config for SNV reports | **twe_vep_config_v1.1.4.json** | `file-GXJy7F04j4fV8gj37qYgfFfg` | From e791df13b01e9e07a37a02b97574da6a1691d248 Mon Sep 17 00:00:00 2001 From: kjwinfield Date: Mon, 4 Sep 2023 12:36:18 +0100 Subject: [PATCH 14/14] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7aaf4c4..b61a7fb 100644 --- a/README.md +++ b/README.md @@ -23,4 +23,4 @@ Dynamic files: | genes2transcripts | **230421_g2t.tsv** | `file-GV4P970433Gj6812zGVBZvB4` | | exons_nirvana | **GCF_000001405.25_GRCh37.p13_genomic.exon_5bp_v2.0.0.tsv** | `file-GF611Z8433Gk7gZ47gypK7ZZ` | | exons_file for eggd_athena | **GCF_000001405.25_GRCh37.p13_genomic.symbols.exon_5bp_v2.0.0.tsv** | `file-GF611Z8433Gf99pBPbJkV7bq` | -| twe_vep_config for SNV reports | **twe_vep_config_v1.1.4.json** | `file-GXJy7F04j4fV8gj37qYgfFfg` | +| twe_vep_config for SNV reports | **twe_vep_config_v1.1.6.json** | `file-GYX8q204j4fpP18Qx7YGkJvX` |