diff --git a/Snakefile b/Snakefile index 5ef2eaa2..6f0eba79 100644 --- a/Snakefile +++ b/Snakefile @@ -8,8 +8,12 @@ if not config: if not config.get("transform"): configfile: "ingest/config/config.yaml" -# WIP: BIG COMMENTS HERE, write documentation on the reasons this workaround is necessary. TK -my_basedir="ingest" +# Add the hard-coded ingest basedir to the workflow config so that we can +# pass it to the module ingest workflow. This will allow shell scripts to +# use the proper paths for local script invocation since we cannot set the +# workdir separately for module workflows. +# This workaround is based on https://stackoverflow.com/a/66890412 +config["ingest_basedir"] = f"{workflow.current_basedir}/ingest" serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4'] diff --git a/ingest/Snakefile b/ingest/Snakefile index f3fca0df..3fb79a14 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -2,7 +2,10 @@ if not config: configfile: "config/config.yaml" -my_basedir = workflow.current_basedir + +if not config.get("ingest_basedir"): + + config["ingest_basedir"] = workflow.current_basedir send_slack_notifications = config.get("send_slack_notifications", False) diff --git a/ingest/workflow/snakemake_rules/fetch_sequences.smk b/ingest/workflow/snakemake_rules/fetch_sequences.smk index 11fffb59..7f334a7d 100644 --- a/ingest/workflow/snakemake_rules/fetch_sequences.smk +++ b/ingest/workflow/snakemake_rules/fetch_sequences.smk @@ -33,7 +33,7 @@ rule fetch_from_genbank: serotype_tax_id=download_serotype, shell: """ - {my_basedir}/bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson} + {config[ingest_basedir]}/bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson} """ diff --git a/ingest/workflow/snakemake_rules/slack_notifications.smk b/ingest/workflow/snakemake_rules/slack_notifications.smk index 1c6e28a2..b868cf3d 100644 --- a/ingest/workflow/snakemake_rules/slack_notifications.smk +++ 
b/ingest/workflow/snakemake_rules/slack_notifications.smk @@ -31,7 +31,7 @@ rule notify_on_genbank_record_change: genbank_filename="genbank_{serotype}.ndjson.xz", shell: """ - {my_basedir}/bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/{params.genbank_filename:q} Genbank + {config[ingest_basedir]}/bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/{params.genbank_filename:q} Genbank """ @@ -45,13 +45,13 @@ rule notify_on_metadata_diff: metadata_filename="metadata_{serotype}.tsv.gz", shell: """ - {my_basedir}/bin/notify-on-diff {input.metadata} {params.s3_src:q}/{params.metadata_filename:q} + {config[ingest_basedir]}/bin/notify-on-diff {input.metadata} {params.s3_src:q}/{params.metadata_filename:q} """ onstart: - shell("{my_basedir}/bin/notify-on-job-start") + shell("{config[ingest_basedir]}/bin/notify-on-job-start") onerror: - shell("{my_basedir}/bin/notify-on-job-fail") + shell("{config[ingest_basedir]}/bin/notify-on-job-fail") diff --git a/ingest/workflow/snakemake_rules/transform.smk b/ingest/workflow/snakemake_rules/transform.smk index 613040c8..4e0d52fc 100644 --- a/ingest/workflow/snakemake_rules/transform.smk +++ b/ingest/workflow/snakemake_rules/transform.smk @@ -65,31 +65,31 @@ rule transform: shell: """ (cat {input.sequences_ndjson} \ - | {my_basedir}/bin/transform-field-names \ + | {config[ingest_basedir]}/bin/transform-field-names \ --field-map {params.field_map} \ | augur curate normalize-strings \ - | {my_basedir}/bin/transform-strain-names \ + | {config[ingest_basedir]}/bin/transform-strain-names \ --strain-regex {params.strain_regex} \ --backup-fields {params.strain_backup_fields} \ - | {my_basedir}/bin/transform-date-fields \ + | {config[ingest_basedir]}/bin/transform-date-fields \ --date-fields {params.date_fields} \ --expected-date-formats {params.expected_date_formats} \ - | {my_basedir}/bin/transform-genbank-location \ - | {my_basedir}/bin/transform-string-fields \ + | 
{config[ingest_basedir]}/bin/transform-genbank-location \ + | {config[ingest_basedir]}/bin/transform-string-fields \ --titlecase-fields {params.titlecase_fields} \ --articles {params.articles} \ --abbreviations {params.abbreviations} \ - | {my_basedir}/bin/transform-authors \ + | {config[ingest_basedir]}/bin/transform-authors \ --authors-field {params.authors_field} \ --default-value {params.authors_default_value} \ --abbr-authors-field {params.abbr_authors_field} \ - | {my_basedir}/bin/apply-geolocation-rules \ + | {config[ingest_basedir]}/bin/apply-geolocation-rules \ --geolocation-rules {input.all_geolocation_rules} \ - | {my_basedir}/bin/post_process_metadata.py \ - | {my_basedir}/bin/merge-user-metadata \ + | {config[ingest_basedir]}/bin/post_process_metadata.py \ + | {config[ingest_basedir]}/bin/merge-user-metadata \ --annotations {input.annotations} \ --id-field {params.annotations_id} \ - | {my_basedir}/bin/ndjson-to-tsv-and-fasta \ + | {config[ingest_basedir]}/bin/ndjson-to-tsv-and-fasta \ --metadata-columns {params.metadata_columns} \ --metadata {output.metadata} \ --fasta {output.sequences} \ diff --git a/ingest/workflow/snakemake_rules/trigger_rebuild.smk b/ingest/workflow/snakemake_rules/trigger_rebuild.smk index 80a11b70..b360abac 100644 --- a/ingest/workflow/snakemake_rules/trigger_rebuild.smk +++ b/ingest/workflow/snakemake_rules/trigger_rebuild.smk @@ -15,5 +15,5 @@ rule trigger_build: touch("data/trigger/rebuild_{serotype}.done") shell: """ - {my_basedir}/bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload} + {config[ingest_basedir]}/bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload} """ diff --git a/ingest/workflow/snakemake_rules/upload.smk b/ingest/workflow/snakemake_rules/upload.smk index 56895200..40d992af 100644 --- a/ingest/workflow/snakemake_rules/upload.smk +++ b/ingest/workflow/snakemake_rules/upload.smk @@ -56,7 +56,7 @@ rule upload_to_s3: cloudfront_domain=config["upload"].get("s3", 
{}).get("cloudfront_domain", ""), shell: """ - {my_basedir}/bin/upload-to-s3 \ + {config[ingest_basedir]}/bin/upload-to-s3 \ {params.quiet} \ {input.file_to_upload:q} \ {params.s3_dst:q}/{wildcards.remote_file_name:q} \