Skip to content

Commit

Permalink
Update workaround for ingest shell
Browse files Browse the repository at this point in the history
Use the config to set the ingest basedir so that we're not using some
arbitrary variable.
  • Loading branch information
joverlee521 authored and j23414 committed Jun 10, 2023
1 parent 526cf3d commit bca3415
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 20 deletions.
8 changes: 6 additions & 2 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ if not config:
if not config.get("transform"):
configfile: "ingest/config/config.yaml"

# WIP: BIG COMMENTS HERE, write documentation on the reasons this workaround is necessary. TK
my_basedir="ingest"
# Add the hard-coded ingest basedir to the workflow config so that we can
# pass it to the module ingest workflow. This will allow shell scripts to
# use the proper paths for local script invocation since we cannot set the
# workdir separately for module workflows.
# This workaround is based on https://stackoverflow.com/a/66890412
config["ingest_basedir"] = f"{workflow.current_basedir}/ingest"

serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']

Expand Down
5 changes: 4 additions & 1 deletion ingest/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ if not config:

configfile: "config/config.yaml"

my_basedir = workflow.current_basedir

if not config.get("ingest_basedir"):

config["ingest_basedir"] = workflow.current_basedir

send_slack_notifications = config.get("send_slack_notifications", False)

Expand Down
2 changes: 1 addition & 1 deletion ingest/workflow/snakemake_rules/fetch_sequences.smk
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ rule fetch_from_genbank:
serotype_tax_id=download_serotype,
shell:
"""
{my_basedir}/bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson}
{config[ingest_basedir]}/bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson}
"""


Expand Down
8 changes: 4 additions & 4 deletions ingest/workflow/snakemake_rules/slack_notifications.smk
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rule notify_on_genbank_record_change:
genbank_filename="genbank_{serotype}.ndjson.xz",
shell:
"""
{my_basedir}/bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/{params.genbank_filename:q} Genbank
{config[ingest_basedir]}/bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/{params.genbank_filename:q} Genbank
"""


Expand All @@ -45,13 +45,13 @@ rule notify_on_metadata_diff:
metadata_filename="metadata_{serotype}.tsv.gz",
shell:
"""
{my_basedir}/bin/notify-on-diff {input.metadata} {params.s3_src:q}/{params.metadata_filename:q}
{config[ingest_basedir]}/bin/notify-on-diff {input.metadata} {params.s3_src:q}/{params.metadata_filename:q}
"""


onstart:
shell("{my_basedir}/bin/notify-on-job-start")
shell("{config[ingest_basedir]}/bin/notify-on-job-start")


onerror:
shell("{my_basedir}/bin/notify-on-job-fail")
shell("{config[ingest_basedir]}/bin/notify-on-job-fail")
20 changes: 10 additions & 10 deletions ingest/workflow/snakemake_rules/transform.smk
Original file line number Diff line number Diff line change
Expand Up @@ -65,31 +65,31 @@ rule transform:
shell:
"""
(cat {input.sequences_ndjson} \
| {my_basedir}/bin/transform-field-names \
| {config[ingest_basedir]}/bin/transform-field-names \
--field-map {params.field_map} \
| augur curate normalize-strings \
| {my_basedir}/bin/transform-strain-names \
| {config[ingest_basedir]}/bin/transform-strain-names \
--strain-regex {params.strain_regex} \
--backup-fields {params.strain_backup_fields} \
| {my_basedir}/bin/transform-date-fields \
| {config[ingest_basedir]}/bin/transform-date-fields \
--date-fields {params.date_fields} \
--expected-date-formats {params.expected_date_formats} \
| {my_basedir}/bin/transform-genbank-location \
| {my_basedir}/bin/transform-string-fields \
| {config[ingest_basedir]}/bin/transform-genbank-location \
| {config[ingest_basedir]}/bin/transform-string-fields \
--titlecase-fields {params.titlecase_fields} \
--articles {params.articles} \
--abbreviations {params.abbreviations} \
| {my_basedir}/bin/transform-authors \
| {config[ingest_basedir]}/bin/transform-authors \
--authors-field {params.authors_field} \
--default-value {params.authors_default_value} \
--abbr-authors-field {params.abbr_authors_field} \
| {my_basedir}/bin/apply-geolocation-rules \
| {config[ingest_basedir]}/bin/apply-geolocation-rules \
--geolocation-rules {input.all_geolocation_rules} \
| {my_basedir}/bin/post_process_metadata.py \
| {my_basedir}/bin/merge-user-metadata \
| {config[ingest_basedir]}/bin/post_process_metadata.py \
| {config[ingest_basedir]}/bin/merge-user-metadata \
--annotations {input.annotations} \
--id-field {params.annotations_id} \
| {my_basedir}/bin/ndjson-to-tsv-and-fasta \
| {config[ingest_basedir]}/bin/ndjson-to-tsv-and-fasta \
--metadata-columns {params.metadata_columns} \
--metadata {output.metadata} \
--fasta {output.sequences} \
Expand Down
2 changes: 1 addition & 1 deletion ingest/workflow/snakemake_rules/trigger_rebuild.smk
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ rule trigger_build:
touch("data/trigger/rebuild_{serotype}.done")
shell:
"""
{my_basedir}/bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload}
{config[ingest_basedir]}/bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload}
"""
2 changes: 1 addition & 1 deletion ingest/workflow/snakemake_rules/upload.smk
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ rule upload_to_s3:
cloudfront_domain=config["upload"].get("s3", {}).get("cloudfront_domain", ""),
shell:
"""
{my_basedir}/bin/upload-to-s3 \
{config[ingest_basedir]}/bin/upload-to-s3 \
{params.quiet} \
{input.file_to_upload:q} \
{params.s3_dst:q}/{wildcards.remote_file_name:q} \
Expand Down

0 comments on commit bca3415

Please sign in to comment.