Ingest workflow: fetch helper scripts with curl or wget, whichever is installed.

Every rule touched below follows the same three steps inside its shell block:
  (1) choose a downloader (curl preferred, wget as fallback, abort if neither exists),
  (2) download any helper script missing from bin/ and mark bin/* executable,
  (3) run the actual step.
A failed download deletes its partial target so that the "already present" check
cannot be satisfied by a truncated file on the next run.

NOTE(review): leading indentation was reconstructed from a whitespace-collapsed copy
of this patch; if context lines do not match, apply with --ignore-whitespace.
NOTE(review): the blob ids on the "index" lines were carried over unchanged and were
not recomputed for the edited recipe lines.

diff --git a/ingest/workflow/snakemake_rules/fetch_sequences.smk b/ingest/workflow/snakemake_rules/fetch_sequences.smk
index c44c4ab..aff8f28 100644
--- a/ingest/workflow/snakemake_rules/fetch_sequences.smk
+++ b/ingest/workflow/snakemake_rules/fetch_sequences.smk
@@ -23,15 +23,24 @@ rule fetch_from_genbank:
         genbank_url_url="https://raw.githubusercontent.com/nextstrain/dengue/ca659008bfbe4b3f799e11ecd106a0b95977fe93/ingest/bin/genbank-url", # Update if dengue merged
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
-        cd bin
-        [[ -f csv-to-ndjson ]] || wget {params.csv_to_ndjson_url}
-        [[ -f genbank-url ]] || wget {params.genbank_url_url}
-        [[ -f fetch-from-genbank ]] || wget {params.fetch_from_genbank_url}
-        chmod 755 *
-        cd ..
+
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/csv-to-ndjson ]] || $download_cmd bin/csv-to-ndjson {params.csv_to_ndjson_url} || (rm -f bin/csv-to-ndjson; exit 1)
+        [[ -f bin/genbank-url ]] || $download_cmd bin/genbank-url {params.genbank_url_url} || (rm -f bin/genbank-url; exit 1)
+        [[ -f bin/fetch-from-genbank ]] || $download_cmd bin/fetch-from-genbank {params.fetch_from_genbank_url} || (rm -f bin/fetch-from-genbank; exit 1)
+        chmod +x bin/*
+
+        # (3) Fetch the sequences
         ./bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson}
         """
 
diff --git a/ingest/workflow/snakemake_rules/slack_notifications.smk b/ingest/workflow/snakemake_rules/slack_notifications.smk
index acf95f3..88ed446 100644
--- a/ingest/workflow/snakemake_rules/slack_notifications.smk
+++ b/ingest/workflow/snakemake_rules/slack_notifications.smk
@@ -31,16 +31,22 @@ rule notify_on_genbank_record_change:
         notify_on_record_change_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-record-change",
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
-        fi
-        if [[ ! -f bin/notify-on-record-change ]]; then
-            cd bin
-            wget {params.notify_on_record_change_url}
-            chmod 755
-            cd ..
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
 
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/notify-on-record-change ]] || $download_cmd bin/notify-on-record-change {params.notify_on_record_change_url} || (rm -f bin/notify-on-record-change; exit 1)
+        chmod +x bin/*
+
+        # (3) Run the script
         ./bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/genbank.ndjson.xz Genbank
         """
 
@@ -55,15 +61,22 @@ rule notify_on_metadata_diff:
         notify_on_diff_url = "https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-diff",
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
-        fi
-        if [[ ! -f bin/notify-on-diff ]]; then
-            cd bin
-            wget {params.notify_on_diff_url}
-            chmod 755
-            cd ..
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
+
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/notify-on-diff ]] || $download_cmd bin/notify-on-diff {params.notify_on_diff_url} || (rm -f bin/notify-on-diff; exit 1)
+        chmod +x bin/*
+
+        # (3) Run the script
         ./bin/notify-on-diff {input.metadata} {params.s3_src:q}/metadata.tsv.gz
         """
 
diff --git a/ingest/workflow/snakemake_rules/transform.smk b/ingest/workflow/snakemake_rules/transform.smk
index 3b86dac..b33f0d9 100644
--- a/ingest/workflow/snakemake_rules/transform.smk
+++ b/ingest/workflow/snakemake_rules/transform.smk
@@ -20,7 +20,18 @@ rule fetch_general_geolocation_rules:
         geolocation_rules_url=config["transform"]["geolocation_rules_url"],
     shell:
         """
-        curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
+        fi
+
+        # (2) Fetch general geolocation rules (URL comes from config, so both arguments are shell-quoted)
+        $download_cmd {output.general_geolocation_rules:q} {params.geolocation_rules_url:q}
         """
 
 
@@ -73,22 +84,30 @@ rule transform:
         ndjson_to_tsv_and_fasta_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/ndjson-to-tsv-and-fasta",
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
-        cd bin
-        [[ -f transform-field-names ]] || wget {params.transform_field_names_url}
-        [[ -f transform-string-fields ]] || wget {params.transform_string_fields_url}
-        [[ -f transform-strain-names ]] || wget {params.transform_strain_names_url}
-        [[ -f transform-date-fields ]] || wget {params.transform_date_fields_url}
-        [[ -f transform-genbank-location ]] || wget {params.transform_genbank_location_url}
-        [[ -f transform-authors ]] || wget {params.transform_authors_url}
-        [[ -f apply-geolocation-rules ]] || wget {params.apply_geolocation_rules_url}
-        [[ -f merge-user-metadata ]] || wget {params.merge_user_metadata_url}
-        [[ -f ndjson-to-tsv-and-fasta ]] || wget {params.ndjson_to_tsv_and_fasta_url}
-        chmod 755 *
-        cd ..
 
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/transform-field-names ]] || $download_cmd bin/transform-field-names {params.transform_field_names_url} || (rm -f bin/transform-field-names; exit 1)
+        [[ -f bin/transform-string-fields ]] || $download_cmd bin/transform-string-fields {params.transform_string_fields_url} || (rm -f bin/transform-string-fields; exit 1)
+        [[ -f bin/transform-strain-names ]] || $download_cmd bin/transform-strain-names {params.transform_strain_names_url} || (rm -f bin/transform-strain-names; exit 1)
+        [[ -f bin/transform-date-fields ]] || $download_cmd bin/transform-date-fields {params.transform_date_fields_url} || (rm -f bin/transform-date-fields; exit 1)
+        [[ -f bin/transform-genbank-location ]] || $download_cmd bin/transform-genbank-location {params.transform_genbank_location_url} || (rm -f bin/transform-genbank-location; exit 1)
+        [[ -f bin/transform-authors ]] || $download_cmd bin/transform-authors {params.transform_authors_url} || (rm -f bin/transform-authors; exit 1)
+        [[ -f bin/apply-geolocation-rules ]] || $download_cmd bin/apply-geolocation-rules {params.apply_geolocation_rules_url} || (rm -f bin/apply-geolocation-rules; exit 1)
+        [[ -f bin/merge-user-metadata ]] || $download_cmd bin/merge-user-metadata {params.merge_user_metadata_url} || (rm -f bin/merge-user-metadata; exit 1)
+        [[ -f bin/ndjson-to-tsv-and-fasta ]] || $download_cmd bin/ndjson-to-tsv-and-fasta {params.ndjson_to_tsv_and_fasta_url} || (rm -f bin/ndjson-to-tsv-and-fasta; exit 1)
+        chmod +x bin/*
+
+        # (3) Transform the sequences
         (cat {input.sequences_ndjson} \
             | ./bin/transform-field-names \
                 --field-map {params.field_map} \
@@ -131,14 +150,22 @@ rule post_process_metadata:
 
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
-        cd bin
-        [[ -f post_process_metadata.py ]] || wget {params.post_process_metadata_url}
-        chmod 755 *
-        cd ..
 
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/post_process_metadata.py ]] || $download_cmd bin/post_process_metadata.py {params.post_process_metadata_url} || (rm -f bin/post_process_metadata.py; exit 1)
+        chmod +x bin/*
+
+        # (3) Post-process the metadata
         ./bin/post_process_metadata.py --metadata {input.metadata} --outfile {output.metadata}
         """
 
diff --git a/ingest/workflow/snakemake_rules/trigger_rebuild.smk b/ingest/workflow/snakemake_rules/trigger_rebuild.smk
index 1a1cade..f6fb5e5 100644
--- a/ingest/workflow/snakemake_rules/trigger_rebuild.smk
+++ b/ingest/workflow/snakemake_rules/trigger_rebuild.smk
@@ -17,14 +17,21 @@ rule trigger_build:
         trigger_on_new_data_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/trigger-on-new-data"
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
-        fi
-        if [[ ! -f bin/trigger-on-new-data ]]; then
-            cd bin
-            wget {params.trigger_on_new_data_url}
-            chmod 755 *
-            cd ..
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
+
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/trigger-on-new-data ]] || $download_cmd bin/trigger-on-new-data {params.trigger_on_new_data_url} || (rm -f bin/trigger-on-new-data; exit 1)
+        chmod +x bin/*
+
+        # (3) Trigger the build
         ./bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload}
         """
diff --git a/ingest/workflow/snakemake_rules/upload.smk b/ingest/workflow/snakemake_rules/upload.smk
index db2de41..acc4cf8 100644
--- a/ingest/workflow/snakemake_rules/upload.smk
+++ b/ingest/workflow/snakemake_rules/upload.smk
@@ -59,15 +59,24 @@ rule upload_to_s3:
         cloudfront_invalidate_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/cloudfront-invalidate"
     shell:
         """
-        if [[ ! -d bin ]]; then
-            mkdir bin
+        # (1) Pick a downloader: prefer curl, fall back to wget, abort if neither is on PATH
+        if command -v curl > /dev/null 2>&1; then
+            download_cmd="curl -fsSL --output"
+        elif command -v wget > /dev/null 2>&1; then
+            download_cmd="wget -O"
+        else
+            echo "ERROR: Neither curl nor wget found. Please install one of them." >&2
+            exit 1
         fi
-        cd bin
-        [[ -f upload-to-s3 ]] || wget {params.upload_to_s3_url}
-        [[ -f sha256sum ]] || wget {params.sha256sum_url}
-        [[ -f cloudfront-invalidate ]] || wget {params.cloudfront_invalidate_url}
-        chmod 755 *
-        cd ..
+
+        # (2) Download the required scripts if not already present (a failed download removes its partial file)
+        [[ -d bin ]] || mkdir bin
+        [[ -f bin/upload-to-s3 ]] || $download_cmd bin/upload-to-s3 {params.upload_to_s3_url} || (rm -f bin/upload-to-s3; exit 1)
+        [[ -f bin/sha256sum ]] || $download_cmd bin/sha256sum {params.sha256sum_url} || (rm -f bin/sha256sum; exit 1)
+        [[ -f bin/cloudfront-invalidate ]] || $download_cmd bin/cloudfront-invalidate {params.cloudfront_invalidate_url} || (rm -f bin/cloudfront-invalidate; exit 1)
+        chmod +x bin/*
+
+        # (3) Run the upload script
         ./bin/upload-to-s3 \
             {params.quiet} \
             {input.file_to_upload:q} \