From 571083e345e9a0c68a19a6f9f86aed5e7fe43f34 Mon Sep 17 00:00:00 2001 From: j23414 Date: Thu, 29 Dec 2022 17:49:44 -0800 Subject: [PATCH] Pick curl or wget based on availability Discussed in https://github.com/nextstrain/ebola/pull/6#discussion_r1048835183 --- .../snakemake_rules/fetch_sequences.smk | 23 ++++++--- .../snakemake_rules/slack_notifications.smk | 45 +++++++++++------ ingest/workflow/snakemake_rules/transform.smk | 49 +++++++++++++------ .../snakemake_rules/trigger_rebuild.smk | 23 ++++++--- ingest/workflow/snakemake_rules/upload.smk | 25 +++++++--- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/ingest/workflow/snakemake_rules/fetch_sequences.smk b/ingest/workflow/snakemake_rules/fetch_sequences.smk index fbf9bfe3..f6348509 100644 --- a/ingest/workflow/snakemake_rules/fetch_sequences.smk +++ b/ingest/workflow/snakemake_rules/fetch_sequences.smk @@ -31,15 +31,22 @@ rule fetch_from_genbank: csv_to_ndjson_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/csv-to-ndjson", shell: """ - if [[ ! -d bin ]]; then - mkdir bin - fi - if [[ ! -f bin/csv-to-ndjson ]]; then - cd bin - wget {params.csv_to_ndjson_url} - chmod 755 * - cd .. + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi + + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/csv-to-ndjson ]] || $download_cmd bin/csv-to-ndjson {params.csv_to_ndjson_url} + chmod +x bin/* + + # (3) Fetch sequences from GenBank ./bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson} """ diff --git a/ingest/workflow/snakemake_rules/slack_notifications.smk b/ingest/workflow/snakemake_rules/slack_notifications.smk index 91d5ffc2..21dd3ccf 100644 --- a/ingest/workflow/snakemake_rules/slack_notifications.smk +++ b/ingest/workflow/snakemake_rules/slack_notifications.smk @@ -31,16 +31,22 @@ rule notify_on_genbank_record_change: notify_on_record_change_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-record-change", shell: """ - if [[ ! -d bin ]]; then - mkdir bin - fi - if [[ ! -f bin/notify-on-record-change ]]; then - cd bin - wget {params.notify_on_record_change_url} - chmod 755 - cd .. + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/notify-on-record-change ]] || $download_cmd bin/notify-on-record-change {params.notify_on_record_change_url} + chmod +x bin/* + + # (3) Run the script ./bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/genbank.ndjson.xz Genbank """ @@ -55,15 +61,22 @@ rule notify_on_metadata_diff: notify_on_diff_url = "https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-diff", shell: """ - if [[ ! -d bin ]]; then - mkdir bin - fi - if [[ ! -f bin/notify-on-diff ]]; then - cd bin - wget {params.notify_on_diff_url} - chmod 755 - cd .. + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi + + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/notify-on-diff ]] || $download_cmd bin/notify-on-diff {params.notify_on_diff_url} + chmod +x bin/* + + # (3) Run the script ./bin/notify-on-diff {input.metadata} {params.s3_src:q}/metadata.tsv.gz """ diff --git a/ingest/workflow/snakemake_rules/transform.smk b/ingest/workflow/snakemake_rules/transform.smk index d70eaa6c..568b30f5 100644 --- a/ingest/workflow/snakemake_rules/transform.smk +++ b/ingest/workflow/snakemake_rules/transform.smk @@ -20,7 +20,18 @@ rule fetch_general_geolocation_rules: geolocation_rules_url=config["transform"]["geolocation_rules_url"], shell: """ - curl {params.geolocation_rules_url} > {output.general_geolocation_rules} + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 + fi + + # (2) Fetch general geolocation rules + $download_cmd {output.general_geolocation_rules} {params.geolocation_rules_url} """ @@ -73,22 +84,30 @@ rule transform: ndjson_to_tsv_and_fasta_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/ndjson-to-tsv-and-fasta", shell: """ - if [[ ! -d bin ]]; then - mkdir bin + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi - cd bin - [[ -f transform-field-names ]] || wget {params.transform_field_names_url} - [[ -f transform-string-fields ]] || wget {params.transform_string_fields_url} - [[ -f transform-strain-names ]] || wget {params.transform_strain_names_url} - [[ -f transform-date-fields ]] || wget {params.transform_date_fields_url} - [[ -f transform-genbank-location ]] || wget {params.transform_genbank_location_url} - [[ -f transform-authors ]] || wget {params.transform_authors_url} - [[ -f apply-geolocation-rules ]] || wget {params.apply_geolocation_rules_url} - [[ -f merge-user-metadata ]] || wget {params.merge_user_metadata_url} - [[ -f ndjson-to-tsv-and-fasta ]] || wget {params.ndjson_to_tsv_and_fasta_url} - chmod 755 * - cd .. + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/transform-field-names ]] || $download_cmd bin/transform-field-names {params.transform_field_names_url} + [[ -f bin/transform-string-fields ]] || $download_cmd bin/transform-string-fields {params.transform_string_fields_url} + [[ -f bin/transform-strain-names ]] || $download_cmd bin/transform-strain-names {params.transform_strain_names_url} + [[ -f bin/transform-date-fields ]] || $download_cmd bin/transform-date-fields {params.transform_date_fields_url} + [[ -f bin/transform-genbank-location ]] || $download_cmd bin/transform-genbank-location {params.transform_genbank_location_url} + [[ -f bin/transform-authors ]] || $download_cmd bin/transform-authors {params.transform_authors_url} + [[ -f bin/apply-geolocation-rules ]] || $download_cmd bin/apply-geolocation-rules {params.apply_geolocation_rules_url} + [[ -f bin/merge-user-metadata ]] || $download_cmd bin/merge-user-metadata {params.merge_user_metadata_url} + [[ -f bin/ndjson-to-tsv-and-fasta ]] || $download_cmd bin/ndjson-to-tsv-and-fasta {params.ndjson_to_tsv_and_fasta_url} + chmod +x bin/* + + # (3) Transform the sequences (cat {input.sequences_ndjson} \ | ./bin/transform-field-names \ --field-map {params.field_map} \ diff --git a/ingest/workflow/snakemake_rules/trigger_rebuild.smk b/ingest/workflow/snakemake_rules/trigger_rebuild.smk index aa5e52a1..122931e5 100644 --- a/ingest/workflow/snakemake_rules/trigger_rebuild.smk +++ b/ingest/workflow/snakemake_rules/trigger_rebuild.smk @@ -17,14 +17,21 @@ rule trigger_build: trigger_on_new_data_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/trigger-on-new-data" shell: """ - if [[ ! -d bin ]]; then - mkdir bin - fi - if [[ ! -f bin/trigger-on-new-data ]]; then - cd bin - wget {params.trigger_on_new_data_url} - chmod 755 * - cd .. + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi + + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/trigger-on-new-data ]] || $download_cmd bin/trigger-on-new-data {params.trigger_on_new_data_url} + chmod +x bin/* + + # (3) Trigger the build ./bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload} """ diff --git a/ingest/workflow/snakemake_rules/upload.smk b/ingest/workflow/snakemake_rules/upload.smk index db2de41c..67b99d43 100644 --- a/ingest/workflow/snakemake_rules/upload.smk +++ b/ingest/workflow/snakemake_rules/upload.smk @@ -59,15 +59,24 @@ rule upload_to_s3: cloudfront_invalidate_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/cloudfront-invalidate" shell: """ - if [[ ! -d bin ]]; then - mkdir bin + # (1) Pick curl or wget based on availability + if which curl > /dev/null; then + download_cmd="curl -fsSL --output" + elif which wget > /dev/null; then + download_cmd="wget -O" + else + echo "ERROR: Neither curl nor wget found. Please install one of them." + exit 1 fi - cd bin - [[ -f upload-to-s3 ]] || wget {params.upload_to_s3_url} - [[ -f sha256sum ]] || wget {params.sha256sum_url} - [[ -f cloudfront-invalidate ]] || wget {params.cloudfront_invalidate_url} - chmod 755 * - cd .. + + # (2) Download the required scripts if not already present + [[ -d bin ]] || mkdir bin + [[ -f bin/upload-to-s3 ]] || $download_cmd bin/upload-to-s3 {params.upload_to_s3_url} + [[ -f bin/sha256sum ]] || $download_cmd bin/sha256sum {params.sha256sum_url} + [[ -f bin/cloudfront-invalidate ]] || $download_cmd bin/cloudfront-invalidate {params.cloudfront_invalidate_url} + chmod +x bin/* + + # (3) Run the upload script ./bin/upload-to-s3 \ {params.quiet} \ {input.file_to_upload:q} \