Pick curl or wget based on availability
j23414 authored and committed Jun 20, 2023
1 parent 5a2c5c4 · commit 6ad1636
Showing 6 changed files with 154 additions and 73 deletions.
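For reference, every rule touched by this commit repeats the same shell pattern: pick curl or wget based on availability, download a helper script into bin/ only if it is missing, make it executable, then run it. The block below is a minimal standalone sketch of that pattern outside the Snakemake rules; it is not part of the diff, and the script name and URL are placeholders.

#!/usr/bin/env bash
# Sketch of the download-helper pattern repeated in the rules below.
# "example-script" and script_url are placeholders, not files from this repository.
set -euo pipefail

script_url="https://example.com/path/to/example-script"  # placeholder URL

# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
    download_cmd="curl -fsSL --output"   # curl: write to the file named after --output
elif which wget > /dev/null; then
    download_cmd="wget -O"               # wget: write to the file named after -O
else
    echo "ERROR: Neither curl nor wget found. Please install one of them."
    exit 1
fi

# (2) Download the script only if it is not already present, then mark bin/ contents executable
[[ -d bin ]] || mkdir bin
[[ -f bin/example-script ]] || $download_cmd bin/example-script "$script_url"
chmod +x bin/*

# (3) A rule would then run the downloaded script, e.g. ./bin/example-script ...

A more portable variant would test availability with command -v instead of which, since command -v is specified by POSIX, but both behave the same on typical build hosts.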
40 changes: 28 additions & 12 deletions Snakefile
@@ -56,14 +56,22 @@ rule wrangle_metadata:
wrangle_metadata_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/scripts/wrangle_metadata.py",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f wrangle_metadata.py ]] || wget {params.wrangle_metadata_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/wrangle_metadata.py ]] || $download_cmd bin/wrangle_metadata.py {params.wrangle_metadata_url}
chmod +x bin/*
# (3) Run the script
python3 ./bin/wrangle_metadata.py --metadata {input.metadata} \
--strain-id {params.strain_id} \
--output {output.metadata}
@@ -264,14 +272,22 @@ rule final_strain_name:
set_final_strain_name_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/scripts/set_final_strain_name.py",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f set_final_strain_name.py ]] || wget {params.set_final_strain_name_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/set_final_strain_name.py ]] || $download_cmd bin/set_final_strain_name.py {params.set_final_strain_name_url}
chmod +x bin/*
# (3) Run the script
python3 bin/set_final_strain_name.py \
--metadata {input.metadata} \
--input-auspice-json {input.auspice_json} \
25 changes: 17 additions & 8 deletions ingest/workflow/snakemake_rules/fetch_sequences.smk
@@ -23,15 +23,24 @@ rule fetch_from_genbank:
genbank_url_url="https://raw.githubusercontent.com/nextstrain/dengue/ca659008bfbe4b3f799e11ecd106a0b95977fe93/ingest/bin/genbank-url", # Update if dengue merged
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f csv-to-ndjson ]] || wget {params.csv_to_ndjson_url}
[[ -f genbank-url ]] || wget {params.genbank_url_url}
[[ -f fetch-from-genbank ]] || wget {params.fetch_from_genbank_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/csv-to-ndjson ]] || $download_cmd bin/csv-to-ndjson {params.csv_to_ndjson_url}
[[ -f bin/genbank-url ]] || $download_cmd bin/genbank-url {params.genbank_url_url}
[[ -f bin/fetch-from-genbank ]] || $download_cmd bin/fetch-from-genbank {params.fetch_from_genbank_url}
chmod +x bin/*
# (3) Fetch the sequences
./bin/fetch-from-genbank {params.serotype_tax_id} > {output.genbank_ndjson}
"""

45 changes: 29 additions & 16 deletions ingest/workflow/snakemake_rules/slack_notifications.smk
@@ -31,16 +31,22 @@ rule notify_on_genbank_record_change:
notify_on_record_change_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-record-change",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/notify-on-record-change ]]; then
cd bin
wget {params.notify_on_record_change_url}
chmod 755
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/notify-on-record-change ]] || $download_cmd bin/notify-on-record-change {params.notify_on_record_change_url}
chmod +x bin/*
# (3) Run the script
./bin/notify-on-record-change {input.genbank_ndjson} {params.s3_src:q}/genbank.ndjson.xz Genbank
"""

@@ -55,15 +61,22 @@ rule notify_on_metadata_diff:
notify_on_diff_url = "https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/notify-on-diff",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/notify-on-diff ]]; then
cd bin
wget {params.notify_on_diff_url}
chmod 755
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/notify-on-diff ]] || $download_cmd bin/notify-on-diff {params.notify_on_diff_url}
chmod +x bin/*
# (3) Run the script
./bin/notify-on-diff {input.metadata} {params.s3_src:q}/metadata.tsv.gz
"""

69 changes: 48 additions & 21 deletions ingest/workflow/snakemake_rules/transform.smk
@@ -20,7 +20,18 @@ rule fetch_general_geolocation_rules:
geolocation_rules_url=config["transform"]["geolocation_rules_url"],
shell:
"""
curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Fetch general geolocation rules
$download_cmd {output.general_geolocation_rules} {params.geolocation_rules_url}
"""


@@ -73,22 +84,30 @@ rule transform:
ndjson_to_tsv_and_fasta_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/ndjson-to-tsv-and-fasta",
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f transform-field-names ]] || wget {params.transform_field_names_url}
[[ -f transform-string-fields ]] || wget {params.transform_string_fields_url}
[[ -f transform-strain-names ]] || wget {params.transform_strain_names_url}
[[ -f transform-date-fields ]] || wget {params.transform_date_fields_url}
[[ -f transform-genbank-location ]] || wget {params.transform_genbank_location_url}
[[ -f transform-authors ]] || wget {params.transform_authors_url}
[[ -f apply-geolocation-rules ]] || wget {params.apply_geolocation_rules_url}
[[ -f merge-user-metadata ]] || wget {params.merge_user_metadata_url}
[[ -f ndjson-to-tsv-and-fasta ]] || wget {params.ndjson_to_tsv_and_fasta_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/transform-field-names ]] || $download_cmd bin/transform-field-names {params.transform_field_names_url}
[[ -f bin/transform-string-fields ]] || $download_cmd bin/transform-string-fields {params.transform_string_fields_url}
[[ -f bin/transform-strain-names ]] || $download_cmd bin/transform-strain-names {params.transform_strain_names_url}
[[ -f bin/transform-date-fields ]] || $download_cmd bin/transform-date-fields {params.transform_date_fields_url}
[[ -f bin/transform-genbank-location ]] || $download_cmd bin/transform-genbank-location {params.transform_genbank_location_url}
[[ -f bin/transform-authors ]] || $download_cmd bin/transform-authors {params.transform_authors_url}
[[ -f bin/apply-geolocation-rules ]] || $download_cmd bin/apply-geolocation-rules {params.apply_geolocation_rules_url}
[[ -f bin/merge-user-metadata ]] || $download_cmd bin/merge-user-metadata {params.merge_user_metadata_url}
[[ -f bin/ndjson-to-tsv-and-fasta ]] || $download_cmd bin/ndjson-to-tsv-and-fasta {params.ndjson_to_tsv_and_fasta_url}
chmod +x bin/*
# (3) Transform the sequences
(cat {input.sequences_ndjson} \
| ./bin/transform-field-names \
--field-map {params.field_map} \
@@ -131,14 +150,22 @@ rule post_process_metadata:

shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f post_process_metadata.py ]] || wget {params.post_process_metadata_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/post_process_metadata.py ]] || $download_cmd bin/post_process_metadata.py {params.post_process_metadata_url}
chmod +x bin/*
# (3) Post-process the metadata
./bin/post_process_metadata.py --metadata {input.metadata} --outfile {output.metadata}
"""

23 changes: 15 additions & 8 deletions ingest/workflow/snakemake_rules/trigger_rebuild.smk
@@ -17,14 +17,21 @@ rule trigger_build:
trigger_on_new_data_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/trigger-on-new-data"
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
fi
if [[ ! -f bin/trigger-on-new-data ]]; then
cd bin
wget {params.trigger_on_new_data_url}
chmod 755 *
cd ..
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/trigger-on-new-data ]] || $download_cmd bin/trigger-on-new-data {params.trigger_on_new_data_url}
chmod +x bin/*
# (3) Trigger the build
./bin/trigger-on-new-data {input.metadata_upload} {input.fasta_upload}
"""
25 changes: 17 additions & 8 deletions ingest/workflow/snakemake_rules/upload.smk
@@ -59,15 +59,24 @@ rule upload_to_s3:
cloudfront_invalidate_url="https://raw.githubusercontent.com/nextstrain/monkeypox/644d07ebe3fa5ded64d27d0964064fb722797c5d/ingest/bin/cloudfront-invalidate"
shell:
"""
if [[ ! -d bin ]]; then
mkdir bin
# (1) Pick curl or wget based on availability
if which curl > /dev/null; then
download_cmd="curl -fsSL --output"
elif which wget > /dev/null; then
download_cmd="wget -O"
else
echo "ERROR: Neither curl nor wget found. Please install one of them."
exit 1
fi
cd bin
[[ -f upload-to-s3 ]] || wget {params.upload_to_s3_url}
[[ -f sha256sum ]] || wget {params.sha256sum_url}
[[ -f cloudfront-invalidate ]] || wget {params.cloudfront_invalidate_url}
chmod 755 *
cd ..
# (2) Download the required scripts if not already present
[[ -d bin ]] || mkdir bin
[[ -f bin/upload-to-s3 ]] || $download_cmd bin/upload-to-s3 {params.upload_to_s3_url}
[[ -f bin/sha256sum ]] || $download_cmd bin/sha256sum {params.sha256sum_url}
[[ -f bin/cloudfront-invalidate ]] || $download_cmd bin/cloudfront-invalidate {params.cloudfront_invalidate_url}
chmod +x bin/*
# (3) Run the upload script
./bin/upload-to-s3 \
{params.quiet} \
{input.file_to_upload:q} \
