Skip to content

Commit

Permalink
fix: incorporate changes from nextstrain/mpox#146
Browse files (browse the repository at this point in the history)
  • Loading branch information
j23414 committed Apr 13, 2023
1 parent 2d9113b commit e48cb99
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 18 deletions.
7 changes: 4 additions & 3 deletions ingest/bin/fetch-from-genbank
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,14 +6,15 @@
set -euo pipefail

bin="$(dirname "$0")"
TAXID="${1:?NCBI taxon id is required.}"


main() {
fetch "$TAXID" | "$bin"/csv-to-ndjson
local ncbi_taxon_id="${1:?NCBI taxon id is required.}"
fetch "$ncbi_taxon_id" | "$bin"/csv-to-ndjson
}

fetch() {
curl "$("$bin"/genbank-url --taxonid "$1")" \
curl "$("$bin"/genbank-url --ncbi-taxon-id "$1")" \
--fail --silent --show-error --http1.1 \
--header 'User-Agent: https://github.com/nextstrain/dengue (hello@nextstrain.org)'
}
Expand Down
22 changes: 7 additions & 15 deletions ingest/bin/genbank-url
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,22 +17,19 @@ import argparse


def parse_args():
"""
Define parse args
"""
parser = argparse.ArgumentParser(
description="Given an NCBI taxon ID, generate URL to download "
"all viral sequences and their curated metadata from GenBank via NCBI Virus."
)
parser.add_argument(
"--taxonid",
"--ncbi-taxon-id",
help="NCBI Taxon ID.",
default="12637",
required=True
)
return parser.parse_args()


def build_query_url(ncbi_id: str):
def build_query_url(ncbi_taxon_id: str):
"""
Generate URL to download all viral sequences and their curated metadata
from GenBank via NCBI Virus.
Expand All @@ -42,7 +39,7 @@ def build_query_url(ncbi_id: str):
# Search criteria
'fq': [
'{!tag=SeqType_s}SeqType_s:("Nucleotide")', # Nucleotide sequences (as opposed to protein)
f'VirusLineageId_ss:({ncbi_id})', # NCBI Taxon id for virus
f'VirusLineageId_ss:({ncbi_taxon_id})', # NCBI Taxon id for virus
'Division_s:("VRL")', # Restrict to viral sequences, avoid PAT
'{!tag=SLen_i}SLen_i:([5000 TO 15000])', # Longer than 5K bp, shorter than 15k bp
#'{!tag=UpdateDate_dt}UpdateDate_dt:([2022-01-01T00:00:00.00Z TO 2022-12-01T00:00:00.00Z ])', # recent
Expand Down Expand Up @@ -90,18 +87,13 @@ def build_query_url(ncbi_id: str):
# to be nice.
'email': 'hello@nextstrain.org',
}
query = urlencode(params, doseq=True, encoding="utf-8")
query = urlencode(params, doseq = True, encoding = "utf-8")

print(f"{endpoint}?{query}")


def main():
"""
Main method
"""
args = parse_args()
build_query_url(args.taxonid)

build_query_url(args.ncbi_taxon_id)

if __name__ == "__main__":
if __name__ == '__main__':
main()

0 comments on commit e48cb99

Please sign in to comment.