From 72fc8e29a9feb560459061234a5c2b06d0e64129 Mon Sep 17 00:00:00 2001 From: zyxue Date: Thu, 4 Feb 2021 22:21:12 -0800 Subject: [PATCH] updated README and docstring --- README.md | 6 +++++- ncbitax2lin/ncbitax2lin.py | 14 +++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 03af8dc..c3f1a47 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ mkdir -p taxdump && tar zxf taxdump.tar.gz -C ./taxdump Then, run ncbitax2lin ```bash -ncbitax2lin taxdump/nodes.dmp taxdump/names.dmp +ncbitax2lin --nodes-file taxdump/nodes.dmp --names-file taxdump/names.dmp ``` By default, the generated lineages will be saved to @@ -62,3 +62,7 @@ of a different timestamp. ## Used in * Mahmoudabadi, G., & Phillips, R. (2018). A comprehensive and quantitative exploration of thousands of viral genomes. ELife, 7. https://doi.org/10.7554/eLife.31955 +* Dombrowski, N. et al. (2020) Undinarchaeota illuminate DPANN phylogeny and the impact of gene transfer on archaeal evolution, Nature Communications. Springer US, 11(1). doi: 10.1038/s41467-020-17408-w. https://www.nature.com/articles/s41467-020-17408-w +* Schenberger Santos, A. R. et al. (2020) NAD+ biosynthesis in bacteria is controlled by global carbon/nitrogen levels via PII signaling, Journal of Biological Chemistry, 295(18), pp. 6165–6176. doi: 10.1074/jbc.RA120.012793. https://www.sciencedirect.com/science/article/pii/S0021925817482433 +* Villada, J. C., Duran, M. F. and Lee, P. K. H. (2020) Interplay between Position-Dependent Codon Usage Bias and Hydrogen Bonding at the 5' End of ORFeomes, mSystems, 5(4), pp. 1–18. doi: 10.1128/msystems.00613-20. https://msystems.asm.org/content/5/4/e00613-20 +* Byadgi, O. et al. (2020) Transcriptome analysis of Amyloodinium ocellatum tomonts revealed basic information on the major potential virulence factors, Genes, 11(11), pp. 1–12. doi: 10.3390/genes11111252. 
https://www.mdpi.com/2073-4425/11/11/1252 diff --git a/ncbitax2lin/ncbitax2lin.py b/ncbitax2lin/ncbitax2lin.py index ef3828d..b773531 100755 --- a/ncbitax2lin/ncbitax2lin.py +++ b/ncbitax2lin/ncbitax2lin.py @@ -29,7 +29,7 @@ class TaxUnit(TypedDict): rank_name: str -# the strings are tax_id, rank, rank_name +# A lineage is a list of (tax_id, rank, rank_name) tuples. Lineage = NewType("Lineage", List[Tuple[int, str, str]]) # set TAXONOMY_DICT as global variable so it can work with multiprocess.Pool @@ -105,17 +105,17 @@ def convert_lineage_to_dict(lineage: Lineage) -> Dict[str, Union[int, str]]: """Converts the lineage in a list-of-tuples represetantion to a dictionary representation [ - (tax_id1, rank1, name_txt1), - (tax_id2, rank2, name_txt2), + ("tax_id1", "rank1", "name_txt1"), + ("tax_id2", "rank2", "name_txt2"), ... ] becomes { - rank1: name_txt1, - rank2: name_txt2, - tax_id, tax_id2, # using the last rank as the tax_id of this lineage + "rank1": "name_txt1", + "rank2": "name_txt2", + "tax_id": "tax_id2", # using the tax_id of the last rank as the tax_id of this lineage } A concrete example: @@ -129,8 +129,8 @@ def convert_lineage_to_dict(lineage: Lineage) -> Dict[str, Union[int, str]]: { 'no rank': 'cellular organisms', + 'superkingdom': 'Bacteria', 'tax_id': 2, - 'superkingdom': 'Bacteria' } """