-
Notifications
You must be signed in to change notification settings - Fork 129
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #891 from nextstrain/fix-single-trait-inference
Fix single trait inference
- Loading branch information
Showing
6 changed files
with
240 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
Integration tests for augur traits. | ||
|
||
$ pushd "$TESTDIR" > /dev/null | ||
$ export AUGUR="../../bin/augur" | ||
|
||
Infer the ancestral region for a given tree and metadata. | ||
|
||
$ ${AUGUR} traits \ | ||
> --metadata "traits/metadata.tsv" \ | ||
> --tree "traits/tree.nwk" \ | ||
> --columns region \ | ||
> --output-node-data "$TMP/traits.json" > /dev/null | ||
|
||
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" "traits/traits_region.json" "$TMP/traits.json" --significant-digits 5 | ||
{} | ||
$ rm -f "$TMP/traits.json" | ||
|
||
Infer the ancestral "virus" value from the same metadata. | ||
Since there is only a single virus in the data, Augur warns the user through stderr. | ||
|
||
$ ${AUGUR} traits \ | ||
> --metadata "traits/metadata.tsv" \ | ||
> --tree "traits/tree.nwk" \ | ||
> --columns virus \ | ||
> --output-node-data "$TMP/traits.json" > /dev/null | ||
WARNING: only one state found for discrete state reconstruction: ['zika'] | ||
|
||
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" "traits/traits_virus.json" "$TMP/traits.json" --significant-digits 5 | ||
{} | ||
$ rm -f "$TMP/traits.json" | ||
|
||
Repeat inference of a trait with a single value, but request confidence intervals. | ||
This should similarly warn the user through stderr, but it should not produce an error. | ||
|
||
$ ${AUGUR} traits \ | ||
> --metadata "traits/metadata.tsv" \ | ||
> --tree "traits/tree.nwk" \ | ||
> --columns virus \ | ||
> --confidence \ | ||
> --output-node-data "$TMP/traits.json" > /dev/null | ||
WARNING: only one state found for discrete state reconstruction: ['zika'] | ||
|
||
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" "traits/traits_virus.json" "$TMP/traits.json" --significant-digits 5 | ||
{} | ||
$ rm -f "$TMP/traits.json" | ||
|
||
Switch back to the original directory where testing started. | ||
|
||
$ popd > /dev/null |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
strain virus accession date region country division city db segment authors url title journal paper_url | ||
PAN/CDC_259359_V1_V3/2015 zika KX156774 2015-12-18 North America Panama Panama Panama genbank genome Shabman et al https://www.ncbi.nlm.nih.gov/nuccore/KX156774 Direct Submission Submitted (29-APR-2016) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
COL/FLR_00024/2015 zika MF574569 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574569 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
PRVABC59 zika KU501215 2015-12-XX North America Puerto Rico Puerto Rico Puerto Rico genbank genome Lanciotti et al https://www.ncbi.nlm.nih.gov/nuccore/KU501215 Phylogeny of Zika Virus in Western Hemisphere, 2015 Emerging Infect. Dis. 22 (5), 933-935 (2016) https://www.ncbi.nlm.nih.gov/pubmed/27088323 | ||
COL/FLR_00008/2015 zika MF574562 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574562 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
Colombia/2016/ZC204Se zika KY317939 2016-01-06 South America Colombia Colombia Colombia genbank genome Quick et al https://www.ncbi.nlm.nih.gov/nuccore/KY317939 Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples Nat Protoc 12 (6), 1261-1276 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538739 | ||
ZKC2/2016 zika KX253996 2016-02-16 Oceania American Samoa American Samoa American Samoa genbank genome Wu et al https://www.ncbi.nlm.nih.gov/nuccore/KX253996 Direct Submission Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China https://www.ncbi.nlm.nih.gov/pubmed/ | ||
VEN/UF_1/2016 zika KX702400 2016-03-25 South America Venezuela Venezuela Venezuela genbank genome Blohm et al https://www.ncbi.nlm.nih.gov/nuccore/KX702400 Complete Genome Sequences of Identical Zika virus Isolates in a Nursing Mother and Her Infant Genome Announc 5 (17), e00231-17 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28450510 | ||
DOM/2016/BB_0059 zika KY785425 2016-04-04 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785425 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
BRA/2016/FC_6706 zika KY785433 2016-04-08 South America Brazil Brazil Brazil genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785433 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
DOM/2016/BB_0183 zika KY785420 2016-04-18 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785420 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
EcEs062_16 zika KX879603 2016-04-XX South America Ecuador Ecuador Ecuador genbank genome Marquez et al https://www.ncbi.nlm.nih.gov/nuccore/KX879603 First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador Genome Announc 5 (8), e01673-16 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28232448 | ||
HND/2016/HU_ME59 zika KY785418 2016-05-13 North America Honduras Honduras Honduras genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785418 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
{ | ||
"generated_by": { | ||
"program": "augur", | ||
"version": "14.1.0" | ||
}, | ||
"models": { | ||
"region": { | ||
"alphabet": [ | ||
"North America", | ||
"Oceania", | ||
"South America", | ||
"?" | ||
], | ||
"equilibrium_probabilities": [ | ||
0.31005064170488, | ||
0.28662652026143665, | ||
0.40332283803368346 | ||
], | ||
"rate": 218.0279017803387, | ||
"transition_matrix": [ | ||
[ | ||
0.0, | ||
1.0971786118353992, | ||
2.204495361871908 | ||
], | ||
[ | ||
1.0971786118353992, | ||
0.0, | ||
1.0970462842057542 | ||
], | ||
[ | ||
2.204495361871908, | ||
1.0970462842057542, | ||
0.0 | ||
] | ||
] | ||
} | ||
}, | ||
"nodes": { | ||
"BRA/2016/FC_6706": { | ||
"region": "South America" | ||
}, | ||
"COL/FLR_00008/2015": { | ||
"region": "South America" | ||
}, | ||
"Colombia/2016/ZC204Se": { | ||
"region": "South America" | ||
}, | ||
"DOM/2016/BB_0183": { | ||
"region": "North America" | ||
}, | ||
"EcEs062_16": { | ||
"region": "South America" | ||
}, | ||
"HND/2016/HU_ME59": { | ||
"region": "North America" | ||
}, | ||
"NODE_0000001": { | ||
"region": "South America" | ||
}, | ||
"NODE_0000002": { | ||
"region": "North America" | ||
}, | ||
"NODE_0000003": { | ||
"region": "North America" | ||
}, | ||
"NODE_0000004": { | ||
"region": "North America" | ||
}, | ||
"NODE_0000005": { | ||
"region": "South America" | ||
}, | ||
"NODE_0000006": { | ||
"region": "South America" | ||
}, | ||
"NODE_0000007": { | ||
"region": "South America" | ||
}, | ||
"NODE_0000008": { | ||
"region": "South America" | ||
}, | ||
"PAN/CDC_259359_V1_V3/2015": { | ||
"region": "North America" | ||
}, | ||
"PRVABC59": { | ||
"region": "North America" | ||
}, | ||
"VEN/UF_1/2016": { | ||
"region": "South America" | ||
}, | ||
"ZKC2/2016": { | ||
"region": "Oceania" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
{ | ||
"generated_by": { | ||
"program": "augur", | ||
"version": "14.1.0" | ||
}, | ||
"models": {}, | ||
"nodes": { | ||
"BRA/2016/FC_6706": { | ||
"virus": "zika" | ||
}, | ||
"COL/FLR_00008/2015": { | ||
"virus": "zika" | ||
}, | ||
"Colombia/2016/ZC204Se": { | ||
"virus": "zika" | ||
}, | ||
"DOM/2016/BB_0183": { | ||
"virus": "zika" | ||
}, | ||
"EcEs062_16": { | ||
"virus": "zika" | ||
}, | ||
"HND/2016/HU_ME59": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000001": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000002": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000003": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000004": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000005": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000006": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000007": { | ||
"virus": "zika" | ||
}, | ||
"NODE_0000008": { | ||
"virus": "zika" | ||
}, | ||
"PAN/CDC_259359_V1_V3/2015": { | ||
"virus": "zika" | ||
}, | ||
"PRVABC59": { | ||
"virus": "zika" | ||
}, | ||
"VEN/UF_1/2016": { | ||
"virus": "zika" | ||
}, | ||
"ZKC2/2016": { | ||
"virus": "zika" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
((Colombia/2016/ZC204Se:0.00105368,(PAN/CDC_259359_V1_V3/2015:0.00076051,(COL/FLR_00008/2015:0.00044440,VEN/UF_1/2016:0.00089377)NODE_0000008:0.00038502)NODE_0000007:0.00019253)NODE_0000001:0.00080159,(BRA/2016/FC_6706:0.00214920,(ZKC2/2016:0.00173693,(HND/2016/HU_ME59:0.00206150,PRVABC59:0.00135309)NODE_0000004:0.00013537,(EcEs062_16:0.00175918,DOM/2016/BB_0183:0.00184905)NODE_0000002:0.00021565)NODE_0000003:0.00013737)NODE_0000005:0.00019772)NODE_0000006:0.00100000; |