Skip to content

ENH: attached rate and motif parameters to cogent3 tree object #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 79 additions & 4 deletions src/piqtree2/iqtree/_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,66 @@

from piqtree2.exceptions import ParseIqTreeError
from piqtree2.iqtree._decorator import iqtree_func
from piqtree2.model import Model
from piqtree2.model import DnaModel, Model

iq_build_tree = iqtree_func(iq_build_tree, hide_files=True)
iq_fit_tree = iqtree_func(iq_fit_tree, hide_files=True)

# the order defined in IQ-TREE
RATE_PARS = "A/C", "A/G", "A/T", "C/G", "C/T", "G/T"
MOTIF_PARS = "A", "C", "G", "T"


def _rename_iq_tree(tree: cogent3.PhyloNode, names: Sequence[str]) -> None:
for tip in tree.tips():
tip.name = names[int(tip.name)]


def _process_tree_yaml(tree_yaml: dict, names: Sequence[str]) -> cogent3.PhyloNode:
def _insert_edge_pars(tree: cogent3.PhyloNode, **kwargs: dict) -> None:
# inserts the edge parameters into each edge to match the structure of
# cogent3.PhyloNode
for node in tree.get_edge_vector():
# skip the rate parameters when node is the root
if node.is_root():
kwargs = {k: v for k, v in kwargs.items() if k == "mprobs"}
del node.params["edge_pars"]
node.params.update(kwargs)


def _edge_pars_for_cogent3(tree: cogent3.PhyloNode, model: Model) -> None:
rate_pars = tree.params["edge_pars"]["rates"]
motif_pars = {"mprobs": tree.params["edge_pars"]["mprobs"]}
# renames parameters to conform to cogent3's naming conventions
if model.substitution_model in {DnaModel.JC, DnaModel.F81}:
# skip rate_pars since rate parameters are constant in JC and F81
_insert_edge_pars(
tree,
**motif_pars,
)
return
if model.substitution_model in {DnaModel.K80, DnaModel.HKY}:
rate_pars = {"kappa": rate_pars["A/G"]}

elif model.substitution_model is DnaModel.TN:
rate_pars = {"kappa_r": rate_pars["A/G"], "kappa_y": rate_pars["C/T"]}

elif model.substitution_model is DnaModel.GTR:
del rate_pars["G/T"]

# applies global rate parameters to each edge
_insert_edge_pars(
tree,
**rate_pars,
**motif_pars,
)


def _process_tree_yaml(
tree_yaml: dict,
names: Sequence[str],
) -> cogent3.PhyloNode:
newick = tree_yaml["PhyloTree"]["newick"]

tree = cogent3.make_tree(newick)

candidates = tree_yaml["CandidateSet"]
Expand All @@ -36,7 +83,23 @@ def _process_tree_yaml(tree_yaml: dict, names: Sequence[str]) -> cogent3.PhyloNo

tree.params["lnL"] = likelihood

# parse only DNA model which is not in Lie model list
if "ModelDNA" in tree_yaml:
# converts the strings of rate and motif parameters into dictionary
tree.params["edge_pars"] = {
"rates": dict(
zip(RATE_PARS, map(float, tree_yaml["ModelDNA"]["rates"].split(", "))),
),
"mprobs": dict(
zip(
MOTIF_PARS,
map(float, tree_yaml["ModelDNA"]["state_freq"].split(", ")),
),
),
}

_rename_iq_tree(tree, names)

return tree


Expand Down Expand Up @@ -71,7 +134,13 @@ def build_tree(
seqs = [str(seq) for seq in aln.iter_seqs(names)]

yaml_result = yaml.safe_load(iq_build_tree(names, seqs, str(model), rand_seed))
return _process_tree_yaml(yaml_result, names)
tree = _process_tree_yaml(yaml_result, names)

# if edge parameters been extracted from IQ-TREE output,
# modify tree to mimic cogent3.PhyloNode
if "edge_pars" in tree.params:
_edge_pars_for_cogent3(tree, model)
return tree


def fit_tree(
Expand Down Expand Up @@ -112,4 +181,10 @@ def fit_tree(
yaml_result = yaml.safe_load(
iq_fit_tree(names, seqs, str(model), newick, rand_seed),
)
return _process_tree_yaml(yaml_result, names)
tree = _process_tree_yaml(yaml_result, names)

# if edge parameters been extracted from IQ-TREE output,
# modify tree to mimic cogent3.PhyloNode
if "edge_pars" in tree.params:
_edge_pars_for_cogent3(tree, model)
return tree
19 changes: 19 additions & 0 deletions tests/test_iqtree/test_fit_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,22 @@ def test_fit_tree(three_otu: ArrayAlignment, iq_model: DnaModel, c3_model: str)
# Should be within an approximation for any seed
got2 = piqtree2.fit_tree(three_otu, tree_topology, Model(iq_model), rand_seed=None)
assert got2.params["lnL"] == pytest.approx(expected.lnL)

# Check motif and rate parameters
expected_motif_prob = expected.tree.params["mprobs"]
for k, v in expected_motif_prob.items():
assert k in got2.params["mprobs"]
assert got2.params["mprobs"][k] == pytest.approx(v)

exclude = {"length", "ENS", "paralinear", "mprobs"}
expected_rates = {
k: v
for k, v in expected.tree.get_edge_vector()[0].params.items()
if k not in exclude
}
for k, v in expected_rates.items():
assert k in got2.get_edge_vector()[0].params
assert got2.get_edge_vector()[0].params[k] == pytest.approx(
v,
rel=1e-2,
)
57 changes: 56 additions & 1 deletion tests/test_iqtree/test_tree_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,61 @@ def newick_not_in_candidates() -> dict[str, Any]:
}


@pytest.fixture()
def standard_yaml() -> dict[str, Any]:
return {
"CandidateSet": {
0: "-6736.94578464 (0:0.0063211201,1:0.0029675780,(2:0.0228519739,3:0.3072009029):0.01373649616);",
1: "-6757.78815651 (0:0.0063607954,(1:0.0030079874,2:0.0365597715):2.296825575e-06,3:0.3208135518);",
2: "-6758.07765021 (0:0.0063826033,(1:0.0021953253,3:0.3207201830):0.0001145372551,2:0.0365362763);",
},
"ModelDNA": {
"rates": "1, 3.82025079, 1, 1, 3.82025079, 1",
"state_freq": "0.3628523161, 0.1852938562, 0.2173913044, 0.2344625233",
},
"PhyloTree": {
"newick": "(0:0.0063211201,1:0.0029675780,(2:0.0228519739,3:0.3072009029):0.01373649616);",
},
"StopRule": {
"curIteration": 101,
"start_real_time": 1724397157,
"time_vec": None,
},
"boot_consense_logl": 0,
"contree_rfdist": -1,
"finished": True,
"finishedCandidateSet": True,
"finishedModelFinal": True,
"finishedModelInit": True,
"initTree": "(0:0.0063680036,1:0.0026681490,(2:0.0183861083,3:0.3034074996):0.01838610827);",
"iqtree": {"seed": 95633264, "start_time": 1724397157, "version": "2.3.6.lib"},
}


def test_newick_not_in_candidates(newick_not_in_candidates: dict[str, Any]) -> None:
with pytest.raises(ParseIqTreeError):
_ = _process_tree_yaml(newick_not_in_candidates, ["a", "b", "c"])
_ = _process_tree_yaml(
newick_not_in_candidates,
["a", "b", "c"],
)


def test_edge_params(standard_yaml: dict[str, Any]) -> None:
params = {
"rates": {
"A/C": 1.0,
"A/G": 3.82025079,
"A/T": 1.0,
"C/G": 1.0,
"C/T": 3.82025079,
"G/T": 1,
},
"mprobs": {
"A": 0.3628523161,
"C": 0.1852938562,
"G": 0.2173913044,
"T": 0.2344625233,
},
}
tree = _process_tree_yaml(standard_yaml, ["a", "b", "c", "d"])
assert tree.params["edge_pars"] == params