Skip to content

ENH: add freq rate type including rate het #39

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
98d1ddc
DEV: add freq type enum
rmcar17 Aug 7, 2024
5f50c2f
DEV: fix descriptions being a member of enum
rmcar17 Aug 7, 2024
c25be5c
TST: add tests for FreqType
rmcar17 Aug 7, 2024
a2d3070
DEV: use typing_extensions for Self
rmcar17 Aug 7, 2024
16361b6
REF: _model -> _substitution_model
rmcar17 Aug 7, 2024
1989090
DEV: use model class for storing Substitution Model and FreqType
rmcar17 Aug 7, 2024
5597b96
REF: rename test_model.py -> test_substitution_model.py
rmcar17 Aug 7, 2024
e5541d2
TST: add tests for freq type when building tree
rmcar17 Aug 7, 2024
f3326a7
MAINT: remove dead code for models and descriptions
rmcar17 Aug 7, 2024
bec32f1
DOC: document frequency types
rmcar17 Aug 7, 2024
bdcc395
MAINT: ignore name issues in substitution model (follows IQ-TREE conv…
rmcar17 Aug 7, 2024
ef34449
DEV: add rate type classes
rmcar17 Aug 8, 2024
0381944
DEV: add rate_heterogenity to model
rmcar17 Aug 8, 2024
8c55eb1
DOC: document RateType models
rmcar17 Aug 8, 2024
71af264
DOC: add references for Discrete Gamma and FreeRate models
rmcar17 Aug 8, 2024
5897304
DOC: add rate het to build and fit tree documentation
rmcar17 Aug 8, 2024
34cd5a2
DEV: add `_rate_type` to model `__init__.py`
rmcar17 Aug 8, 2024
3e26c6e
TST: test rate het iqtree settings construction
rmcar17 Aug 8, 2024
fd2c881
TST: add tests for invariable sites and rate het models
rmcar17 Aug 8, 2024
79ad555
TST: add test for invalid model specification segfault
rmcar17 Aug 9, 2024
9bc42b5
TST: remove unused import
rmcar17 Aug 9, 2024
48b9f88
DOC: update readme to new model specification
rmcar17 Aug 9, 2024
c8e9301
DEV: add available_freq_type and available_rate_type for getting avai…
rmcar17 Aug 14, 2024
7b38d72
TST: rate type now tests no number for default string, also tests def…
rmcar17 Aug 14, 2024
6479de4
TST: add tests for freq type and rate type model
rmcar17 Aug 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ This project is still in early development, if you encounter any problems or hav

```python
from piqtree2 import build_tree
from piqtree2.model import DnaModel
from piqtree2.model import DnaModel, Model
from cogent3 import load_aligned_seqs # Included with piqtree2!

# Load Sequences
aln = load_aligned_seqs("tests/data/example.fasta", moltype="dna")
aln = aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Mouse"])

# Reconstruct a phylogenetic tree with IQ-TREE!
tree = build_tree(aln, DnaModel.JC, rand_seed=1) # Optionally specify a random seed.
tree = build_tree(aln, Model(DnaModel.JC), rand_seed=1) # Optionally specify a random seed.

print("Tree topology:", tree) # A cogent3 tree object
print("Log-likelihood:", tree.params["lnL"])
Expand All @@ -41,7 +41,7 @@ print("Log-likelihood:", tree.params["lnL"])

```python
from piqtree2 import fit_tree
from piqtree2.model import DnaModel
from piqtree2.model import DnaModel, Model
from cogent3 import load_aligned_seqs, make_tree # Included with piqtree2!

# Load Sequences
Expand All @@ -52,7 +52,7 @@ aln = aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Mouse"])
tree = make_tree("(Human, Chimpanzee, (Rhesus, Mouse));")

# Fit branch lengths with IQ-TREE!
tree = fit_tree(aln, tree, DnaModel.JC, rand_seed=1) # Optionally specify a random seed.
tree = fit_tree(aln, tree, Model(DnaModel.JC), rand_seed=1) # Optionally specify a random seed.

print("Tree with branch lengths:", tree) # A cogent3 tree object
print("Log-likelihood:", tree.params["lnL"])
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ target-version = "py39"
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["ALL"]
ignore = ["EXE002", "FA100", "E501", "D"]
ignore = ["EXE002", "FA100", "E501", "D", "PLR2004"]

# Allow fix for all enabled rules (when `--fix`) is provided.
fixable = ["ALL"]
Expand All @@ -120,7 +120,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
"N803",
"D"
]
"src/piqtree2/model/_model.py" = ["N815"] # use IQ-TREE naming scheme
"src/piqtree2/model/_substitution_model.py" = ["N815"] # use IQ-TREE naming scheme

[tool.ruff.format]
# Like Black, use double quotes for strings.
Expand Down
4 changes: 3 additions & 1 deletion src/piqtree2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
random_trees,
robinson_foulds,
)
from piqtree2.model import available_models
from piqtree2.model import available_freq_type, available_models, available_rate_type

__version__ = "0.2.0"

__all__ = [
"available_freq_type",
"available_models",
"available_rate_type",
"build_tree",
"fit_tree",
"random_trees",
Expand Down
18 changes: 9 additions & 9 deletions src/piqtree2/iqtree/_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from piqtree2.exceptions import ParseIqTreeError
from piqtree2.iqtree._decorator import iqtree_func
from piqtree2.model import SubstitutionModel
from piqtree2.model import Model

iq_build_tree = iqtree_func(iq_build_tree, hide_files=True)
iq_fit_tree = iqtree_func(iq_fit_tree, hide_files=True)
Expand Down Expand Up @@ -42,7 +42,7 @@ def _process_tree_yaml(tree_yaml: dict, names: Sequence[str]) -> cogent3.PhyloNo

def build_tree(
aln: Union[cogent3.Alignment, cogent3.ArrayAlignment],
model: SubstitutionModel,
model: Model,
rand_seed: Optional[int] = None,
) -> cogent3.PhyloNode:
"""Reconstruct a phylogenetic tree.
Expand All @@ -53,8 +53,8 @@ def build_tree(
----------
aln : Union[cogent3.Alignment, cogent3.ArrayAlignment]
The sequence alignment.
model : SubstitutionModel
The substitution model.
model : Model
The substitution model with base frequencies and rate heterogeneity.
rand_seed : Optional[int], optional
The random seed - 0 or None means no seed, by default None.

Expand All @@ -70,14 +70,14 @@ def build_tree(
names = aln.names
seqs = [str(seq) for seq in aln.iter_seqs(names)]

yaml_result = yaml.safe_load(iq_build_tree(names, seqs, model.value, rand_seed))
yaml_result = yaml.safe_load(iq_build_tree(names, seqs, str(model), rand_seed))
return _process_tree_yaml(yaml_result, names)


def fit_tree(
aln: Union[cogent3.Alignment, cogent3.ArrayAlignment],
tree: cogent3.PhyloNode,
model: SubstitutionModel,
model: Model,
rand_seed: Optional[int] = None,
) -> cogent3.PhyloNode:
"""Fit branch lengths to a tree.
Expand All @@ -91,8 +91,8 @@ def fit_tree(
The sequence alignment.
tree : cogent3.PhyloNode
The topology to fit branch lengths to.
model : SubstitutionModel
The substitution model.
model : Model
The substitution model with base frequencies and rate heterogeneity.
rand_seed : Optional[int], optional
The random seed - 0 or None means no seed, by default None.

Expand All @@ -110,6 +110,6 @@ def fit_tree(
newick = str(tree)

yaml_result = yaml.safe_load(
iq_fit_tree(names, seqs, model.value, newick, rand_seed),
iq_fit_tree(names, seqs, str(model), newick, rand_seed),
)
return _process_tree_yaml(yaml_result, names)
22 changes: 19 additions & 3 deletions src/piqtree2/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
"""Models available in IQ-TREE."""

from ._model import AaModel, DnaModel, SubstitutionModel
from ._options import available_models
from ._freq_type import FreqType
from ._model import Model
from ._options import available_freq_type, available_models, available_rate_type
from ._rate_type import DiscreteGammaModel, FreeRateModel, RateModel, RateType
from ._substitution_model import AaModel, DnaModel, SubstitutionModel

__all__ = ["available_models", "AaModel", "DnaModel", "SubstitutionModel"]
__all__ = [
"available_freq_type",
"available_models",
"available_rate_type",
"AaModel",
"DiscreteGammaModel",
"DnaModel",
"FreeRateModel",
"FreqType",
"Model",
"RateModel",
"RateType",
"SubstitutionModel",
]
34 changes: 34 additions & 0 deletions src/piqtree2/model/_freq_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import functools
from enum import Enum, unique

from typing_extensions import Self


@unique
class FreqType(Enum):
    """Types of base frequencies.

    Each member's value is the short string code for that frequency type
    (``"F"``, ``"FO"`` or ``"FQ"``); presumably these are the tokens used
    when composing an IQ-TREE model string -- confirm against the caller.
    A human-readable explanation of each member is available through
    :attr:`description`.
    """

    F = "F"
    FO = "FO"  # letter "O", not the digit zero
    FQ = "FQ"

    # The mapping is produced by a cached static method instead of being a
    # plain class attribute: a dict assigned in the class body would itself
    # be turned into an enum member, and the members it refers to do not
    # exist until the class body has finished executing.
    #
    # The return annotation names the class explicitly because ``Self`` is
    # not valid inside a static method (PEP 673).
    @staticmethod
    @functools.cache
    def _descriptions() -> dict["FreqType", str]:
        """Build (once) the mapping from each member to its description.

        Returns
        -------
        dict[FreqType, str]
            The description for every member of the enum.

        """
        return {
            FreqType.F: "Empirical state frequency observed from the data.",
            FreqType.FO: "State frequency optimized by maximum-likelihood from the data. Note that this is with letter-O and not digit-0.",
            FreqType.FQ: "Equal state frequency.",
        }

    @property
    def description(self) -> str:
        """The description of the FreqType.

        Returns
        -------
        str
            The description of the FreqType.

        """
        return self._descriptions()[self]
Loading