Skip to content

Support Exporting Model and Partition Selections to MrBayes #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ tags
test_scripts/iqtree_test_cmds.txt
pllrepo/
build/
cmake-build-debug/
/Default-clang
.idea/
.idea/codeStyleSettings.xml
Expand Down
66 changes: 43 additions & 23 deletions alignment/alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <Eigen/LU>
#ifdef USE_BOOST
#include <boost/bimap.hpp>
#include <boost/math/distributions/binomial.hpp>
#endif

Expand Down Expand Up @@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS
char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial
char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria

boost::bimap<int, char*> genetic_code_map;

Alignment::Alignment()
: vector<Pattern>()
{
Expand Down Expand Up @@ -1892,7 +1895,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) {
(*it) = convertState(*it, seq_type);
}
*/


boost::bimap<int, char*> getGeneticCodeMap() {
if (!genetic_code_map.empty()) return genetic_code_map;

genetic_code_map.insert({1, genetic_code1});
genetic_code_map.insert({2, genetic_code2});
genetic_code_map.insert({3, genetic_code3});
genetic_code_map.insert({4, genetic_code4});
genetic_code_map.insert({5, genetic_code5});
genetic_code_map.insert({6, genetic_code6});
genetic_code_map.insert({9, genetic_code9});
genetic_code_map.insert({10, genetic_code10});
genetic_code_map.insert({11, genetic_code11});
genetic_code_map.insert({12, genetic_code12});
genetic_code_map.insert({13, genetic_code13});
genetic_code_map.insert({14, genetic_code14});
genetic_code_map.insert({16, genetic_code16});
genetic_code_map.insert({21, genetic_code21});
genetic_code_map.insert({22, genetic_code22});
genetic_code_map.insert({23, genetic_code23});
genetic_code_map.insert({24, genetic_code24});
genetic_code_map.insert({25, genetic_code25});

return genetic_code_map;
}

void Alignment::initCodon(char *gene_code_id) {
// build index from 64 codons to non-stop codons
int transl_table = 1;
Expand All @@ -1902,30 +1930,13 @@ void Alignment::initCodon(char *gene_code_id) {
} catch (string &str) {
outError("Wrong genetic code ", gene_code_id);
}
switch (transl_table) {
case 1: genetic_code = genetic_code1; break;
case 2: genetic_code = genetic_code2; break;
case 3: genetic_code = genetic_code3; break;
case 4: genetic_code = genetic_code4; break;
case 5: genetic_code = genetic_code5; break;
case 6: genetic_code = genetic_code6; break;
case 9: genetic_code = genetic_code9; break;
case 10: genetic_code = genetic_code10; break;
case 11: genetic_code = genetic_code11; break;
case 12: genetic_code = genetic_code12; break;
case 13: genetic_code = genetic_code13; break;
case 14: genetic_code = genetic_code14; break;
case 15: genetic_code = genetic_code15; break;
case 16: genetic_code = genetic_code16; break;
case 21: genetic_code = genetic_code21; break;
case 22: genetic_code = genetic_code22; break;
case 23: genetic_code = genetic_code23; break;
case 24: genetic_code = genetic_code24; break;
case 25: genetic_code = genetic_code25; break;
default:
auto code_map = getGeneticCodeMap();
auto found = code_map.left.find(transl_table);
if (found == code_map.left.end()) {
outError("Wrong genetic code ", gene_code_id);
break;
return;
}
genetic_code = found->second;
} else {
genetic_code = genetic_code1;
}
Expand Down Expand Up @@ -1958,6 +1969,15 @@ void Alignment::initCodon(char *gene_code_id) {
// cout << "num_states = " << num_states << endl;
}

int Alignment::getGeneticCodeId() {
if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0;

auto code_map = getGeneticCodeMap();
auto found = code_map.right.find(genetic_code);
if (found == code_map.right.end()) return 0;
return found->second;
}

int getMorphStates(StrVector &sequences) {
char maxstate = 0;
for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++)
Expand Down
6 changes: 6 additions & 0 deletions alignment/alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,12 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
*/
void extractMapleFile(const std::string& aln_name, const InputType& format);

/**
* Get the numerical id of the genetic code
* @return id the genetic code id, or 0 if not a codon type
*/
int getGeneticCodeId();

protected:


Expand Down
99 changes: 97 additions & 2 deletions main/phyloanalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,10 @@ void printOutfilesInfo(Params &params, IQTree &tree) {
if (params.optimize_linked_gtr) {
cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl;
}

if (params.mr_bayes_output) {
cout << " MrBayes block written to: " << params.out_prefix << ".mr_bayes.nex" << endl;
}
cout << endl;

}
Expand Down Expand Up @@ -3303,6 +3307,91 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) {
return nullptr;
}

void printMrBayesBlockFile(Params &params, IQTree* &iqtree) {
ofstream out;
string filename = string(params.out_prefix) + ".mr_bayes.nex";
try {
out.exceptions(ios::failbit | ios::badbit);
out.open(filename);

// Write Warning
out << "#nexus" << endl << endl
<<"[This MrBayes Block Declaration provides the basic "
<< (iqtree->isSuperTree() ? "partition structure and models" : "models")
<< " from the IQTree Run.]" << endl
<< "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl
<< "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl
<< "[This should be used as a Template Only.]" << endl << endl;

// Begin File, Print Charsets
out << "begin mrbayes;" << endl;
} catch (ios::failure &) {
outError(ERR_WRITE_OUTPUT, filename);
}

if (!iqtree->isSuperTree()) {
out << " [IQTree inferred model " << iqtree->getModelName() << ", ";
iqtree->getModel()->printMrBayesModelText(out, "all", "");

out << endl << "end;" << endl;
out.close();
return;
}

auto stree = (PhyloSuperTree*) iqtree;
auto saln = (SuperAlignment*) stree->aln;
auto size = stree->size();

for (int part = 0; part < size; part++) {
string name = saln->partitions[part]->name;
replace(name.begin(), name.end(), '+', '_');
out << " charset " << name << " = ";

string pos = saln->partitions[part]->position_spec;
replace(pos.begin(), pos.end(), ',' , ' ');
out << pos << ";" << endl;
}

// Create Partition
out << endl << " partition iqtree = " << size << ": ";
for (int part = 0; part < size; part++) {
if (part != 0) out << ", ";

string name = saln->partitions[part]->name;
replace(name.begin(), name.end(), '+', '_');
out << name;
}
out << ";" << endl;

// Set Partition for Use
out << " set partition = iqtree;" << endl << endl;

// Partition-Specific Model Information
for (int part = 0; part < size; part++) {
PhyloTree* curr_tree = stree->at(part);

out << " [Subset #" << part + 1 << ": IQTree inferred model " << curr_tree->getModelName() << ", ";
curr_tree->getModel()->printMrBayesModelText(out,
convertIntToString(part + 1),
saln->partitions[part]->name);
out << endl;
}

// Partition Type Settings
if (params.partition_type != TOPO_UNLINKED) {
out << "unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl;
if (params.partition_type != BRLEN_FIX) {
out << "prset applyto=(all) ratepr=variable;" << endl;
if (params.partition_type != BRLEN_SCALE) {
out << "unlink brlens=(all);" << endl;
}
}
}

out << "end;" << endl;
out.close();
}

/************************************************************
* MAIN TREE RECONSTRUCTION
***********************************************************/
Expand Down Expand Up @@ -3820,8 +3909,14 @@ void runTreeReconstruction(Params &params, IQTree* &iqtree) {

}
if (iqtree->isSuperTree()) {
((PhyloSuperTree*) iqtree)->computeBranchLengths();
((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
auto stree = (PhyloSuperTree*) iqtree;
stree->computeBranchLengths();
stree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
}
if (params.mr_bayes_output) {
cout << endl << "Writing MrBayes Block Files..." << endl;
printMrBayesBlockFile(params, iqtree);
cout << endl;
}

cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl;
Expand Down
7 changes: 7 additions & 0 deletions main/phylotesting.h
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,13 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type);

string detectSeqTypeName(string model_name);

/**
* get string name from a SeqType object
* @param seq_type input sequence type
* @return name
*/
string getSeqTypeName(SeqType seq_type);

/****************************************************/
/* Q MATRICES NESTING CHECK */
/****************************************************/
Expand Down
28 changes: 28 additions & 0 deletions model/modelbin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,31 @@ string ModelBIN::getNameParams(bool show_fixed_params) {
}
return retname.str();
}

void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) {
RateHeterogeneity* rate = phylo_tree->getRate();
bool equal_freq = freq_type == FREQ_EQUAL;

// Free Rate should be substituted by +G (+I not supported)
bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate();

// MrBayes's Binary Model is 'F81-like'.
out << "using MrBayes model F81" << (has_gamma ? "+G" : "") << (equal_freq ? "+FQ" : "") << "]" << endl;
if (rate->isFreeRate() || rate->getPInvar() > 0.0) {
out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl;
outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!");
}

// Lset Parameters
out << " lset applyto=(" << partition << ") rates=";
if (has_gamma) {
// Rate Categories + Gamma
out << "gamma";
} else
out << "equal";

out << ";" << endl;

if (equal_freq)
out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl;
}
8 changes: 8 additions & 0 deletions model/modelbin.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ class ModelBIN : public ModelMarkov
*/
virtual void startCheckpoint();

/**
* Print the model information in a format that can be accepted by MrBayes, using lset and prset.<br>
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes.
* @param out the ofstream to print the result to
* @param partition the partition to apply lset and prset to
* @param charset the current partition's charset.
*/
virtual void printMrBayesModelText(ofstream& out, string partition, string charset);
};

#endif
35 changes: 35 additions & 0 deletions model/modelcodon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,41 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) {
return score;
}

void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset) {
// NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2
// GTR codon is not available in IQTREE
int nst = fix_kappa ? 1 : 2;
int code = phylo_tree->aln->getGeneticCodeId();
string mr_bayes_code = getMrBayesGeneticCode(code);
bool code_not_supported = mr_bayes_code.empty();
RateHeterogeneity* rate = phylo_tree->getRate();

string model_name = fix_kappa ? "JC" : "HKY";

// If this model is a Empirical / Semi-Empirical Model
if (num_params == 0 || name.find('_') != string::npos) {
nst = 6;
model_name = "GTR";
}

out << "using MrBayes model " << model_name << "]" << endl;

if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) {
out << " [Rate modifiers (+I, +G, +R) ignored, not supported by MrBayes codon models]" << endl;
outWarning("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!");
}

if (code_not_supported) {
outWarning("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1).");
mr_bayes_code = "universal";
}

out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mr_bayes_code << "]" << endl;
if (code_not_supported)
out << " [Genetic code " << code << " is not supported by MrBayes, defaulting to universal (code 1)]" << endl;

out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mr_bayes_code << ";" << endl;
}

void ModelCodon::writeInfo(ostream &out) {
if (name.find('_') == string::npos)
Expand Down
9 changes: 9 additions & 0 deletions model/modelcodon.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,15 @@ class ModelCodon: public ModelMarkov {
*/
virtual bool getVariables(double *variables);

/**
* Print the model information in a format that can be accepted by MrBayes, using lset and prset.<br>
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes.
* @param out the ofstream to print the result to
* @param partition the partition to apply lset and prset to
* @param charset the current partition's charset.
*/
virtual void printMrBayesModelText(ofstream& out, string partition, string charset);

};

#endif /* MODELCODON_H_ */
Loading
Loading