Skip to content

Support Exporting Analysis to MrBayes #267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ tags
test_scripts/iqtree_test_cmds.txt
pllrepo/
build/
cmake-build-debug/
/Default-clang
.idea/
.idea/codeStyleSettings.xml
Expand Down
66 changes: 43 additions & 23 deletions alignment/alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <Eigen/LU>
#ifdef USE_BOOST
#include <boost/bimap.hpp>
#include <boost/math/distributions/binomial.hpp>
#endif

Expand Down Expand Up @@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS
char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial
char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria

boost::bimap<int, char*> genetic_code_map;

Alignment::Alignment()
: vector<Pattern>()
{
Expand Down Expand Up @@ -1603,7 +1606,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) {
(*it) = convertState(*it, seq_type);
}
*/


boost::bimap<int, char*> getGeneticCodeMap() {
if (!genetic_code_map.empty()) return genetic_code_map;

genetic_code_map.insert({1, genetic_code1});
genetic_code_map.insert({2, genetic_code2});
genetic_code_map.insert({3, genetic_code3});
genetic_code_map.insert({4, genetic_code4});
genetic_code_map.insert({5, genetic_code5});
genetic_code_map.insert({6, genetic_code6});
genetic_code_map.insert({9, genetic_code9});
genetic_code_map.insert({10, genetic_code10});
genetic_code_map.insert({11, genetic_code11});
genetic_code_map.insert({12, genetic_code12});
genetic_code_map.insert({13, genetic_code13});
genetic_code_map.insert({14, genetic_code14});
genetic_code_map.insert({16, genetic_code16});
genetic_code_map.insert({21, genetic_code21});
genetic_code_map.insert({22, genetic_code22});
genetic_code_map.insert({23, genetic_code23});
genetic_code_map.insert({24, genetic_code24});
genetic_code_map.insert({25, genetic_code25});

return genetic_code_map;
}

void Alignment::initCodon(char *gene_code_id) {
// build index from 64 codons to non-stop codons
int transl_table = 1;
Expand All @@ -1613,30 +1641,13 @@ void Alignment::initCodon(char *gene_code_id) {
} catch (string &str) {
outError("Wrong genetic code ", gene_code_id);
}
switch (transl_table) {
case 1: genetic_code = genetic_code1; break;
case 2: genetic_code = genetic_code2; break;
case 3: genetic_code = genetic_code3; break;
case 4: genetic_code = genetic_code4; break;
case 5: genetic_code = genetic_code5; break;
case 6: genetic_code = genetic_code6; break;
case 9: genetic_code = genetic_code9; break;
case 10: genetic_code = genetic_code10; break;
case 11: genetic_code = genetic_code11; break;
case 12: genetic_code = genetic_code12; break;
case 13: genetic_code = genetic_code13; break;
case 14: genetic_code = genetic_code14; break;
case 15: genetic_code = genetic_code15; break;
case 16: genetic_code = genetic_code16; break;
case 21: genetic_code = genetic_code21; break;
case 22: genetic_code = genetic_code22; break;
case 23: genetic_code = genetic_code23; break;
case 24: genetic_code = genetic_code24; break;
case 25: genetic_code = genetic_code25; break;
default:
auto codeMap = getGeneticCodeMap();
auto found = codeMap.left.find(transl_table);
if (found == codeMap.left.end()) {
outError("Wrong genetic code ", gene_code_id);
break;
return;
}
genetic_code = found->second;
} else {
genetic_code = genetic_code1;
}
Expand Down Expand Up @@ -1669,6 +1680,15 @@ void Alignment::initCodon(char *gene_code_id) {
// cout << "num_states = " << num_states << endl;
}

int Alignment::getGeneticCodeId() {
if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0;

auto code_map = getGeneticCodeMap();
auto found = code_map.right.find(genetic_code);
if (found == code_map.right.end()) return 0;
return found->second;
}

int getMorphStates(StrVector &sequences) {
char maxstate = 0;
for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++)
Expand Down
6 changes: 6 additions & 0 deletions alignment/alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,12 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
*/
void extractMapleFile(const std::string& aln_name, const InputType& format);

/**
* Get the numerical id of the genetic code
* @return id the genetic code id, or 0 if not a codon type
*/
int getGeneticCodeId();

protected:


Expand Down
103 changes: 101 additions & 2 deletions main/phyloanalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,12 @@ void printOutfilesInfo(Params &params, IQTree &tree) {
if (params.optimize_linked_gtr) {
cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl;
}

if (params.mr_bayes_output) {
cout << endl << "MrBayes block written to:" << endl;
cout << " Base template file: " << params.out_prefix << ".mr_bayes_scheme.nex" << endl;
cout << " Template file with parameters: " << params.out_prefix << ".mr_bayes_model.nex" << endl;
}
cout << endl;

}
Expand Down Expand Up @@ -2596,6 +2602,92 @@ void printTrees(vector<string> trees, Params &params, string suffix) {
treesOut.close();
}

void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParams) {
ofstream out;
try {
out.exceptions(ios::failbit | ios::badbit);
out.open(filename);

string provide = "basic models";
if (inclParams) provide = "optimized values";
else if (iqtree->isSuperTree()) provide = "basic partition structure and models";

// Write Warning
out << "#nexus" << endl << endl
<<"[This MrBayes Block Declaration provides the " << provide << " from the IQTree Run.]" << endl
<< "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl;

if (inclParams)
out << "[However, for those cases, there will still be a rate matrix provided.]" << endl
<< "[For DNA, this will still mean the rates may vary outside the restrictions of the model.]" << endl
<< "[For Protein, this is essentially a perfect replacement.]" << endl;

out << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl
<< "[This should be used as a Template Only.]" << endl << endl;

// Begin File, Print Charsets
out << "begin mrbayes;" << endl;
} catch (ios::failure &) {
outError(ERR_WRITE_OUTPUT, filename);
}

if (!iqtree->isSuperTree()) {
// Set Outgroup (if available)
if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl;

out << " [Using Model '" << iqtree->getModelName() << "']" << endl;
iqtree->getModel()->printMrBayesModelText(out, "all", "", false, inclParams);

out << endl << "end;" << endl;
out.close();
return;
}

auto superTree = (PhyloSuperTree*) iqtree;
auto saln = (SuperAlignment*) superTree->aln;
auto size = superTree->size();

for (int part = 0; part < size; part++) {
string name = saln->partitions[part]->name;
replace(name.begin(), name.end(), '+', '_');
out << " charset " << name << " = ";

string pos = saln->partitions[part]->position_spec;
replace(pos.begin(), pos.end(), ',' , ' ');
out << pos << ";" << endl;
}

// Create Partition
out << endl << " partition iqtree = " << size << ": ";
for (int part = 0; part < size; part++) {
if (part != 0) out << ", ";

string name = saln->partitions[part]->name;
replace(name.begin(), name.end(), '+', '_');
out << name;
}
out << ";" << endl;

// Set Partition for Use
out << " set partition = iqtree;" << endl;

// Set Outgroup (if available)
if (!superTree->rooted) out << " outgroup " << superTree->root->name << ";" << endl << endl;

// Partition-Specific Model Information
for (int part = 0; part < size; part++) {
PhyloTree* currentTree = superTree->at(part);

// MrBayes Partitions are 1-indexed
out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl;
currentTree->getModel()->printMrBayesModelText(out,
convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams);
out << endl;
}
out << "end;" << endl;
out.close();
}

/************************************************************
* MAIN TREE RECONSTRUCTION
***********************************************************/
Expand Down Expand Up @@ -3100,8 +3192,15 @@ void runTreeReconstruction(Params &params, IQTree* &iqtree) {

}
if (iqtree->isSuperTree()) {
((PhyloSuperTree*) iqtree)->computeBranchLengths();
((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
auto superTree = (PhyloSuperTree*) iqtree;
superTree->computeBranchLengths();
superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
}
if (params.mr_bayes_output) {
cout << endl << "Writing MrBayes Block Files..." << endl;
printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), iqtree, false);
printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), iqtree, true);
cout << endl;
}

cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl;
Expand Down
7 changes: 7 additions & 0 deletions main/phylotesting.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,5 +390,12 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type);

string detectSeqTypeName(string model_name);

/**
* get string name from a SeqType object
* @param seq_type input sequence type
* @return name
*/
string getSeqTypeName(SeqType seq_type);


#endif /* PHYLOTESTING_H_ */
55 changes: 55 additions & 0 deletions model/modelbin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,58 @@ string ModelBIN::getNameParams(bool show_fixed_params) {
}
return retname.str();
}

void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) {
RateHeterogeneity* rate = phylo_tree->getRate();

// MrBayes does not support Invariable Modifier for Binary data
if (rate->isFreeRate() || rate->getPInvar() > 0.0) {
warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out);
}

// Lset Parameters
out << " lset applyto=(" << partition << ") rates=";

// Free Rate should be substituted by +G (+I not supported)
bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate();
if (hasGamma) {
// Rate Categories + Gamma
out << "gamma ngammacat=" << rate->getNRate();
} else
out << "equal";

out << ";" << endl;

if (!inclParams) {
if (freq_type == FREQ_EQUAL) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl;
return;
}

// Prset Parameters
out << " prset applyto=(" << partition << ")";

// Freerate (+R)
// Get replacement Gamma Shape
if (rate->isFreeRate()) {
printMrBayesFreeRateReplacement(rate, charset, out, false);
}

// Gamma Distribution (+G/+R)
// Dirichlet is not available here, use fixed
if (rate->getGammaShape() > 0.0)
out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")";

// State Frequencies
if (freq_type == FREQ_EQUAL)
out << " statefreqpr=fixed(equal)";
else {
out << " statefreqpr=dirichlet(";
for (int i = 0; i < num_states; ++i) {
if (i != 0) out << ", ";
out << minValueCheckMrBayes(state_freq[i]);
}
out << ")";
}

out << ";" << endl;
}
12 changes: 12 additions & 0 deletions model/modelbin.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ class ModelBIN : public ModelMarkov
*/
virtual void startCheckpoint();

/**
* Print the model information in a format that can be accepted by MrBayes, using lset and prset.<br>
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes.
* @param rate the rate information
* @param out the ofstream to print the result to
* @param partition the partition to apply lset and prset to
* @param charset the current partition's charset. Useful for getting information from the checkpoint file
* @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree
* @param inclParams whether to include IQTree optimized parameters for the model
*/
virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams);

};

#endif
Loading