Skip to content

PMSR model implementation #391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
393 changes: 273 additions & 120 deletions alignment/alignment.cpp

Large diffs are not rendered by default.

88 changes: 43 additions & 45 deletions alignment/alignment.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// C++ Interface: alignment
//
// Description:
// Description:
//
//
// Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>, (C) 2008
Expand Down Expand Up @@ -319,21 +319,20 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
virtual void orderPatternByNumChars(int pat_type);

/**
* un-group site-patterns, i.e., making #sites = #patterns and pattern frequency = 1 for all patterns
* un-group site-patterns, i.e. making #sites = #patterns and pattern frequency = 1 for all patterns
*/
void ungroupSitePattern();


/**
* re-group site-patterns
* re-group site-patterns so that sites within each pattern fall into the same group
* @param groups number of groups
* @param site_group group ID (0, 1, ...ngroups-1; must be continuous) of all sites
*/
void regroupSitePattern(int groups, IntVector &site_group);
void regroupSitePattern(int groups, const IntVector &site_group);


/****************************************************************************
output alignment
output alignment
****************************************************************************/
SeqType detectSequenceType(StrVector &sequences);

Expand Down Expand Up @@ -535,9 +534,11 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
*/
bool isGapOnlySeq(size_t seq_id);

virtual bool isSuperAlignment() {
return false;
}
bool isSSF() { return !ptn_state_freq.empty(); }

bool isSSR() { return !ptn_rate_scaler.empty(); }

virtual bool isSuperAlignment() { return false; }

/****************************************************************************
alignment general processing
Expand Down Expand Up @@ -916,13 +917,13 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
vector<uint32_t> pomo_sampled_states;
IntIntMap pomo_sampled_states_index; // indexing, to quickly find if a PoMo-2-state is already present

/* for site-specific state frequency model with Huaichun, Edward, Andrew */
/* site to model ID map */
IntVector site_model;
/** site to state frequency vector */
vector<double*> site_state_freq;
/* for site-specific models with Huaichun, Edward, Andrew */

/** pattern index to state frequency vector map */
vector<double*> ptn_state_freq;

/** pattern index to rate scaler map */
vector<double> ptn_rate_scaler;

/**
* @return true if data type is SEQ_CODON and state is a stop codon
Expand Down Expand Up @@ -985,14 +986,20 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
void getAppearance(StateType state, StateBitset &state_app);

/**
* read site specific state frequency vectors from a file to create corresponding model
* update site_model and site_state_freq variables for this class
* @param aln input alignment
* @param site_freq_file file name
* @return TRUE if alignment needs to be changed, FALSE otherwise
* read site-specific state frequency vectors or site-specific rate (branch length) scalers
* from a file to create a site-specific model
* @param site_param_file input file name
* @param param_type should be set to "freq" or "rate"
* @return TRUE if alignment patterns need to be changed, FALSE otherwise
*/
bool readSiteStateFreq(const char* site_freq_file);

bool readSiteParamFile(const char* site_param_file, const string &param_type);

/**
* normalize site-specific rates by their mean value
* @return mean rate before normalization
*/
double normalizePtnRateScaler();

/**
* special initialization for codon sequences, e.g., setting #states, genetic_code
* @param sequence_type user-defined sequence type
Expand All @@ -1011,32 +1018,23 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
void extractMapleFile(const std::string& aln_name, const InputType& format);

protected:
/** sequence names */
vector<string> seq_names;

/** expected num_sites */
int expected_num_sites = -1;

/**
sequence names
*/
vector<string> seq_names;

/**
expected num_sites
*/
int expected_num_sites = -1;
/** site to pattern index map */
IntVector site_pattern;

/**
Site to pattern index
*/
IntVector site_pattern;
/** pattern index to first pattern site map */
IntVector pattern_first_site;

/**
hash map from pattern to index in the vector of patterns (the alignment)
*/
PatternIntMap pattern_index;

/**
alisim: caching ntfreq if it has already randomly initialized
*/
double* cache_ntfreq = NULL;
/** hash map from pattern to index in the vector of patterns (the alignment) */
PatternIntMap pattern_index;

/** alisim: caching ntfreq if it has already randomly initialized */
double* cache_ntfreq = NULL;

private:
/**
Expand Down
11 changes: 5 additions & 6 deletions alignment/alignmentpairwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ double AlignmentPairwise::computeFunction(double value) {
auto sequence2 = tree->getConvertedSequenceByNumber(seq_id2);
auto frequencies = tree->getConvertedSequenceFrequencies();
size_t sequenceLength = tree->getConvertedSequenceLength();

if (site_rate->isSiteSpecificRate()) {
for (int i = 0; i < sequenceLength; i++) {
int state1 = sequence1[i];
Expand All @@ -221,7 +220,7 @@ double AlignmentPairwise::computeFunction(double value) {
if (state1 >= num_states || state2 >= num_states) {
continue;
}
double trans = tree->getModelFactory()->computeTrans(value * site_rate->getPtnRate(i), state1, state2);
double trans = tree->getModel()->computeTrans(value, model->getPtnModelID(i), state1, state2);
lh -= log(trans) * frequencies[i];
}
return lh;
Expand Down Expand Up @@ -358,10 +357,10 @@ void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) {
continue;
}
double freq = frequencies[i];
double rate_val = site_rate->getPtnRate(i);
double rate_val = 1.0;
double rate_sqr = rate_val * rate_val;
double derv1, derv2;
double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2);
double trans = tree->getModel()->computeTrans(value * rate_val, model->getPtnModelID(i), state1, state2, derv1, derv2);
double d1 = derv1 / trans;
df -= rate_val * d1 * freq;
ddf -= rate_sqr * (derv2/trans - d1*d1) * freq;
Expand All @@ -376,10 +375,10 @@ void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) {
if (num_states<=state2) {
continue;
}
double rate_val = site_rate->getPtnRate(i);
double rate_val = 1.0;
double rate_sqr = rate_val * rate_val;
double derv1, derv2;
double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2);
double trans = tree->getModel()->computeTrans(value * rate_val, model->getPtnModelID(i), state1, state2, derv1, derv2);
double d1 = derv1 / trans;
double freq = tree->aln->at(i).frequency;
df -= rate_val * d1 * freq;
Expand Down
20 changes: 9 additions & 11 deletions alignment/superalignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,20 @@ SuperAlignment::SuperAlignment() : Alignment() {
SuperAlignment::SuperAlignment(Params &params) : Alignment()
{
readFromParams(params);

init();

// only show Degree of missing data if AliSim is inactive or an input alignment is supplied
if (!(Params::getInstance().alisim_active && !Params::getInstance().alisim_inference_mode))
cout << "Degree of missing data: " << computeMissingData() << endl;

#ifdef _OPENMP
if (params.num_threads > partitions.size()) {
cout << "Info: multi-threading strategy over alignment sites" << endl;
} else {
cout << "Info: multi-threading strategy over partitions" << endl;
}
#endif
cout << endl;

}

void SuperAlignment::readFromParams(Params &params) {
Expand Down Expand Up @@ -298,9 +296,9 @@ void SuperAlignment::readPartitionRaxml(Params &params) {
input_aln->sequence_type = params.sequence_type;
((SuperAlignment*) input_aln)->init();
}
cout << endl << "Partition file is not in NEXUS format, assuming RAxML-style partition file..." << endl;

cout << "Partition file is not in NEXUS format, assuming RAxML-style partition file..." << endl;

size_t pos = params.model_name.find_first_of("+*");
string rate_type = "";
if (pos != string::npos) rate_type = params.model_name.substr(pos);
Expand Down Expand Up @@ -499,9 +497,9 @@ void SuperAlignment::readPartitionNexus(Params &params) {
if (empty_partition) {
cout << "NOTE: No CharPartition defined, use all CharSets" << endl;
}
cout << endl << "Loading " << sets_block->charsets.size() << " partitions..." << endl;

cout << "Loading " << sets_block->charsets.size() << " partitions..." << endl;

for (it = sets_block->charsets.begin(); it != sets_block->charsets.end(); it++)
if (empty_partition || (*it)->char_partition != "") {
if ((*it)->model_name == "")
Expand Down
Loading