Skip to content

Commit

Permalink
bootstrapping implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
stevemussmann committed Aug 4, 2018
1 parent 71ca3a3 commit 5b6f0b4
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 24 deletions.
20 changes: 19 additions & 1 deletion fnFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ fnFiles::fnFiles(std::string i, std::string p, std::string abcd, int vectorsize)
{
data[it->second].resize(vectorsize);
}
}

// copy constructor for bootstrap replication
fnFiles::fnFiles(fnFiles f, std::vector<int> &v, std::unordered_map<std::string,std::string> m)
{
for(std::unordered_map<std::string,std::string>::iterator it = m.begin(); it != m.end(); it++)
{
data[it->second].resize(v.size());
}

for(unsigned int i=0; i<v.size(); i++)
{
for(std::unordered_map<std::string,std::string>::iterator it = m.begin(); it!=m.end(); it++)
{
std::unordered_map<std::string,int> la = f.data[it->second][v[i]];
data[it->second][i] = la;
}
}

}

Expand Down Expand Up @@ -80,7 +98,7 @@ unsigned int fnFiles::getLength()
if(counter == 0)
{
now = data[it->second].size();
std::cout << now << std::endl;
//std::cout << now << std::endl;
}
else
{
Expand Down
1 change: 1 addition & 0 deletions fnFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
class fnFiles {
public:
fnFiles(std::string i, std::string p, std::string a, int vectorsize);
fnFiles(fnFiles f, std::vector<int> &v, std::unordered_map<std::string,std::string> m);
void readfiles();
unsigned int getLength();
std::unordered_map <std::string,int> getLocus(std::string s, int i);
Expand Down
158 changes: 147 additions & 11 deletions fnStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,24 @@ fnStats::fnStats(int l, std::unordered_map<std::string,std::string> m)
ancestral.resize(l);
}

// copy constructor (sort of) for bootstrap replication
fnStats::fnStats(fnStats &fs, std::vector<int> &v, int l, std::unordered_map<std::string,std::string> m)
{
for(std::unordered_map<std::string,std::string>::iterator it = m.begin(); it != m.end(); it++)
{
freqs[it->second].resize(l);
}
ancestral.resize(l);

for(int i=0; i<l; i++){
for(std::unordered_map<std::string,std::string>::iterator it = m.begin(); it!=m.end(); it++)
{
std::unordered_map<std::string,double> la = fs.freqs[it->second][v[i]];
freqs[it->second][i] = la;
}
}
}

void fnStats::calcAllFreqs(fnFiles &f, std::unordered_map<std::string,std::string> m)
{
for(unsigned int i=0; i<ancestral.size(); i++)
Expand Down Expand Up @@ -98,24 +116,141 @@ void fnStats::findAncestral(fnFiles &f)
}
ancestral[i] = anc;
}

//std::cout << ancestral[i] << std::endl;
//std::cout << l.size() << std::endl;
}
}

/*
void fnStats::findAncestral(fnFiles &f)
void fnStats::calcF2(fnFiles &f)
{
double f2total = 0.0;

for(unsigned int i=0; i<ancestral.size(); i++)
{
std::unordered_map<std::string,int> l = f.getALocus(i); //get locus for outgroup
std::unordered_map<std::string,int>::iterator it = l.begin(); //start iterator
ancestral[i] = it->first; //if only one allele, set it as ancestral state.
//get locus counts
std::unordered_map<std::string,int> al = f.getLocus("A",i);
std::unordered_map<std::string,int> bl = f.getLocus("B",i);

//initialize iterators
std::unordered_map<std::string,int>::iterator ait = al.begin();
std::unordered_map<std::string,int>::iterator bit = bl.begin();

std::vector<int> avec;
std::vector<int> bvec;

int atotal=0;
while(ait != al.end())
{
atotal += ait->second;
avec.push_back(ait->second);
ait++;
}

int btotal=0;
while(bit != bl.end())
{
btotal += bit->second;
bvec.push_back(bit->second);
bit++;
}

double ahz = hz(avec, atotal);
double bhz = hz(bvec, btotal);

double f2i = f2(freqs["A"][i]["anc"], freqs["B"][i]["anc"], ahz, bhz, atotal, btotal);
f2total+=f2i;
double fsti = fst(freqs["A"][i]["der"], freqs["B"][i]["der"]);
/*
//std::cout << std::fixed;
std::cout << "len(a) = " << avec.size() << std::endl;
std::cout << "len(b) = " << bvec.size() << std::endl;
std::cout << "ahz = " << ahz << std::endl;
std::cout << "bhz = " << bhz << std::endl;
std::cout << "f2 = " << f2i << std::endl;
std::cout << "fst = " << fsti << std::endl;
std::cout << std::endl;
*/
}
std::cout << std::endl << std::endl;
double f2avg = f2total/(double)ancestral.size();
std::cout << "f2avg = " << f2avg << std::endl;
}

void fnStats::calcF3(fnFiles &f)
{
double f2total = 0.0;
double f3total = 0.0;

for(unsigned int i=0; i<ancestral.size(); i++)
{
//get locus counts
std::unordered_map<std::string,int> al = f.getLocus("A",i);
std::unordered_map<std::string,int> bl = f.getLocus("B",i);
std::unordered_map<std::string,int> cl = f.getLocus("C",i);

//initialize iterators
std::unordered_map<std::string,int>::iterator ait = al.begin();
std::unordered_map<std::string,int>::iterator bit = bl.begin();
std::unordered_map<std::string,int>::iterator cit = cl.begin();

std::vector<int> avec;
std::vector<int> bvec;
std::vector<int> cvec;

int atotal=0;
while(ait != al.end())
{
atotal += ait->second;
avec.push_back(ait->second);
ait++;
}

int btotal=0;
while(bit != bl.end())
{
btotal += bit->second;
bvec.push_back(bit->second);
bit++;
}

int ctotal=0;
while(cit != cl.end())
{
ctotal += cit->second;
cvec.push_back(cit->second);
cit++;
}

double ahz = hz(avec, atotal);
double bhz = hz(bvec, btotal);
double chz = hz(cvec, ctotal);

double f2i = f2(freqs["A"][i]["anc"], freqs["B"][i]["anc"], ahz, bhz, atotal, btotal);
f2total+=f2i;
double f3i = f3(freqs["A"][i]["anc"], freqs["B"][i]["anc"], freqs["C"][i]["anc"], chz, ctotal);
f3total+=f3i;
double fsti = fst(freqs["A"][i]["der"], freqs["B"][i]["der"]);
/*
//std::cout << std::fixed;
std::cout << "len(a) = " << avec.size() << std::endl;
std::cout << "len(b) = " << bvec.size() << std::endl;
std::cout << "len(c) = " << cvec.size() << std::endl;
std::cout << "ahz = " << ahz << std::endl;
std::cout << "bhz = " << bhz << std::endl;
std::cout << "chz = " << chz << std::endl;
std::cout << "f2 = " << f2i << std::endl;
std::cout << "f3 = " << f3i << std::endl;
//std::cout << std::setprecision(10) << "f4 = " << f4i << std::endl;
std::cout << "fst = " << fsti << std::endl;
std::cout << std::endl;
*/
}
std::cout << std::endl << std::endl;
double f2avg = f2total/(double)ancestral.size();
double f3avg = f3total/(double)ancestral.size();
std::cout << "f2avg = " << f2avg << std::endl;
std::cout << "f3avg = " << f3avg << std::endl;
}
*/
void fnStats::calcFstats(fnFiles &f)

void fnStats::calcF4(fnFiles &f)
{
double f2total = 0.0;
double f3total = 0.0;
Expand Down Expand Up @@ -185,6 +320,7 @@ void fnStats::calcFstats(fnFiles &f)
f4total+=f4i;
double fsti = fst(freqs["A"][i]["der"], freqs["B"][i]["der"]);

/*
//std::cout << std::fixed;
std::cout << "len(a) = " << avec.size() << std::endl;
std::cout << "len(b) = " << bvec.size() << std::endl;
Expand All @@ -198,7 +334,7 @@ void fnStats::calcFstats(fnFiles &f)
std::cout << "f4 = " << f4i << std::endl;
std::cout << "fst = " << fsti << std::endl;
std::cout << std::endl;

*/
}
std::cout << std::endl << std::endl;
double f2avg = f2total/(double)ancestral.size();
Expand Down
5 changes: 4 additions & 1 deletion fnStats.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
class fnStats {
public:
fnStats(int l, std::unordered_map<std::string,std::string> m);
fnStats(fnStats &fs, std::vector<int> &v, int l, std::unordered_map<std::string,std::string> m);
void calcAllFreqs(fnFiles &f, std::unordered_map<std::string,std::string> m);
void findAncestral(fnFiles &f);
void calcFstats(fnFiles &f);
void calcF2(fnFiles &f);
void calcF3(fnFiles &f);
void calcF4(fnFiles &f);
private:
//record alleles as anc (ancestral) and derived (der)
std::unordered_map<std::string,std::vector<std::unordered_map<std::string,double> > > freqs;
Expand Down
82 changes: 71 additions & 11 deletions fn_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
#include "fnStats.h"

#include <iostream>
#include <random>
#include <string>

namespace opt = boost::program_options;

//void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &outgroup, std::string &abcd, int &vsize);
void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &abcd, int &vsize, bool &two, bool &three, bool &four);
void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &abcd, int &vsize, bool &two, bool &three, bool &four, int &bootstrap);

int main(int argc, char** argv) {

Expand All @@ -21,43 +22,101 @@ int main(int argc, char** argv) {
bool two = false;
bool three = false;
bool four = false;
int bootstrap = 0;

//parseComLine(argc,argv,infile,popmap,outgroup,abcd,vsize);
parseComLine(argc,argv,infile,popmap,abcd,vsize,two,three,four);
parseComLine(argc,argv,infile,popmap,abcd,vsize,two,three,four,bootstrap);

//std::cout << "Hello World!" << std::endl;

//start random number generator
std::default_random_engine generator;
unsigned int seed = time(0);
generator.seed(seed);


//fnFiles f(infile, popmap, outgroup, abcd, vsize);
fnFiles f(infile, popmap, abcd, vsize);
f.readfiles();
if(two == true)
{

f.checkF2(); // check to see if all taxa are present in input
}
else if (three == true)
{

f.checkF3(); // check to see if all taxa are present in input
}
else if(four == true)
{
fnFiles f(infile, popmap, abcd, vsize);
f.readfiles();
f.checkF4(); // check to see if all taxa are present in input
}
else
{
std::cerr << "No F statistic option was selected." << std::endl;
exit(EXIT_FAILURE);
}

fnStats fs(f.getLength(), f.ABCDmap);
fs.findAncestral(f);
fs.calcAllFreqs(f, f.ABCDmap);

fnStats fs(f.getLength(), f.ABCDmap);
fs.findAncestral(f);
fs.calcAllFreqs(f, f.ABCDmap);
fs.calcFstats(f);
if(two == true)
{
fs.calcF2(f);
}
else if (three == true)
{
fs.calcF3(f);
}
else if(four == true)
{
fs.calcF4(f);
}
else
{
std::cerr << "No F statistic option was selected." << std::endl;
exit(EXIT_FAILURE);
}
// do bootstrapping
for(int i=0; i<bootstrap; i++)
{
std::vector<int> bootv;
bootv.resize(f.getLength());
std::uniform_int_distribution<int> uniform(0,f.getLength()-1);
for(unsigned int i=0; i<f.getLength(); i++){
bootv[i] = uniform(generator);
}
if(two == true)
{
fnStats fsboot(fs,bootv,f.getLength(),f.ABCDmap);
fnFiles fboot(f,bootv,f.ABCDmap);
fsboot.calcF2(fboot);
}
else if (three == true)
{
fnStats fsboot(fs,bootv,f.getLength(),f.ABCDmap);
fnFiles fboot(f,bootv,f.ABCDmap);
fsboot.calcF3(fboot);
}
else if(four == true)
{
fnStats fsboot(fs,bootv,f.getLength(),f.ABCDmap);
fnFiles fboot(f,bootv,f.ABCDmap);
fsboot.calcF4(fboot);
}
else
{
std::cerr << "No F statistic option was selected." << std::endl;
exit(EXIT_FAILURE);
}

}

return 0;
}

//void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &outgroup, std::string &abcd, int &vsize)
void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &abcd, int &vsize, bool &two, bool &three, bool &four)
void parseComLine(int argc, char **argv, std::string &infile, std::string &popmap, std::string &abcd, int &vsize, bool &two, bool &three, bool &four, int &bootstrap)
{
opt::options_description desc("--- Option Descriptions ---");
desc.add_options()
Expand All @@ -69,6 +128,7 @@ void parseComLine(int argc, char **argv, std::string &infile, std::string &popma
("two,2", opt::bool_switch(&two), "Use this option to run only the F2 statistic.")
("three,3", opt::bool_switch(&three), "Use this option to run the F2 and F3 statistics.")
("four,4", opt::bool_switch(&four), "Use this option to run the F2, F3, and F4 statistics.")
("boot,b", opt::value<int>(&bootstrap)->required(), "specify the number of bootstrap replicates to perform.")
;

opt::variables_map vm;
Expand Down

0 comments on commit 5b6f0b4

Please sign in to comment.