Skip to content

Commit cd709d8

Browse files
committed
Merge pull request #26 from camaclean/issue_21
Added max-extend support
2 parents 85b33a8 + 51834eb commit cd709d8

File tree

9 files changed

+78
-31
lines changed

9 files changed

+78
-31
lines changed

src/calcmpiselect.hpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,14 @@ ParameterStream& operator>>(ParameterStream& in, XIhsScore& info)
5555
return in;
5656
}
5757

58-
void calcIhsMpi(const std::string& hapfile, const std::string& mapfile, const std::string& outfile, double cutoff, double minMAF, double scale, int binFactor)
58+
void calcIhsMpi(const std::string& hapfile,
59+
const std::string& mapfile,
60+
const std::string& outfile,
61+
double cutoff,
62+
double minMAF,
63+
double scale,
64+
unsigned long long maxExtend,
65+
int binFactor)
5966
{
6067
std::cout << "Calculating iHS using MPI." << std::endl;
6168
HapMap hap;
@@ -64,9 +71,9 @@ void calcIhsMpi(const std::string& hapfile, const std::string& mapfile, const st
6471
return;
6572
}
6673
std::cout << "Loaded " << hap.numSnps() << " snps." << std::endl;
67-
std::cout << "Haplotype count: " << hap.snpLength() << std::endl;
74+
std::cout << "Haplotype count: " << hap.snpLength() << " " << maxExtend << std::endl;
6875
hap.loadMap(mapfile.c_str());
69-
IHSFinder *ihsfinder = new IHSFinder(hap.snpLength(), cutoff, minMAF, scale, binFactor);
76+
IHSFinder *ihsfinder = new IHSFinder(hap.snpLength(), cutoff, minMAF, scale, maxExtend, binFactor);
7077
mpirpc::Manager *manager = new mpirpc::Manager();
7178
int procsToGo = manager->numProcs();
7279
std::cout << "Processes: " << procsToGo << std::endl;
@@ -169,7 +176,15 @@ void calcIhsMpi(const std::string& hapfile, const std::string& mapfile, const st
169176
delete manager;
170177
}
171178

172-
void calcXpehhMpi(const std::string& hapA, const std::string& hapB, const std::string& mapfile, const std::string& outfile, double cutoff, double minMAF, double scale, int binFactor)
179+
void calcXpehhMpi(const std::string& hapA,
180+
const std::string& hapB,
181+
const std::string& mapfile,
182+
const std::string& outfile,
183+
double cutoff,
184+
double minMAF,
185+
double scale,
186+
unsigned long long maxExtend,
187+
int binFactor)
173188
{
174189
std::cout << "Calculating XPEHH using MPI." << std::endl;
175190
HapMap mA, mB;
@@ -186,7 +201,7 @@ void calcXpehhMpi(const std::string& hapA, const std::string& hapB, const std::s
186201
std::cout << "Population A haplotype count: " << mA.snpLength() << std::endl;
187202
std::cout << "Population B haplotype count: " << mB.snpLength() << std::endl;
188203
mA.loadMap(mapfile.c_str());
189-
IHSFinder *ihsfinder = new IHSFinder(mA.snpLength(), cutoff, minMAF, scale, binFactor);
204+
IHSFinder *ihsfinder = new IHSFinder(mA.snpLength(), cutoff, minMAF, scale, maxExtend, binFactor);
190205
mpirpc::Manager *manager = new mpirpc::Manager();
191206
int procsToGo = manager->numProcs();
192207
std::cout << "Processes: " << procsToGo << std::endl;

src/calcnompiselect.hpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,14 @@
2525
#include <omp.h>
2626
#endif
2727

28-
void calcIhsNoMpi(const std::string& hap, const std::string& map, const std::string& outfile, double cutoff, double minMAF, double scale, int bins)
28+
void calcIhsNoMpi(const std::string& hap,
29+
const std::string& map,
30+
const std::string& outfile,
31+
double cutoff,
32+
double minMAF,
33+
double scale,
34+
unsigned long long maxExtend,
35+
int bins)
2936
{
3037
HapMap ctcmap;
3138
if (!ctcmap.loadHap(hap.c_str()))
@@ -38,7 +45,7 @@ void calcIhsNoMpi(const std::string& hap, const std::string& map, const std::str
3845
#endif
3946
ctcmap.loadMap(map.c_str());
4047
auto start = std::chrono::high_resolution_clock::now();
41-
IHSFinder *ihsfinder = new IHSFinder(ctcmap.snpLength(), cutoff, minMAF, scale, bins);
48+
IHSFinder *ihsfinder = new IHSFinder(ctcmap.snpLength(), cutoff, minMAF, scale, maxExtend, bins);
4249
ihsfinder->run(&ctcmap, 0ULL, ctcmap.numSnps());
4350
IHSFinder::LineMap res = ihsfinder->normalize();
4451

@@ -69,7 +76,15 @@ void calcIhsNoMpi(const std::string& hap, const std::string& map, const std::str
6976
delete ihsfinder;
7077
}
7178

72-
void calcXpehhNoMpi(const std::string& hapA, const std::string& hapB, const std::string& map, const std::string& outfile, double cutoff, double minMAF, double scale, int bins)
79+
void calcXpehhNoMpi(const std::string& hapA,
80+
const std::string& hapB,
81+
const std::string& map,
82+
const std::string& outfile,
83+
double cutoff,
84+
double minMAF,
85+
double scale,
86+
unsigned long long maxExtend,
87+
int bins)
7388
{
7489
HapMap hA, hB;
7590
if (!hA.loadHap(hapA.c_str()))
@@ -87,7 +102,7 @@ void calcXpehhNoMpi(const std::string& hapA, const std::string& hapB, const std:
87102
#endif
88103
hA.loadMap(map.c_str());
89104
auto start = std::chrono::high_resolution_clock::now();
90-
IHSFinder *ihsfinder = new IHSFinder(hA.snpLength(), cutoff, minMAF, scale, bins);
105+
IHSFinder *ihsfinder = new IHSFinder(hA.snpLength(), cutoff, minMAF, scale, maxExtend, bins);
91106
ihsfinder->runXpehh(&hA, &hB, 0ULL, hA.numSnps());
92107

93108
auto tend = std::chrono::high_resolution_clock::now();

src/ehhfinder.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121
#include "hapmap.hpp"
2222
#include <algorithm>
2323

24-
EHHFinder::EHHFinder(std::size_t snpDataSizeA, std::size_t snpDataSizeB, std::size_t maxBreadth, double cutoff, double minMAF, double scale)
24+
EHHFinder::EHHFinder(std::size_t snpDataSizeA, std::size_t snpDataSizeB, std::size_t maxBreadth, double cutoff, double minMAF, double scale, long long unsigned int maxExtend)
2525
: m_maxBreadth(maxBreadth)
2626
, m_bufferSize(snpDataSizeA*maxBreadth)
27+
, m_maxExtend(maxExtend)
2728
, m_cutoff(cutoff)
2829
, m_minMAF(minMAF)
2930
, m_scale(scale)
@@ -301,6 +302,7 @@ std::pair< EHH, EHH > EHHFinder::findXPEHH(HapMap* hmA, HapMap* hmB, std::size_t
301302
double lastEhhB = f*f+(1.0-f)*(1.0-f);
302303
f = (ret.first.num+ret.second.num)*m_freqP;
303304
double lastEhhP = f*f+(1.0-f)*(1.0-f);
305+
unsigned long long locusPysPos = hmA->physicalPosition(focus);
304306

305307
setInitialXPEHH(focus);
306308
calcBranchesXPEHH(focus-1);
@@ -310,8 +312,8 @@ std::pair< EHH, EHH > EHHFinder::findXPEHH(HapMap* hmA, HapMap* hmB, std::size_t
310312
{
311313
for (std::size_t currLine = focus - 2;; --currLine)
312314
{
313-
314-
double scale = (double)(m_scale) / (double)(hmA->physicalPosition(currLine+2) - hmA->physicalPosition(currLine+1));
315+
unsigned long long currPhysPos = hmA->physicalPosition(currLine+1);
316+
double scale = (double)(m_scale) / (double)(hmA->physicalPosition(currLine+2) - currPhysPos);
315317
if (scale > 1)
316318
scale=1;
317319

@@ -332,6 +334,8 @@ std::pair< EHH, EHH > EHHFinder::findXPEHH(HapMap* hmA, HapMap* hmB, std::size_t
332334

333335
if ((m_single0count+m_single1count) == (hmA->snpLength()+hmB->snpLength()))
334336
break;
337+
if (m_maxExtend != 0 && locusPysPos - currPhysPos > m_maxExtend)
338+
break;
335339
if(currLine == 0)
336340
{
337341
++(*reachedEnd);
@@ -353,7 +357,8 @@ std::pair< EHH, EHH > EHHFinder::findXPEHH(HapMap* hmA, HapMap* hmB, std::size_t
353357
m_single1count = 0ULL;
354358
for (std::size_t currLine = focus + 2; currLine < hmA->numSnps(); ++currLine)
355359
{
356-
double scale = (double)(m_scale) / (double)(hmA->physicalPosition(currLine-1) - hmA->physicalPosition(currLine-2));
360+
unsigned long long currPhysPos = hmA->physicalPosition(currLine-1);
361+
double scale = (double)(m_scale) / (double)(currPhysPos - hmA->physicalPosition(currLine-2));
357362
if (scale > 1)
358363
scale=1;
359364

@@ -374,6 +379,8 @@ std::pair< EHH, EHH > EHHFinder::findXPEHH(HapMap* hmA, HapMap* hmB, std::size_t
374379

375380
if ((m_single0count+m_single1count) == (hmA->snpLength()+hmB->snpLength()))
376381
break;
382+
if (m_maxExtend != 0 && currPhysPos - locusPysPos > m_maxExtend)
383+
break;
377384
if (currLine == hmA->numSnps()-1)
378385
{
379386
++(*reachedEnd);
@@ -477,15 +484,16 @@ EHH EHHFinder::find(HapMap* hapmap, std::size_t focus, std::atomic<unsigned long
477484
double freq1 = 1.0/(double)ret.num;
478485
double probSingle = freq1*freq1;
479486
double probNotSingle = freq0*freq0;
480-
double probs, probsNot;
481487
double lastProbs = 1.0, lastProbsNot = 1.0;
488+
unsigned long long locusPysPos = hapmap->physicalPosition(focus);
482489

483490
setInitial(focus, focus-1);
484491

485492
for (std::size_t currLine = focus - 2;; --currLine)
486493
{
487494
HapStats stats;
488-
double scale = (double)(m_scale) / (double)(hapmap->physicalPosition(currLine+2) - hapmap->physicalPosition(currLine+1));
495+
unsigned long long currPhysPos = hapmap->physicalPosition(currLine+1);
496+
double scale = (double)(m_scale) / (double)(hapmap->physicalPosition(currLine+2) - currPhysPos);
489497
if (scale > 1)
490498
scale=1;
491499

@@ -504,7 +512,10 @@ EHH EHHFinder::find(HapMap* hapmap, std::size_t focus, std::atomic<unsigned long
504512
if (ehhsave)
505513
ret.upstream.push_back(std::move(stats));
506514

507-
515+
if (m_maxExtend != 0 && locusPysPos - currPhysPos > m_maxExtend) {
516+
//std::cout << m_maxExtend << std::endl;
517+
break;
518+
}
508519
if (lastProbs <= m_cutoff + 1e-15 && lastProbsNot <= m_cutoff + 1e-15)
509520
break;
510521
if ((m_single0count+m_single1count) == hapmap->snpLength())
@@ -521,12 +532,11 @@ EHH EHHFinder::find(HapMap* hapmap, std::size_t focus, std::atomic<unsigned long
521532
for (std::size_t currLine = focus + 2; currLine < hapmap->numSnps(); ++currLine)
522533
{
523534
HapStats stats;
535+
unsigned long long currPhysPos = hapmap->physicalPosition(currLine-1);
524536
double scale = double(m_scale) / double(hapmap->physicalPosition(currLine-1) - hapmap->physicalPosition(currLine-2));
525537
if (scale > 1)
526538
scale=1;
527539

528-
int core0 = 0, core1 = 0;
529-
530540
calcBranches(hapmap, focus, currLine, freq0, freq1, stats);
531541

532542
stats.probs += probSingle*m_single1count;
@@ -544,7 +554,9 @@ EHH EHHFinder::find(HapMap* hapmap, std::size_t focus, std::atomic<unsigned long
544554
if (ehhsave)
545555
ret.downstream.push_back(std::move(stats));
546556

547-
if (lastProbs <= m_cutoff + 1e-15 && lastProbsNot <= m_cutoff + 1e-15 || (m_single0count+m_single1count) == hapmap->snpLength())
557+
if (m_maxExtend != 0 && currPhysPos - locusPysPos > m_maxExtend)
558+
break;
559+
if ((lastProbs <= m_cutoff + 1e-15 && lastProbsNot <= m_cutoff + 1e-15) || (m_single0count+m_single1count) == hapmap->snpLength())
548560
break;
549561

550562
if (currLine == hapmap->numSnps()-1)

src/ehhfinder.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
class EHHFinder
2727
{
2828
public:
29-
EHHFinder(std::size_t SnpDataSizeA, std::size_t snpDataSizeB, std::size_t maxBreadth, double cutoff, double minMAF, double scale);
29+
explicit EHHFinder(std::size_t snpDataSizeA, std::size_t snpDataSizeB, std::size_t maxBreadth, double cutoff, double minMAF, double scale, unsigned long long maxExtend);
3030
EHH find(HapMap* hapmap, std::size_t focus, std::atomic<unsigned long long>* reachedEnd, std::atomic<unsigned long long>* outsideMaf, bool ehhsave = false);
3131
std::pair<EHH,EHH> findXPEHH(HapMap* hmA, HapMap *hmB, std::size_t focus, std::atomic<unsigned long long>* reachedEnd);
3232
~EHHFinder();
@@ -40,6 +40,7 @@ class EHHFinder
4040

4141
std::size_t m_maxBreadth;
4242
std::size_t m_bufferSize;
43+
unsigned long long m_maxExtend;
4344
HapMap::PrimitiveType *m_parent0;
4445
HapMap::PrimitiveType *m_parent1;
4546
HapMap::PrimitiveType *m_branch0;

src/ihsfinder.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
#include "ihsfinder.hpp"
2121

22-
IHSFinder::IHSFinder(std::size_t snpLength, double cutoff, double minMAF, double scale, int bins)
23-
: m_snpLength(snpLength), m_cutoff(cutoff), m_minMAF(minMAF), m_scale(scale), m_bins(bins), m_counter{}, m_reachedEnd{}, m_outsideMaf{}, m_nanResults{}
22+
IHSFinder::IHSFinder(std::size_t snpLength, double cutoff, double minMAF, double scale, unsigned long long maxExtend, int bins)
23+
: m_snpLength(snpLength), m_cutoff(cutoff), m_minMAF(minMAF), m_scale(scale), m_maxExtend(maxExtend), m_bins(bins), m_counter{}, m_reachedEnd{}, m_outsideMaf{}, m_nanResults{}
2424
{}
2525

2626
void IHSFinder::processEHH(const EHH& ehh, std::size_t line)
@@ -139,7 +139,7 @@ void IHSFinder::runXpehh(HapMap* mA, HapMap* mB, std::size_t start, std::size_t
139139
{
140140
#pragma omp parallel shared(mA,mB,start,end)
141141
{
142-
EHHFinder finder(mA->snpDataSize(), mB->snpDataSize(), 2000, m_cutoff, m_minMAF, m_scale);
142+
EHHFinder finder(mA->snpDataSize(), mB->snpDataSize(), 2000, m_cutoff, m_minMAF, m_scale, m_maxExtend);
143143
#pragma omp for schedule(dynamic,10)
144144
for(size_t i = start; i < end; ++i)
145145
{
@@ -160,7 +160,7 @@ void IHSFinder::run(HapMap* map, std::size_t start, std::size_t end)
160160
{
161161
#pragma omp parallel shared(map, start, end)
162162
{
163-
EHHFinder finder(map->snpDataSize(), map->snpDataSize(), 2000, m_cutoff, m_minMAF, m_scale);
163+
EHHFinder finder(map->snpDataSize(), map->snpDataSize(), 2000, m_cutoff, m_minMAF, m_scale, m_maxExtend);
164164
#pragma omp for schedule(dynamic,10)
165165
for(size_t i = start; i < end; ++i)
166166
{

src/ihsfinder.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class IHSFinder
4040
using FreqVecMap = std::map<double, std::vector<double>>;
4141
using StatsMap = std::map<double, Stats>;
4242

43-
IHSFinder(std::size_t snpLength, double cutoff, double minMAF, double scale, int bins);
43+
IHSFinder(std::size_t snpLength, double cutoff, double minMAF, double scale, unsigned long long maxExtend, int bins);
4444
FreqVecMap unStdIHSByFreq() const { return m_unStandIHSByFreq; }
4545
IhsInfoMap unStdIHSByLine() const { return m_unStandIHSByLine; }
4646
XIhsInfoMap unStdXIHSByLine() const { return m_unStandXIHSByLine; }
@@ -65,6 +65,7 @@ class IHSFinder
6565
double m_cutoff;
6666
double m_minMAF;
6767
double m_scale;
68+
unsigned long long m_maxExtend;
6869
int m_bins;
6970

7071
std::mutex m_mutex;

src/main-ehh.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ int main(int argc, char** argv)
3535
Argument<double> cutoff('c', "cutoff", "EHH cutoff value (default: 0.05)", false, false, 0.05);
3636
Argument<double> minMAF('b', "minmaf", "Minimum allele frequency (default: 0.05)", false, false, 0.05);
3737
Argument<unsigned long long> scale('s', "scale", "Gap scale parameter in bp, used to scale gaps > scale parameter as in Voight, et al.", false, false, 20000);
38+
Argument<unsigned long long> maxExtend('e', "max-extend", "Maximum distance in bp to traverse when calculating EHH (default: 0 (disabled))", false, false, 0);
3839
Argument<const char*> locus('l', "locus", "Locus", false, false, 0);
39-
ArgParse argparse({&help, &version, &hap, &map, &locus, &cutoff, &minMAF, &scale}, "Usage: ehhbin --map input.map --hap input.hap --locus id");
40+
ArgParse argparse({&help, &version, &hap, &map, &locus, &cutoff, &minMAF, &scale, &maxExtend}, "Usage: ehhbin --map input.map --hap input.hap --locus id");
4041
if (!argparse.parseArguments(argc, argv))
4142
{
4243
return 3;
@@ -72,7 +73,7 @@ int main(int argc, char** argv)
7273
}
7374
std::atomic<unsigned long long> reachedEnd{};
7475
std::atomic<unsigned long long> outsideMaf{};
75-
EHHFinder finder(hmap.snpDataSize(), hmap.snpDataSize(), 1000, cutoff.value(), minMAF.value(), (double) scale.value());
76+
EHHFinder finder(hmap.snpDataSize(), hmap.snpDataSize(), 1000, maxExtend.value(), cutoff.value(), minMAF.value(), (double) scale.value());
7677
e = finder.find(&hmap, l, &reachedEnd, &outsideMaf, true);
7778
e.printEHH(&hmap);
7879
std::cout << "iHS: " << log(e.iHH_0/e.iHH_1) << std::endl;

src/main-ihs.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ int main(int argc, char** argv)
4545
Argument<double> minMAF('f', "minmaf", "Minimum allele frequency (default: 0.05)", false, false, 0.05);
4646
Argument<int> binfac('b', "bin", "Number of frequency bins for iHS normalization (default: 50)", false, false, 50);
4747
Argument<unsigned long long> scale('s', "scale", "Gap scale parameter in bp, used to scale gaps > scale parameter as in Voight, et al.", false, false, 20000);
48+
Argument<unsigned long long> maxExtend('e', "max-extend", "Maximum distance in bp to traverse when calculating EHH (default: 0 (disabled))", false, false, 0);
4849
Argument<std::string> outfile('o', "out", "Output file", false, false, "out.txt");
49-
ArgParse argparse({&help, &version, &hap, &map, &outfile, &cutoff, &minMAF, &scale, &binfac}, "Usage: ihsbin --map input.map --hap input.hap [--ascii] [--out outfile]");
50+
ArgParse argparse({&help, &version, &hap, &map, &outfile, &cutoff, &minMAF, &scale, &maxExtend, &binfac}, "Usage: ihsbin --map input.map --hap input.hap [--ascii] [--out outfile]");
5051
if (!argparse.parseArguments(argc, argv))
5152
{
5253
ret = 1;
@@ -72,7 +73,7 @@ int main(int argc, char** argv)
7273
numSnps = HapMap::querySnpLength(hap.value().c_str());
7374
std::cout << "Chromosomes per SNP: " << numSnps << std::endl;
7475

75-
calcIhs(hap.value(), map.value(), outfile.value(), cutoff.value(), minMAF.value(), (double) scale.value(), binfac.value());
76+
calcIhs(hap.value(), map.value(), outfile.value(), cutoff.value(), minMAF.value(), (double) scale.value(), maxExtend.value(), binfac.value());
7677

7778
out:
7879
#if MPI_FOUND

src/main-xpehh.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ int main(int argc, char** argv)
4747
Argument<double> minMAF('f', "minmaf", "Minimum allele frequency (default: 0.05)", false, false, 0.00);
4848
Argument<int> binfac('b', "bin", "Number of frequency bins for iHS normalization (default: 50)", false, false, 50);
4949
Argument<unsigned long long> scale('s', "scale", "Gap scale parameter in bp, used to scale gaps > scale parameter as in Voight, et al.", false, false, 20000);
50+
Argument<unsigned long long> maxExtend('e', "max-extend", "Maximum distance in bp to traverse when calculating EHH (default: 0 (disabled))", false, false, 0);
5051
Argument<std::string> outfile('o', "out", "Output file", false, false, "out.txt");
51-
ArgParse argparse({&help, &version, &hapA, &hapB, &map, &outfile, &cutoff, &minMAF, &scale, &binfac}, "Usage: xpehhbin --map input.map --hapA inputA.hap --hapB inputB.hap");
52+
ArgParse argparse({&help, &version, &hapA, &hapB, &map, &outfile, &cutoff, &minMAF, &maxExtend, &scale, &binfac}, "Usage: xpehhbin --map input.map --hapA inputA.hap --hapB inputB.hap");
5253
if (!argparse.parseArguments(argc, argv))
5354
{
5455
ret = 1;
@@ -76,7 +77,7 @@ int main(int argc, char** argv)
7677
numSnps = HapMap::querySnpLength(hapB.value().c_str());
7778
std::cout << "Haplotypes in population B: " << numSnps << std::endl;
7879

79-
calcXpehh(hapA.value(), hapB.value(), map.value(), outfile.value(), cutoff.value(), minMAF.value(), (double) scale.value(), binfac.value());
80+
calcXpehh(hapA.value(), hapB.value(), map.value(), outfile.value(), cutoff.value(), minMAF.value(), (double) scale.value(), maxExtend.value(), binfac.value());
8081

8182
out:
8283
#if MPI_FOUND

0 commit comments

Comments
 (0)