diff --git a/ngsLD.cpp b/ngsLD.cpp index c62e317..7f24c8a 100644 --- a/ngsLD.cpp +++ b/ngsLD.cpp @@ -21,7 +21,7 @@ #include #include "ngsLD.hpp" -char const* version = "1.2.1"; +char const* version = "1.2.0"; int main (int argc, char** argv) { @@ -82,7 +82,7 @@ int main (int argc, char** argv) { ///////////////////// // Read input data // ///////////////////// - // Read data from GENO file + // Read GENO file if(pars->verbose >= 1) fprintf(stderr, "> Reading data from file...\n"); double ***tmp = read_geno(pars->in_geno, pars->in_bin, pars->in_probs, &pars->in_logscale, pars->n_ind, pars->n_sites); @@ -114,14 +114,13 @@ int main (int argc, char** argv) { pars->expected_geno[s][i] = pars->geno_lkl[s][i][1] + 2*pars->geno_lkl[s][i][2]; } - // Read positions from file if(pars->verbose >= 1) fprintf(stderr, "==> Getting sites coordinates\n"); if(pars->in_pos){ pars->pos_dist = read_dist(pars->in_pos, (pars->in_pos_header ? 1 : 0), pars->n_sites); if(pars->verbose >= 6) - for(uint64_t s = 0; s < pars->n_sites; s++) + for(uint64_t s = 0; s < min(10, pars->n_sites); s++) fprintf(stderr, "%lu\t%f\n", s, pars->pos_dist[s]); if(read_file(pars->in_pos, &pars->labels, (pars->in_pos_header ? 1 : 0)) != pars->n_sites) error(__FUNCTION__, "invalid number of lines in POS file"); @@ -137,12 +136,12 @@ int main (int argc, char** argv) { pars->labels = init_ptr(pars->n_sites, 0, "Site:#"); } - - // DEBUG - if(pars->verbose >= 7) + if(pars->verbose >= 7){ + fprintf(stderr, "==> Geno data\n"); for(uint64_t s = 0; s < min(10, pars->n_sites); s++) fprintf(stderr, "%lu\t%s\t%f (%f %f %f)\n", s, pars->labels[s], pars->maf[s], pars->geno_lkl[s][0][0], pars->geno_lkl[s][0][1], pars->geno_lkl[s][0][2]); + } @@ -265,13 +264,13 @@ void calc_pair_LD (void *pth){ // Stop if site 1 is < min_maf if(p->pars->maf[s1] < p->pars->min_maf) { if(p->pars->verbose > 8) - fprintf(stderr, "\tLow MAF: %f\n", p->pars->maf[s1]); + fprintf(stderr, "\tLow MAF on site1: %f\n", p->pars->maf[s1]); break; } // Skip if site 2 is < min_maf if(p->pars->maf[s2] < p->pars->min_maf) { if(p->pars->verbose > 8) - fprintf(stderr, "\tLow MAF: %f\n", p->pars->maf[s2]); + fprintf(stderr, "\tLow MAF on site2: %f\n", p->pars->maf[s2]); s2++; continue; } diff --git a/shared/gen_func.cpp b/shared/gen_func.cpp index 4b28576..6d4952a 100644 --- a/shared/gen_func.cpp +++ b/shared/gen_func.cpp @@ -915,9 +915,8 @@ void call_geno(double *geno, int n_geno, bool log_scale, double N_prob_thresh, d -// Calculate posterio probabilities (PP) from GLs and prior -// GL in log-space by default, but can be in normal-space if flag set -// prior and PP always given log-space +// Calculate posterior probabilities (PP) from GLs and prior +// GL, prior and PP always given log-space void post_prob(double *pp, double *lkl, double *prior, uint64_t n_geno){ for(uint64_t cnt = 0; cnt < n_geno; cnt++){ pp[cnt] = lkl[cnt]; @@ -936,11 +935,10 @@ void post_prob(double *pp, double *lkl, double *prior, uint64_t n_geno){ // Calculate HWE genotype frequencies // MAF and F in normal-space -// Genotype frequencies in log-space -void calc_HWE(double *genot_freq, double freq, double F, bool log_scale){ - genot_freq[0] = pow(1-freq,2) + (1-freq)*freq*F; - genot_freq[1] = 2*(1-freq)*freq - 2*(1-freq)*freq*F; - genot_freq[2] = pow(freq,2) + (1-freq)*freq*F; +void calc_HWE(double *genot_freq, double maf, double F, bool log_scale){ + genot_freq[0] = pow(1-maf,2) + (1-maf)*maf*F; + genot_freq[1] = 2*(1-maf)*maf - 2*(1-maf)*maf*F; + genot_freq[2] = pow(maf,2) + (1-maf)*maf*F; if(log_scale) conv_space(genot_freq, N_GENO, log); @@ -1001,7 +999,7 @@ double est_maf(uint64_t n_ind, double **pdg, double *indF, bool ignore_miss_data num += pp[1] + pp[2]*(2-F); den += 2*pp[1] + (pp[0]+pp[2])*(2-F); - //printf("Ind: %lu; num: %f; den: %f; pp: %f %f %f; IBD: %f\n", i, num, den, pp[0], pp[1], pp[2], F); + //printf("Ind: %lu; gl: %f %f %f; pp: %f %f %f; IBD: %f; num: %f; den: %f\n", i, pdg[i][0], pdg[i][1], pdg[i][2], pp[0], pp[1], pp[2], F, num, den); } freq = num/den;