diff --git a/Assembly.cpp b/Assembly.cpp index 9741079..3335655 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -649,13 +649,6 @@ static void worker_ovec_cal0(void *data, long i, int tid) { ha_ovec_buf_t *b = ((ha_ovec_buf_t**)data)[tid]; int fully_cov, abnormal; - // if(i != 33) return; - // fprintf(stderr, "[M::%s-beg] rid->%ld\n", __func__, i); - // if (memcmp("m64012_190920_173625/88015004/ccs", Get_NAME((R_INF), i), Get_NAME_LENGTH((R_INF),i)) == 0) { - // fprintf(stderr, "[M::%s-beg] rid->%ld\n", __func__, i); - // } else { - // return; - // } ha_get_candidates_interface(b->ab, i, &b->self_read, &b->olist, &b->olist_hp, &b->clist, 0.02, asm_opt.max_n_chain, 1, NULL/**&(b->k_flag)**/, &b->r_buf, &(R_INF.paf[i]), &(R_INF.reverse_paf[i]), &(b->tmp_region), NULL, &(b->sp)); @@ -664,17 +657,24 @@ static void worker_ovec_cal0(void *data, long i, int tid) clear_Round2_alignment(&b->round2); correct_overlap(&b->olist, &R_INF, &b->self_read, &b->correct, &b->ovlp_read, &b->POA_Graph, &b->DAGCon, - &b->cigar1, &b->hap, &b->round2, &b->r_buf, &(b->tmp_region.w_list), 0, 0, &fully_cov, &abnormal); + &b->cigar1, &b->hap, &b->round2, &b->r_buf, &(b->tmp_region.w_list), 0, 0/**1***/, &fully_cov, &abnormal); b->num_read_base += b->self_read.length; b->num_correct_base += b->correct.corrected_base; b->num_recorrect_base += b->round2.dumy.corrected_base; - - R_INF.paf[i].is_fully_corrected = 0; - R_INF.paf[i].is_abnormal = abnormal; + // R_INF.paf[i].is_fully_corrected = 0; + // R_INF.paf[i].is_abnormal = abnormal; R_INF.trio_flag[i] = AMBIGU; + + // R_INF.paf[i].is_fully_corrected = 0; + // if (fully_cov) { + // if (get_cigar_errors(&b->cigar1) == 0 && get_cigar_errors(&b->round2.cigar) == 0) + // R_INF.paf[i].is_fully_corrected = 1; + // } + // R_INF.paf[i].is_abnormal = abnormal; + // R_INF.trio_flag[i] = AMBIGU; push_final_overlaps(&(R_INF.paf[i]), R_INF.reverse_paf, &b->olist, 1); push_final_overlaps(&(R_INF.reverse_paf[i]), R_INF.reverse_paf, &b->olist, 2); @@ -920,7 +920,7 @@ void rescue_hp_reads(ha_ovec_buf_t **b) int hom_cov, het_cov; ha_flt_tab_hp = ha_idx_hp = NULL; if (!(asm_opt.flag & HA_F_NO_KMER_FLT)) { - ha_flt_tab_hp = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 1); + ha_flt_tab_hp = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 1, 0); } ha_idx_hp = ha_pt_gen(&asm_opt, ha_flt_tab, 1, 1, &R_INF, &hom_cov, &het_cov); @@ -1038,7 +1038,7 @@ void ha_overlap_and_correct(int round) ///debug_print_pob_regions(); } -void ha_overlap_cal(int round) +void ha_overlap_cal(int round, int read_from_store) { int i, hom_cov, het_cov; ha_ovec_buf_t **b; @@ -1049,9 +1049,9 @@ void ha_overlap_cal(int round) for (i = 0; i < asm_opt.thread_num; ++i) b[i] = ha_ovec_init(0, (round == asm_opt.number_of_round - 1),0); if(ha_idx) hom_cov = asm_opt.hom_cov; - if(ha_idx == NULL) ha_idx = ha_pt_gen(&asm_opt, ha_flt_tab, round == 0? 0 : 1, 0, &R_INF, &hom_cov, &het_cov); // build the index + if(ha_idx == NULL) ha_idx = ha_pt_gen(&asm_opt, ha_flt_tab, ((round == 0)&&(read_from_store == 0))?0:1, 0, &R_INF, &hom_cov, &het_cov); // build the index ///debug_adapter(&asm_opt, &R_INF); - if (round == 0 && ha_flt_tab == 0) // then asm_opt.hom_cov hasn't been updated + if (/**round == 0 &&**/ ha_flt_tab == 0) // then asm_opt.hom_cov hasn't been updated ha_opt_update_cov(&asm_opt, hom_cov); het_cnt = NULL; if(round == asm_opt.number_of_round-1 && asm_opt.is_dbg_het_cnt) CALLOC(het_cnt, R_INF.total_reads); @@ -1075,6 +1075,8 @@ void ha_overlap_cal(int round) ha_ovec_destroy(b[i]); } free(b); + asm_opt.hom_cov = hom_cov; + asm_opt.het_cov = het_cov; } @@ -1787,7 +1789,7 @@ void hap_recalculate_peaks(char* output_file_name) int hom_cov, het_cov; // construct hash table for high occurrence k-mers if (!(asm_opt.flag & HA_F_NO_KMER_FLT)) { - ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0); + ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 0); ha_opt_update_cov(&asm_opt, hom_cov); } free(R_INF.read_length); @@ -1898,13 +1900,13 @@ int ha_assemble_ovec(void) // construct hash table for high occurrence k-mers if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL) { - ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0); + ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 0); ha_opt_update_cov(&asm_opt, hom_cov); } // error correction assert(asm_opt.number_of_round > 0); ha_opt_reset_to_round(&asm_opt, r); // this update asm_opt.roundID and a few other fields - ha_overlap_cal(r); + ha_overlap_cal(r, 0); fprintf(stderr, "[M::%s] size of buffer: %.3fGB\n", __func__, asm_opt.mem_buf / 1073741824.0); @@ -1944,7 +1946,7 @@ int ha_assemble(void) // construct hash table for high occurrence k-mers if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL) { - ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0); + ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 0); ha_opt_update_cov(&asm_opt, hom_cov); } // error correction @@ -1978,3 +1980,69 @@ int ha_assemble(void) destory_All_reads(&R_INF); return 0; } + + +int ha_assemble_pair(void) +{ + extern void ha_extract_print_list(const All_reads *rs, int n_rounds, const char *o); + int r = 0, hom_cov = -1, ovlp_loaded = 0; memset((&R_INF), 0, sizeof(R_INF)); + + if (asm_opt.load_index_from_disk && load_all_data_from_disk(&R_INF.paf, &R_INF.reverse_paf, asm_opt.output_file_name)) { + ovlp_loaded = 1; + fprintf(stderr, "[M::%s::%.3f*%.2f] ==> loaded corrected reads and overlaps from disk\n", __func__, yak_realtime(), yak_cpu_usage()); + if (asm_opt.extract_list) { + ha_extract_print_list(&R_INF, asm_opt.extract_iter, asm_opt.extract_list); + exit(0); + } + if (asm_opt.flag & HA_F_WRITE_EC) Output_corrected_reads(); + if (asm_opt.flag & HA_F_WRITE_PAF) Output_PAF(); + if (asm_opt.het_cov == -1024) hap_recalculate_peaks(asm_opt.output_file_name), ovlp_loaded = 2; + } + + + if (!ovlp_loaded) { + + if(!append_All_reads(&R_INF, asm_opt.output_file_name, 0)) { + fprintf(stderr, "[M::%s::] Cannot load %s.0\n", __func__, asm_opt.output_file_name); + exit(1); + } + + if(!append_All_reads(&R_INF, asm_opt.output_file_name, 1)) { + fprintf(stderr, "[M::%s::] Cannot load %s.1\n", __func__, asm_opt.output_file_name); + exit(1); + } + // Output_corrected_reads(); exit(0); + + ha_flt_tab = ha_idx = NULL; r = asm_opt.number_of_round - 1; + if((asm_opt.flag & HA_F_VERBOSE_GFA)) load_pt_index(&ha_flt_tab, &ha_idx, &R_INF, &asm_opt, asm_opt.output_file_name), load_ct_index(&ha_ct_table, asm_opt.output_file_name); + + // construct hash table for high occurrence k-mers + if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL) { + ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 1); + ha_opt_update_cov(&asm_opt, hom_cov); + } + // error correction + assert(asm_opt.number_of_round > 0); + ha_opt_reset_to_round(&asm_opt, r); // this update asm_opt.roundID and a few other fields + ha_overlap_cal(r, 1); + fprintf(stderr, "[M::%s] size of buffer: %.3fGB\n", __func__, asm_opt.mem_buf / 1073741824.0); + fprintf(stderr, "[M::%s::%.3f*%.2f@%.3fGB] ==> found overlaps for the final round\n", __func__, yak_realtime(), + yak_cpu_usage(), yak_peakrss_in_gb()); + + + if (asm_opt.flag & HA_F_WRITE_EC) Output_corrected_reads(); + ha_print_ovlp_stat(R_INF.paf, R_INF.reverse_paf, R_INF.total_reads); + ha_ft_destroy(ha_flt_tab); + if (asm_opt.flag & HA_F_WRITE_PAF) Output_PAF(); + ha_triobin(&asm_opt); + + } + if(ovlp_loaded == 2) ovlp_loaded = 0; + ha_opt_update_cov_min(&asm_opt, asm_opt.hom_cov, MIN_N_CHAIN); + + build_string_graph_without_clean(asm_opt.min_overlap_coverage, R_INF.paf, R_INF.reverse_paf, + R_INF.total_reads, R_INF.read_length, asm_opt.min_overlap_Len, asm_opt.max_hang_Len, asm_opt.clean_round, + asm_opt.gap_fuzz, asm_opt.min_drop_rate, asm_opt.max_drop_rate, asm_opt.output_file_name, asm_opt.large_pop_bubble_size, 0, !ovlp_loaded); + destory_All_reads(&R_INF); + return 0; +} \ No newline at end of file diff --git a/Assembly.h b/Assembly.h index 37ea0ec..0a0baae 100644 --- a/Assembly.h +++ b/Assembly.h @@ -44,6 +44,7 @@ typedef struct { } ha_ovec_buf_t; int ha_assemble(void); +int ha_assemble_pair(void); void ug_idx_build(ma_ug_t *ug, int hap_n); ha_ovec_buf_t *ha_ovec_init(int is_final, int save_ov, int is_ug); ha_ovec_buf_t *ha_ovec_buf_init(void *km, int is_final, int save_ov, int is_ug); diff --git a/CommandLines.cpp b/CommandLines.cpp index 1bed13d..33499e5 100644 --- a/CommandLines.cpp +++ b/CommandLines.cpp @@ -64,6 +64,7 @@ static ko_longopt_t long_options[] = { { "ul-cut", ko_required_argument, 349}, { "dual-scaf", ko_no_argument, 350}, { "scaf-gap", ko_required_argument, 351}, + { "sec-in", ko_required_argument, 352}, // { "path-round", ko_required_argument, 348}, { 0, 0, 0 } }; @@ -305,6 +306,7 @@ void init_opt(hifiasm_opt_t* asm_opt) asm_opt->self_scaf_min = 250000; asm_opt->self_scaf_reliable_min = 5000000; asm_opt->self_scaf_gap_max = 3000000; + asm_opt->sec_in = NULL; } void destory_enzyme(enzyme* f) @@ -853,6 +855,7 @@ int CommandLine_process(int argc, char *argv[], hifiasm_opt_t* asm_opt) else if (c == 349) asm_opt->ul_min_base = atol(opt.arg); else if (c == 350) asm_opt->self_scaf = 1; else if (c == 351) asm_opt->self_scaf_gap_max = atol(opt.arg); + else if (c == 352) get_hic_enzymes(opt.arg, &(asm_opt->sec_in), 0); else if (c == 'l') { ///0: disable purge_dup; 1: purge containment; 2: purge overlap asm_opt->purge_level_primary = asm_opt->purge_level_trio = atoi(opt.arg); } diff --git a/CommandLines.h b/CommandLines.h index c66f190..9cc99d1 100644 --- a/CommandLines.h +++ b/CommandLines.h @@ -5,7 +5,7 @@ #include #include -#define HA_VERSION "0.19.7-r598" +#define HA_VERSION "0.19.8-r602" #define VERBOSE 0 @@ -45,6 +45,7 @@ typedef struct { enzyme *hic_reads[2]; enzyme *hic_enzymes; enzyme *ar; + enzyme *sec_in; int extract_iter; int thread_num; int k_mer_length; diff --git a/Overlaps.cpp b/Overlaps.cpp index 3bd0420..e546e4a 100644 --- a/Overlaps.cpp +++ b/Overlaps.cpp @@ -861,6 +861,8 @@ void init_ma_hit_t_alloc(ma_hit_t_alloc* x) x->size = 0; x->buffer = NULL; x->length = 0; + x->is_fully_corrected = 0; + x->is_abnormal = 0; } void clear_ma_hit_t_alloc(ma_hit_t_alloc* x) @@ -10743,6 +10745,7 @@ ma_hit_t_alloc* sources, R_to_U* ruIndex, int print_seq, const char* prefix, FIL uint32_t x = p->a[j]>>33; if(xtotal_reads) { + fprintf(fp, "A\t%s\t%d\t%c\t%.*s\t%d\t%d\tid:i:%d\tHG:A:%c\n", name, l, "+-"[p->a[j]>>32&1], (int)Get_NAME_LENGTH((*RNF), x), Get_NAME((*RNF), x), coverage_cut?coverage_cut[x].s:0, coverage_cut?coverage_cut[x].e:(int)Get_READ_LENGTH((*RNF), x), x, @@ -10817,6 +10820,23 @@ ma_hit_t_alloc* sources, R_to_U* ruIndex, const char* prefix, FILE *fp) ma_ug_print2(ug, &R_INF, read_g, coverage_cut, sources, ruIndex, 1, prefix, fp); } +void prt_scaf_stats(sec_t *scp, ma_ug_t *ctg, const char* prefix, uint64_t id) +{ + uint64_t k, tl0, tl1; ma_utg_t *z = NULL; char name[32]; + for (k = tl0 = tl1 = 0; k < scp->n; k++) { + z = &(ctg->u.a[((uint32_t)scp->a[k])>>1]); tl0 += z->len; tl1 += (scp->a[k]>>32); + } + sprintf(name, "%s%.6lu%c", prefix, id + 1, "lc"[scp->is_c]); + + fprintf(stderr, "[M::%s] [M::%s] tot::%lu\tel::%lu\tnl::%lu\tis_c::%lu\t#::%lu\t%s\n", __func__, name, tl0+tl1, tl0, tl1, scp->is_c, (uint64_t)scp->n, ((scp->n>1)?"scf":"ctg")); + for (k = 0; k < scp->n; k++) { + z = &(ctg->u.a[((uint32_t)scp->a[k])>>1]); + fprintf(stderr, "%s%.6u%c(%c)\tlen::%u\tNs::%lu\n", + prefix, (((uint32_t)scp->a[k])>>1)+1, "lc"[z->circ], "+-"[(((uint32_t)scp->a[k])&1)], z->len, (scp->a[k]>>32)); + } + +} + void ma_scg_print(const kvect_sec_t *sc, All_reads *RNF, asg_t* read_g, const ma_sub_t *coverage_cut, ma_hit_t_alloc* sources, R_to_U* ruIndex, int print_seq, const char* prefix, FILE *fp) { uint8_t* primary_flag = read_g?(uint8_t*)calloc(read_g->n_seq, sizeof(uint8_t)):NULL; @@ -10824,6 +10844,9 @@ void ma_scg_print(const kvect_sec_t *sc, All_reads *RNF, asg_t* read_g, const ma char name[32]; uint64_t Rb, Cb, tRb, tCb, Cov; ma_utg_t *z = NULL; uint8_t c; for (i = 0; i < sc->n; ++i) { // the Segment lines in GFA scp = &(sc->a[i]); + ///debug + prt_scaf_stats(scp, sc->ctg, prefix, i); + for (j = tl = tRb = tCb = 0; j < scp->n; j++) { z = &(sc->ctg->u.a[((uint32_t)scp->a[j])>>1]); tl += z->len; tl += (scp->a[j]>>32); if(pc) { @@ -22760,31 +22783,6 @@ void output_read_graph(asg_t *sg, ma_sub_t* coverage_cut, char* output_file_name } -void read_ma(ma_hit_t* x, FILE* fp) -{ - int f_flag; - f_flag = fread(&(x->qns), sizeof(x->qns), 1, fp); - f_flag += fread(&(x->qe), sizeof(x->qe), 1, fp); - f_flag += fread(&(x->tn), sizeof(x->tn), 1, fp); - f_flag += fread(&(x->ts), sizeof(x->ts), 1, fp); - f_flag += fread(&(x->te), sizeof(x->te), 1, fp); - f_flag += fread(&(x->el), sizeof(x->el), 1, fp); - f_flag += fread(&(x->no_l_indel), sizeof(x->no_l_indel), 1, fp); - - uint32_t t; - f_flag += fread(&(t), sizeof(t), 1, fp); - x->ml = t; - - f_flag += fread(&(t), sizeof(t), 1, fp); - x->rev = t; - - f_flag += fread(&(t), sizeof(t), 1, fp); - x->bl = t; - - f_flag += fread(&(t), sizeof(t), 1, fp); - x->del = t; -} - int load_ma_hit_ts(ma_hit_t_alloc** x, char* read_file_name) { fprintf(stderr, "Loading ma_hit_ts from disk... \n"); diff --git a/Overlaps.h b/Overlaps.h index 7c5d46d..899bfe1 100644 --- a/Overlaps.h +++ b/Overlaps.h @@ -133,8 +133,8 @@ void add_ma_hit_t_alloc(ma_hit_t_alloc* x, ma_hit_t* element); void ma_hit_sort_tn(ma_hit_t *a, long long n); void ma_hit_sort_qns(ma_hit_t *a, long long n); -int load_all_data_from_disk(ma_hit_t_alloc **sources, ma_hit_t_alloc **reverse_sources, -char* output_file_name); +int load_all_data_from_disk(ma_hit_t_alloc **sources, ma_hit_t_alloc **reverse_sources, char* output_file_name); + typedef struct { uint32_t s:31, del:1, e; diff --git a/Process_Read.cpp b/Process_Read.cpp index 64f8933..8c17fa5 100644 --- a/Process_Read.cpp +++ b/Process_Read.cpp @@ -208,6 +208,157 @@ int load_All_reads(All_reads* r, char* read_file_name) return 1; } +void read_ma(ma_hit_t* x, FILE* fp) +{ + int f_flag; + f_flag = fread(&(x->qns), sizeof(x->qns), 1, fp); + f_flag += fread(&(x->qe), sizeof(x->qe), 1, fp); + f_flag += fread(&(x->tn), sizeof(x->tn), 1, fp); + f_flag += fread(&(x->ts), sizeof(x->ts), 1, fp); + f_flag += fread(&(x->te), sizeof(x->te), 1, fp); + f_flag += fread(&(x->el), sizeof(x->el), 1, fp); + f_flag += fread(&(x->no_l_indel), sizeof(x->no_l_indel), 1, fp); + + uint32_t t; + f_flag += fread(&(t), sizeof(t), 1, fp); + x->ml = t; + + f_flag += fread(&(t), sizeof(t), 1, fp); + x->rev = t; + + f_flag += fread(&(t), sizeof(t), 1, fp); + x->bl = t; + + f_flag += fread(&(t), sizeof(t), 1, fp); + x->del = t; +} + + +int append_All_reads(All_reads* r, char *idx, uint32_t id) +{ + char *gfa_name = NULL; MALLOC(gfa_name, (strlen(idx)+100)); + FILE *fp = NULL, *fpo = NULL; + + sprintf(gfa_name, "%s.ec%u.bin", idx, id); + fp = fopen(gfa_name, "r"); + if (!fp) {free(gfa_name); return 0;} + + sprintf(gfa_name, "%s.ovlp%u.source.bin", idx, id); + fpo = fopen(gfa_name, "r"); + if (!fpo) {free(gfa_name); fclose(fp); return 0;} + free(gfa_name); + + int local_adapterLen, f_flag; + f_flag = fread(&local_adapterLen, sizeof(local_adapterLen), 1, fp); + if(local_adapterLen != asm_opt.adapterLen) { + fprintf(stderr, "the adapterLen of index is: %d, but the adapterLen set by user is: %d\n", + local_adapterLen, asm_opt.adapterLen); + exit(1); + } + uint64_t index_size0, name_index_size0, total_reads0, total_reads_bases0, total_name_length0; + index_size0 = r->index_size; + total_reads0 = r->total_reads; + total_reads_bases0 = r->total_reads_bases; + total_name_length0 = r->total_name_length; + name_index_size0 = ((total_reads0)?(total_reads0+1):(0));///r->name_index_size; + + f_flag += fread(&r->index_size, sizeof(r->index_size), 1, fp); + f_flag += fread(&r->name_index_size, sizeof(r->name_index_size), 1, fp); + f_flag += fread(&r->total_reads, sizeof(r->total_reads), 1, fp); + f_flag += fread(&r->total_reads_bases, sizeof(r->total_reads_bases), 1, fp); + f_flag += fread(&r->total_name_length, sizeof(r->total_name_length), 1, fp); + + r->index_size += index_size0; + r->name_index_size += name_index_size0; if(name_index_size0) r->name_index_size--; + r->total_reads += total_reads0; + r->total_reads_bases += total_reads_bases0; + r->total_name_length += total_name_length0; + + + uint64_t i = 0, zero = 0, k; + REALLOC(r->N_site, r->total_reads); + + for (i = total_reads0; i < r->total_reads; i++) { + f_flag += fread(&zero, sizeof(zero), 1, fp); + + if (zero) { + r->N_site[i] = (uint64_t*)malloc(sizeof(uint64_t)*(zero + 1)); + r->N_site[i][0] = zero; + if (r->N_site[i][0]) { + f_flag += fread(r->N_site[i]+1, sizeof(r->N_site[i][0]), r->N_site[i][0], fp); + } + } else { + r->N_site[i] = NULL; + } + } + + REALLOC(r->read_length, r->total_reads); + f_flag += fread(r->read_length + total_reads0, sizeof(uint64_t), r->total_reads-total_reads0, fp); + + REALLOC(r->read_size, r->total_reads); + memcpy (r->read_size + total_reads0, r->read_length + total_reads0, sizeof(uint64_t)*(r->total_reads-total_reads0)); + + REALLOC(r->read_sperate, r->total_reads); + for (i = total_reads0; i < r->total_reads; i++) { + r->read_sperate[i] = (uint8_t*)malloc(sizeof(uint8_t)*(r->read_length[i]/4+1)); + f_flag += fread(r->read_sperate[i], sizeof(uint8_t), r->read_length[i]/4+1, fp); + } + + + REALLOC(r->name, r->total_name_length); + f_flag += fread(r->name + total_name_length0, sizeof(char), r->total_name_length - total_name_length0, fp); + + REALLOC(r->name_index, r->name_index_size); uint64_t sft = 0; + if(name_index_size0) sft = r->name_index[--name_index_size0]; + // fprintf(stderr, "name_index_size0::%lu, total_reads0::%lu, sft::%lu\n", name_index_size0, total_reads0, sft); + f_flag += fread(r->name_index + name_index_size0, sizeof(uint64_t), r->name_index_size-name_index_size0, fp); + if(sft) { + for (i = name_index_size0; i < r->name_index_size; i++) r->name_index[i] += sft; + } + + /****************************may have bugs********************************/ + REALLOC(r->trio_flag, r->total_reads); + f_flag += fread(r->trio_flag + total_reads0, sizeof(uint8_t), r->total_reads - total_reads0, fp); + + int hom_cov0 = asm_opt.hom_cov, het_cov0 = asm_opt.het_cov; + f_flag += fread(&(asm_opt.hom_cov), sizeof(asm_opt.hom_cov), 1, fp); asm_opt.hom_cov += hom_cov0; + f_flag += fread(&(asm_opt.het_cov), sizeof(asm_opt.het_cov), 1, fp); asm_opt.het_cov += het_cov0; + /****************************may have bugs********************************/ + + REALLOC(r->cigars, r->total_reads); + REALLOC(r->second_round_cigar, r->total_reads); + for (i = total_reads0; i < r->total_reads; i++) { + r->second_round_cigar[i].size = r->cigars[i].size = 0; + r->second_round_cigar[i].length = r->cigars[i].length = 0; + r->second_round_cigar[i].record = r->cigars[i].record = NULL; + + r->second_round_cigar[i].lost_base_size = r->cigars[i].lost_base_size = 0; + r->second_round_cigar[i].lost_base_length = r->cigars[i].lost_base_length = 0; + r->second_round_cigar[i].lost_base = r->cigars[i].lost_base = NULL; + } + ///r->pb_regions = NULL; + + REALLOC(r->paf, r->total_reads); + memset(r->paf+total_reads0, 0, sizeof((*(r->paf)))*(r->total_reads - total_reads0)); + long long n_read; f_flag = fread(&n_read, sizeof(n_read), 1, fpo); ma_hit_t t; + for (i = total_reads0; i < r->total_reads; i++) { + f_flag += fread(&(r->paf[i].is_fully_corrected), sizeof(r->paf[i].is_fully_corrected), 1, fpo); + f_flag += fread(&(r->paf[i].is_abnormal), sizeof(r->paf[i].is_abnormal), 1, fpo); + f_flag += fread(&(r->paf[i].length), sizeof(r->paf[i].length), 1, fpo); + + if(r->paf[i].length == 0) continue; + for (k = 0; k < r->paf[i].length; k++) read_ma(&t, fpo); + r->paf[i].length = 0; + } + + REALLOC(r->reverse_paf, r->total_reads); + memset(r->reverse_paf+total_reads0, 0, sizeof((*(r->reverse_paf)))*(r->total_reads - total_reads0)); + + fclose(fp); fclose(fpo); + fprintf(stderr, "Reads has been loaded.\n"); + return 1; +} + int destory_read_bin(All_reads* r) { diff --git a/Process_Read.h b/Process_Read.h index 2e1acbd..a0f3693 100644 --- a/Process_Read.h +++ b/Process_Read.h @@ -229,6 +229,7 @@ void destory_UC_Read(UC_Read* r); void reverse_complement(char* pattern, uint64_t length); void write_All_reads(All_reads* r, char* read_file_name); int load_All_reads(All_reads* r, char* read_file_name); +int append_All_reads(All_reads* r, char *idx, uint32_t id); void destory_All_reads(All_reads* r); int destory_read_bin(All_reads* r); void init_Debug_reads(Debug_reads* x, const char* file); @@ -249,5 +250,6 @@ void write_compress_base_disk(FILE *fp, uint64_t ul_rid, char *str, uint32_t len int64_t load_compress_base_disk(FILE *fp, uint64_t *ul_rid, char *dest, uint32_t *len, ul_vec_t *buf); scaf_res_t *init_scaf_res_t(uint32_t n); void destroy_scaf_res_t(scaf_res_t *p); +void read_ma(ma_hit_t* x, FILE* fp); #endif diff --git a/htab.cpp b/htab.cpp index e147153..306619a 100644 --- a/htab.cpp +++ b/htab.cpp @@ -1096,14 +1096,14 @@ void *ha_ft_ug_gen(const hifiasm_opt_t *asm_opt, ma_utg_v *us, int is_HPC, int k return (void*)flt_tab; } -void *ha_ft_gen(const hifiasm_opt_t *asm_opt, All_reads *rs, int *hom_cov, int is_hp_mode) +void *ha_ft_gen(const hifiasm_opt_t *asm_opt, All_reads *rs, int *hom_cov, int is_hp_mode, int read_from_store) { yak_ft_t *flt_tab; int64_t cnt[YAK_N_COUNTS]; int peak_hom, peak_het, cutoff = YAK_MAX_COUNT - 1, ex_flag = 0; if(is_hp_mode) ex_flag = HAF_RS_READ|HAF_SKIP_READ; - ha_ct_t *h; - h = ha_count(asm_opt, HAF_COUNT_ALL|HAF_RS_WRITE_LEN|ex_flag, !(asm_opt->flag&HA_F_NO_HPC), asm_opt->k_mer_length, asm_opt->mz_win, NULL, NULL, rs, NULL, 1, NULL, 0); + ha_ct_t *h; + h = ha_count(asm_opt, HAF_COUNT_ALL|ex_flag|((read_from_store)?(HAF_RS_READ):(HAF_RS_WRITE_LEN)), !(asm_opt->flag&HA_F_NO_HPC), asm_opt->k_mer_length, asm_opt->mz_win, NULL, NULL, rs, NULL, 1, NULL, 0); if((asm_opt->flag & HA_F_VERBOSE_GFA)) { write_ct_index((void*)h, asm_opt->output_file_name); diff --git a/htab.h b/htab.h index afe44b7..b501690 100644 --- a/htab.h +++ b/htab.h @@ -74,7 +74,7 @@ extern void *ha_ct_table; void *ha_ft_ul_gen(const hifiasm_opt_t *asm_opt, ma_utg_v *us, int k, int w, int cutoff); void *ha_ft_ug_gen(const hifiasm_opt_t *asm_opt, ma_utg_v *us, int is_HPC, int k, int w, int min_freq, int max_freq); -void *ha_ft_gen(const hifiasm_opt_t *asm_opt, All_reads *rs, int *hom_cov, int is_hp_mode); +void *ha_ft_gen(const hifiasm_opt_t *asm_opt, All_reads *rs, int *hom_cov, int is_hp_mode, int read_from_store); int32_t ha_ft_cnt(const void *hh, uint64_t y); void ha_ft_destroy(void *h); diff --git a/inter.cpp b/inter.cpp index 8ef5250..93d13d0 100644 --- a/inter.cpp +++ b/inter.cpp @@ -18178,7 +18178,9 @@ const ug_opt_t *uopt, const asg_t *rg, R_to_U *ri, int64_t bw, double diff_ec_ul { uint64_t dn = dump->n, i, mqs, mqe; int64_t iqs, iqe, its, ite; idx->n = 0; collect_nc_ovlps(rch, rch_i, idx, uopt, rg, ri, ulid, &mqs, &mqe); - assert(idx->n > 1); assert((mqs != ((uint64_t)-1)) && (mqe != ((uint64_t)-1)) && (mqe > mqs)); + // assert(idx->n > 1); assert((mqs != ((uint64_t)-1)) && (mqe != ((uint64_t)-1)) && (mqe > mqs)); + if((idx->n <= 1) || (mqs == ((uint64_t)-1)) || (mqe == ((uint64_t)-1)) || (mqe <= mqs)) return; + gen_linear_rchains(idx, dump, rg, uopt, bw, diff_ec_ul, rch->rlen, dp, ulid); assert(idx->n); idx->n = gen_cns_chain_linear(idx->a, idx->n, /**dump->a+dn,**/ rg, ri, rch->rlen, bw, diff_ec_ul, dp, UG_SKIP_N, UG_ITER_N, UG_DIS_N, mqs, mqe, ulid); diff --git a/main.cpp b/main.cpp index c4a0816..72acca3 100644 --- a/main.cpp +++ b/main.cpp @@ -61,8 +61,9 @@ int main(int argc, char *argv[]) // ed_band_cal_global((char*)"ACTTTTTT", 8, (char*)"AATTTT", 6, 3), // ed_band_cal_global_128bit((char*)"ACTTTTTT", 8, (char*)"AATTTT", 6, 3)); // exit(1); - if(!(asm_opt.dbg_ovec_cal)) ret = ha_assemble(); - else ret = ha_assemble_ovec(); + if(asm_opt.sec_in) ret = ha_assemble_pair(); + else if(asm_opt.dbg_ovec_cal) ret = ha_assemble_ovec(); + else ret = ha_assemble(); destory_opt(&asm_opt); fprintf(stderr, "[M::%s] Version: %s\n", __func__, HA_VERSION);