Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid potentially large stack allocation, clean up int usage #282

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ CFLAGS = -fpic -std=c99 -O3 -I ${PREFIX}/include -L ${PREFIX}/lib
CPPFLAGS = -std=c++11 -Wall -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib
LP_CPPFLAGS = -std=c++11 -Wall -g -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib



samtools-$(SAMVER)/Makefile:
curl -L -o samtools-${SAMVER}.tar.bz2 https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2; \
tar -xjf samtools-${SAMVER}.tar.bz2; \
Expand All @@ -27,7 +29,6 @@ libhts.a: samtools-$(SAMVER)/Makefile
cd samtools-${SAMVER}/htslib-${SAMVER}; CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" ./configure; make CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}"
cp samtools-${SAMVER}/htslib-${SAMVER}/$@ $@


longphase-$(LPVER)/Makefile:
curl -L -o longphase-${LPVER}.tar.gz https://github.com/twolinin/longphase/archive/refs/tags/v${LPVER}.tar.gz; \
tar -zxvf longphase-${LPVER}.tar.gz; \
Expand All @@ -39,7 +40,7 @@ longphase: longphase-$(LPVER)/Makefile
cp longphase-${LPVER}/$@ $@


libclair3.so: samtools-${SAMVER}/htslib-${SAMVER}
libclair3.so: samtools-${SAMVER}/htslib-${SAMVER} libhts.a
${PYTHON} build.py


Expand Down
42 changes: 23 additions & 19 deletions src/clair3_full_alignment.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ int realign_read(Variant *variant, Read *read, size_t i, size_t consumed, size_t
uint32_t *cigartuples = read->cigartuples;
uint8_t *seqi = read->seqi;
size_t n_cigar = read->n_cigar;
size_t middle_op = bam_cigar_op(cigartuples[i]);
//size_t middle_op = bam_cigar_op(cigartuples[i]); // unused
size_t middle_length = bam_cigar_oplen(cigartuples[i]);
size_t left_consumed = consumed > 0 ? consumed : 0;
size_t right_consumed = consumed < middle_length ? middle_length - consumed : 0;
Expand Down Expand Up @@ -258,15 +258,15 @@ int haplotag_read(Variants_info *variants_info, Read *read, char *ref_seq, size_
size_t query_pos = 0;
size_t v_position = 0;
Variant **variants = variants_info->variants;
uint8_t *seqi = read->seqi;
//uint8_t *seqi = read->seqi; // unused
uint32_t *cigartuples = read->cigartuples;
size_t n_cigar = read->n_cigar;
size_t j = variants_info->variant_current_pos;
size_t ref_pos = read->read_start;
khash_t(KH_INT_COUNTER) *haplotype_cost = kh_init(KH_INT_COUNTER);
int allele = 0;

while (j < n && variants[j]->position < ref_pos)
while (j < n && (size_t)variants[j]->position < ref_pos)
j += 1;

for (size_t i = 0; i < n_cigar; i++)
Expand Down Expand Up @@ -456,7 +456,8 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
}
}

size_t flanking_candidates[flanking_candidates_num];
size_t *flanking_candidates = malloc(flanking_candidates_num * sizeof(size_t));

for (khiter_t k = kh_begin(flanking_candidates_p); k != kh_end(flanking_candidates_p); ++k)
{
if (kh_exist(flanking_candidates_p, k))
Expand Down Expand Up @@ -517,13 +518,13 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
variant_current_pos++;
variants_info.variant_current_pos = variant_current_pos;

while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < pos)
while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < (size_t)pos)
candidate_current_index++;

read.read_end = get_read_end(cigartuples, n_cigar, read.read_start);

// get the overlap candidates number and skip the alignment if no flanking candidate overlapped
size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, &flanking_candidates);
size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, flanking_candidates);
read.overlap_candidates_num = overlap_candidates_num;
if (read.overlap_candidates_num == 0)
{
Expand Down Expand Up @@ -651,14 +652,14 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
}

// allocate memory of the input matrix of all candidates
int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t));

HAP read_hap_array[reads_num];
int matrix_read_index_array[matrix_depth];
Alt_info *alt_info = malloc(matrix_depth * sizeof(Alt_info));
HAP *read_hap_array = malloc(reads_num * sizeof(HAP));
int *matrix_read_index_array = malloc(matrix_depth * sizeof(int));

char **alt_info_p = calloc(candidate_num, sizeof(char*));
// allocate output
fa_data data = calloc(1, sizeof(_fa_data));
char **alt_info_p = calloc(candidate_num, sizeof(char*));
int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t));

// loop each candiate and generate full-alignment input matrix
for (size_t i = 0; i < candidate_num; i++)
Expand Down Expand Up @@ -690,7 +691,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
read_hap_array[overlap_read_num++].haplotype = read.haplotype;
}

sort_read_name_by_haplotype(&read_hap_array, &matrix_read_index_array, matrix_depth, overlap_read_num);
sort_read_name_by_haplotype(read_hap_array, matrix_read_index_array, matrix_depth, overlap_read_num);

// loop each overlapped read of a candidate
for (size_t d = 0; d < matrix_depth; d++)
Expand Down Expand Up @@ -718,7 +719,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
continue;
}

if (offset < 0 || offset >= read.overlap_candidates_num)
if (offset < 0 || (size_t)offset >= read.overlap_candidates_num)
continue;

int8_t alt_v = 0;
Expand All @@ -728,7 +729,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)

if (is_center_pos)
candidate_depth++;
size_t alt_int = read.pos_info[offset].alt_base;
//size_t alt_int = read.pos_info[offset].alt_base; // unused
char alt_base = seq_nt16_str[read.pos_info[offset].alt_base];
if (read.pos_info[offset].ins_length > 0)
{
Expand Down Expand Up @@ -825,17 +826,17 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)

int ref_count = pos_alt_info[i].acgt_count[acgt2num[center_ref_base - 'A']];

sprintf(alt_info_str, "%i-%i-%c-", candidate + 1, candidate_depth, center_ref_base);
for (size_t j = 0; j < 4; j++)
sprintf(alt_info_str, "%zu-%zu-%c-", candidate + 1, candidate_depth, center_ref_base);
for (int8_t j = 0; j < 4; j++)
{
if (j != acgt2num[center_ref_base - 'A'] && pos_alt_info[i].acgt_count[j] > 0)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", ACGT[j], pos_alt_info[i].acgt_count[j]);
sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", ACGT[j], pos_alt_info[i].acgt_count[j]);
}
for (khiter_t k = kh_begin(pos_alt_info[i].ins_counter); k != kh_end(pos_alt_info[i].ins_counter); k++)
{
if (kh_exist(pos_alt_info[i].ins_counter, k))
{
char *key = kh_key(pos_alt_info[i].ins_counter, k);
const char *key = kh_key(pos_alt_info[i].ins_counter, k);
int val = kh_val(pos_alt_info[i].ins_counter, k);
ref_count -= val;
if (strlen(key) <= max_indel_length)
Expand All @@ -858,7 +859,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
int key = kh_key(pos_alt_info[i].del_counter, k);
int val = kh_val(pos_alt_info[i].del_counter, k);
ref_count -= val;
if (key <= max_indel_length)
if (key <= (int)max_indel_length) // don't set max_indel_length to >2^32
{
if (strlen(alt_info_str) + key + 32 >= max_alt_length)
{
Expand Down Expand Up @@ -904,6 +905,9 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
free(chr);
free(pos_alt_info);
free(alt_info);
free(read_hap_array);
free(matrix_read_index_array);
free(flanking_candidates);
kh_counter_destroy(read_name_set);
kh_int_counter_destroy(candidates_p);
kh_int_counter_destroy(flanking_candidates_p);
Expand Down
2 changes: 1 addition & 1 deletion src/clair3_full_alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ static const int8_t HAP_TYPE[3] = {60, 30, 90};
#define normalize_hap(x) (HAP_TYPE[x])

static const size_t overhang = 10;
static const char *RN = "\0";
//static const char *RN = "\0"; // unused
static const size_t min_haplotag_mq = 20;
static const size_t expand_reference_region = 2000000;
static const size_t flanking_base_num = 16;
Expand Down
18 changes: 9 additions & 9 deletions src/clair3_pileup.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
size_t ins_count = 0;

bool pass_af = false;
bool pass_snp_af = false;
bool pass_indel_af = false;
//bool pass_snp_af = false; // ununsed
//bool pass_indel_af = false; // unused

const char *c_name = data->hdr->target_name[tid];
if (strcmp(c_name, chr) != 0) continue;
Expand All @@ -222,7 +222,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
n_cols++;


if (pre_pos + 1 != pos || pre_pos == 0)
if (pre_pos + 1 != (size_t)pos || pre_pos == 0)
contiguous_flanking_num = 0;
else
contiguous_flanking_num++;
Expand Down Expand Up @@ -355,7 +355,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
for (size_t i = 0; i < 4; i++) {
forward_sum += pileup->matrix[major_col + i];
reverse_sum += pileup->matrix[major_col + i + reverse_pos_start];
if (i == ref_offset_forward) {
if (i == (size_t)ref_offset_forward) {
ref_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];
} else {
size_t current_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];
Expand Down Expand Up @@ -397,15 +397,15 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
size_t max_alt_length = 64;
char *alt_info_str = xalloc(max_alt_length, sizeof(char), "alt_info_str");

sprintf(alt_info_str, "%i-%i-%c-", pos+1, depth, ref_base);
sprintf(alt_info_str, "%i-%zu-%c-", pos+1, depth, ref_base);
//snp
for (size_t i = 0; i < 4; i++) {
forward_sum += pileup->matrix[major_col + i];
reverse_sum += pileup->matrix[major_col + i + reverse_pos_start];
size_t alt_sum = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];

if (alt_sum > 0 && i != ref_offset_forward)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", plp_bases_clair3[i], alt_sum);
if (alt_sum > 0 && i != (size_t)ref_offset_forward)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", plp_bases_clair3[i], alt_sum);
}
//del
for (size_t i = 0; i < del_buf_size; i++) {
Expand All @@ -418,7 +418,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
max_alt_length = max_alt_length << 1;
alt_info_str = xrealloc(alt_info_str, max_alt_length*sizeof(char), "alt_info_str");
}
sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %i ", i+1,ref_seq+offset+1, d);
sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %zu ", (int)i+1, ref_seq+offset+1, d);
}

}
Expand All @@ -434,7 +434,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
max_alt_length = max_alt_length << 1;
alt_info_str = xrealloc(alt_info_str, max_alt_length *sizeof(char), "alt_info_str");
}
sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %i ", ref_base, key, val);
sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %zu ", ref_base, key, val);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/medaka_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ char *substring(char *string, int position, int length) {
char *ptr;
size_t i;

if(length < 0) return NULL;
ptr = malloc(length + 1);

for (i = 0 ; i < length ; i++) {
for (i = 0 ; i < (size_t) length ; i++) {
*(ptr + i) = *(string + position);
string++;
}
Expand Down
4 changes: 2 additions & 2 deletions src/medaka_khcounter.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ void kh_counter_print(khash_t(KH_COUNTER) *hash) {
printf("%s -> %i\n", key, val);
}
}
kh_counter_stats_t stats = kh_counter_stats(hash);
// printf("max: %i, sum: %i\n", stats.max, stats.sum);
//kh_counter_stats_t stats = kh_counter_stats(hash);
//printf("max: %i, sum: %i\n", stats.max, stats.sum);
}


Expand Down