Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b0a1230
Implemented LCS
AndreaCampagner Dec 3, 2014
1e25d6d
Implemented LCS
AndreaCampagner Dec 3, 2014
adc4b97
Implemented LCS by adding a new scoring system
AndreaCampagner Dec 3, 2014
7d6be3c
Added options for gaps only at ends but not code, yet
AndreaCampagner Jan 9, 2015
6491c20
Options for gaps only at ends added, code not yet
AndreaCampagner Jan 9, 2015
ed7c4e8
Implemented gapsonlyatends and corrected error in other options
AndreaCampagner Jan 15, 2015
bcaa89c
Corrected bug in nogaps
AndreaCampagner Jan 22, 2015
ed3d4a0
Trying compilation
AndreaCampagner Jan 22, 2015
aef9371
Trying compilation
AndreaCampagner Jan 22, 2015
6368fe0
Adding tests
AndreaCampagner Jan 22, 2015
8523852
Writing tests
AndreaCampagner Jan 31, 2015
857d3d5
Writing tests
AndreaCampagner Jan 31, 2015
8215c84
Adding tests
AndreaCampagner Feb 3, 2015
74b7113
Corrected bug related to combined use of --nogaps, --freestartgap and…
AndreaCampagner Feb 3, 2015
3c89e3c
Deleted object files
AndreaCampagner Feb 3, 2015
0965513
Merge pull request #2 from AndreaCampagner/gapsends
AndreaCampagner Feb 3, 2015
e377cda
Removing objects and executables
AndreaCampagner Feb 3, 2015
32490fc
prova
AndreaCampagner Feb 5, 2015
a1c52f8
Corrected various bugs in original implementation and added new tests
AndreaCampagner Feb 5, 2015
e6f0cba
Corrected various bugs in original implementation and added new tests
AndreaCampagner Feb 5, 2015
e39e9f1
Added README
AndreaCampagner Feb 5, 2015
6143723
Added README
AndreaCampagner Feb 5, 2015
016cc9c
Corrected potential bug
AndreaCampagner Feb 5, 2015
a112873
Changed naming scheme for variables related to gapsonlyatends options…
AndreaCampagner Feb 6, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ REQ=$(LIBS_PATH)/bit_array/Makefile $(LIBS_PATH)/string_buffer/Makefile $(LIBS_P
SRCS=$(wildcard src/*.c)
OBJS=$(SRCS:.c=.o)

all: bin/needleman_wunsch bin/smith_waterman src/libalign.a examples
all: bin/needleman_wunsch bin/smith_waterman src/libalign.a examples tests

src/libalign.a: $(OBJS)
ar -csru src/libalign.a $(OBJS)
Expand All @@ -55,9 +55,13 @@ bin/smith_waterman: tools/sw_cmdline.c src/libalign.a

examples: src/libalign.a
cd examples; make LIBS_PATH=$(abspath $(LIBS_PATH))

tests: src/libalign.a
cd tests; make LIBS_PATH=$(abspath $(LIBS_PATH))

clean:
rm -rf bin src/*.o src/libalign.a
cd examples; make clean
cd tests; make clean

.PHONY: all clean examples
.PHONY: all clean examples tests
8 changes: 6 additions & 2 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@ LIBS=-L $(LIBS_PATH)/bit_array -L $(LIBS_PATH)/string_buffer -L ../src

LINK=-lalign -lstrbuf -lbitarr -lpthread -lz

all: nw_example sw_example
all: nw_example sw_example lcs_example

sw_example: sw_example.c ../src/libalign.a
$(CC) sw_example.c -o sw_example $(CFLAGS) $(INCS) $(LIBS) $(LINK)

nw_example: nw_example.c ../src/libalign.a
$(CC) nw_example.c -o nw_example $(CFLAGS) $(INCS) $(LIBS) $(LINK)

lcs_example: lcs_example.c ../src/libalign.a
$(CC) lcs_example.c -o lcs_example $(CFLAGS) $(INCS) $(LIBS) $(LINK)


clean:
rm -rf nw_example sw_example *.greg *.dSYM
rm -rf nw_example sw_example lcs_example *.greg *.dSYM

.PHONY: all clean
36 changes: 36 additions & 0 deletions examples/lcs_example.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "needleman_wunsch.h"
#include "print_lcs.h"

void align(char* seq_a, char* seq_b)
{
// Variables to store alignment result
nw_aligner_t *nw = needleman_wunsch_new();
alignment_t *result = alignment_create(256);

scoring_t scoring;
scoring_system_lcs(&scoring);

needleman_wunsch_align(seq_a, seq_b, &scoring, nw, result);

print_lcs(result);

// Free memory for storing alignment results
needleman_wunsch_free(nw);
alignment_free(result);
}

int main(int argc, char* argv[])
{
if(argc != 3)
{
printf("usage: ./nw_example <seq1> <seq2>\n");
exit(EXIT_FAILURE);
}

align(argv[1], argv[2]);
exit(EXIT_SUCCESS);
}
2 changes: 1 addition & 1 deletion examples/nw_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void align(char* seq_a, char* seq_b)
scoring_t scoring;
scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
no_start_gap_penalty, no_end_gap_penalty,
no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive);
no_gaps_in_a, no_gaps_in_b, 0, 0, no_mismatches, case_sensitive);

// Add some special cases
// x -> y means x in seq1 changing to y in seq2
Expand Down
2 changes: 1 addition & 1 deletion examples/sw_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void align(char* seq_a, char* seq_b)
scoring_t scoring;
scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
no_start_gap_penalty, no_end_gap_penalty,
no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive);
no_gaps_in_a, no_gaps_in_b, 0, 0, no_mismatches, case_sensitive);

// Add some special cases
// x -> y means x in seq1 changing to y in seq2
Expand Down
1 change: 0 additions & 1 deletion libs/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

all: bit_array string_buffer seq_file
cd bit_array && git pull && make
cd string_buffer && git pull && make
Expand Down
32 changes: 17 additions & 15 deletions src/alignment.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
size_t seq_i, seq_j, len_i = score_width-1, len_j = score_height-1;
size_t index, index_left, index_up, index_upleft;

if(scoring->no_gaps_in_a) {
if(scoring->no_gaps_in_a || scoring->gaps_only_at_ends_in_a) {
for(i = 0; i < arr_size; i++) gap_a_scores[i] = min;
}

if(scoring->no_gaps_in_b) {
if(scoring->no_gaps_in_b || scoring->gaps_only_at_ends_in_b) {
for(i = 0; i < arr_size; i++) gap_b_scores[i] = min;
}

Expand All @@ -69,8 +69,9 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)

// Think carefully about which way round these are
gap_a_scores[i] = min;
gap_b_scores[i] = scoring->no_start_gap_penalty ? 0
: scoring->gap_open + (long)i * scoring->gap_extend;
if((len_j < len_i && scoring->no_gaps_in_b) || !scoring->no_gaps_in_b)
gap_b_scores[i] = (score_t)(scoring->no_start_gap_penalty ? 0
: scoring->gap_open + (long)i * scoring->gap_extend);
}

// work down first column -> [0][j]
Expand All @@ -79,8 +80,9 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
match_scores[index] = min;

// Think carefully about which way round these are
gap_a_scores[index]
= (score_t)(scoring->no_start_gap_penalty ? 0
if((len_i < len_j && scoring->no_gaps_in_a) || !scoring->no_gaps_in_a)
gap_a_scores[index]
= (score_t)(scoring->no_start_gap_penalty ? 0
: scoring->gap_open + (long)j * scoring->gap_extend);
gap_b_scores[index] = min;
}
Expand Down Expand Up @@ -130,7 +132,7 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
// Long arithmetic since some INTs are set to min and penalty is -ve
// (adding as ints would cause an integer overflow)

if(!scoring->no_gaps_in_a)
if(!scoring->no_gaps_in_a && !scoring->gaps_only_at_ends_in_a)
{
// Update gap_a_scores[i][j] from position [i][j-1]

Expand All @@ -150,7 +152,7 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
}
}

if(!scoring->no_gaps_in_b)
if(!scoring->no_gaps_in_b && !scoring->gaps_only_at_ends_in_b)
{
// Update gap_b_scores[i][j] from position [i-1][j]

Expand Down Expand Up @@ -182,7 +184,7 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
index_upleft++;
}

if(scoring->no_gaps_in_a)
if((scoring->no_gaps_in_a && len_i < len_j) || scoring->gaps_only_at_ends_in_a)
{
// Allow gaps only at the start/end of A
// Fill right hand column of traceback matrix
Expand All @@ -192,16 +194,16 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
while(index < arr_size)
{
gap_a_scores[index]
= MAX3(match_scores[index_up] + gap_open_penalty,
gap_a_scores[index_up] + gap_extend_penalty,
= MAX3(match_scores[index_up] + ((scoring->no_end_gap_penalty) ? 0 : gap_open_penalty),
gap_a_scores[index_up] + ((scoring->no_end_gap_penalty) ? 0 : gap_extend_penalty),
min);

index_up = index;
index += score_width;
}
}

if(scoring->no_gaps_in_b)
if((scoring->no_gaps_in_b && len_j < len_i) || scoring->gaps_only_at_ends_in_b)
{
// Allow gaps only at the start/end of B
// Fill bottom row of traceback matrix
Expand All @@ -211,8 +213,8 @@ static void alignment_fill_matrices(aligner_t *aligner, char is_sw)
while(index < arr_size)
{
gap_b_scores[index]
= MAX3(match_scores[index_left] + gap_open_penalty,
gap_b_scores[index_left] + gap_extend_penalty,
= MAX3(match_scores[index_left] + ((scoring->no_end_gap_penalty) ? 0 : gap_open_penalty),
gap_b_scores[index_left] + ((scoring->no_end_gap_penalty) ? 0 : gap_extend_penalty),
min);

index_left++;
Expand Down Expand Up @@ -380,7 +382,7 @@ void alignment_reverse_move(enum Matrix *curr_matrix, score_t *curr_score,
*curr_matrix = GAP_B;
*curr_score = aligner->gap_b_scores[*arr_index];
}
else if((!scoring->no_mismatches || is_match) &&
else if((!scoring->no_mismatches || is_match) || scoring->no_mismatches &&
(long)aligner->match_scores[*arr_index] + prev_match_penalty == *curr_score)
{
*curr_matrix = MATCH;
Expand Down
28 changes: 27 additions & 1 deletion src/alignment_cmdline.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,22 @@ cmdline_t* cmdline_new(int argc, char **argv, scoring_t *scoring, char is_sw)
if(is_sw) usage("--freeendgap only valid with Needleman-Wunsch");
scoring->no_end_gap_penalty = 1;
}
else if(strcasecmp(argv[argi], "--gapsonlyatendsin1") == 0)
{
if(is_sw) usage("--gapsonlyatends only valid with Needleman-Wunsch");
scoring->gaps_only_at_ends_in_a = 1;
}
else if(strcasecmp(argv[argi], "--gapsonlyatendsin2") == 0)
{
if(is_sw) usage("--gapsonlyatends only valid with Needleman-Wunsch");
scoring->gaps_only_at_ends_in_b = 1;
}
else if(strcasecmp(argv[argi], "--gapsonlyatends") == 0)
{
if(is_sw) usage("--gapsonlyatends only valid with Needleman-Wunsch");
scoring->gaps_only_at_ends_in_a = 1;
scoring->gaps_only_at_ends_in_b = 1;
}
else if(strcasecmp(argv[argi], "--nogaps") == 0)
{
scoring->no_gaps_in_a = 1;
Expand Down Expand Up @@ -483,7 +499,17 @@ cmdline_t* cmdline_new(int argc, char **argv, scoring_t *scoring, char is_sw)
{
usage("--nogaps.. --nomismatches cannot be used at together");
}


if((scoring->gaps_only_at_ends_in_a || scoring->gaps_only_at_ends_in_b) && scoring->no_mismatches)
{
usage("--gapsonlyatends.. --nomismatches cannot be used at together");
}

if((scoring->gaps_only_at_ends_in_a && scoring->no_gaps_in_a) ||
(scoring->gaps_only_at_ends_in_b && scoring->no_gaps_in_b))
{
usage("--gapsonlyatends.. --nogaps cannot be used at together");
}
// Check for extra unused arguments
// and set seq1 and seq2 if they have been passed
if(argi < argc)
Expand Down
23 changes: 16 additions & 7 deletions src/alignment_scoring.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
void scoring_init(scoring_t* scoring, int match, int mismatch,
int gap_open, int gap_extend,
char no_start_gap_penalty, char no_end_gap_penalty,
char no_gaps_in_a, char no_gaps_in_b,
char no_gaps_in_a, char no_gaps_in_b, char gaps_only_at_ends_in_a, char gaps_only_at_ends_in_b,
char no_mismatches, char case_sensitive)
{
// Gap of length 1 has penalty (gap_open+gap_extend)
Expand All @@ -43,6 +43,9 @@ void scoring_init(scoring_t* scoring, int match, int mismatch,
scoring->no_gaps_in_a = no_gaps_in_a;
scoring->no_gaps_in_b = no_gaps_in_b;
scoring->no_mismatches = no_mismatches;

scoring->gaps_only_at_ends_in_a = gaps_only_at_ends_in_a;
scoring->gaps_only_at_ends_in_b = gaps_only_at_ends_in_b;

scoring->use_match_mismatch = 1;
scoring->match = match;
Expand Down Expand Up @@ -311,7 +314,7 @@ void scoring_system_PAM30(scoring_t* scoring)
// Gap open -9, gap extend -1
// no_start_gap_penalty, no_end_gap_penalty = 0
// case_sensitive = 0
scoring_init(scoring, 1, -17, -9, -1, 0, 0, 0, 0, 0, 0);
scoring_init(scoring, 1, -17, -9, -1, 0, 0, 0, 0, 0, 0, 0, 0);

// use_match_mismatch=1
scoring_add_mutations(scoring, amino_acids, pam30, 1);
Expand All @@ -325,7 +328,7 @@ void scoring_system_PAM70(scoring_t* scoring)
// Gap open -10, gap extend -1
// no_start_gap_penalty, no_end_gap_penalty = 0
// case_sensitive = 0
scoring_init(scoring, 1, -11, -10, -1, 0, 0, 0, 0, 0, 0);
scoring_init(scoring, 1, -11, -10, -1, 0, 0, 0, 0, 0, 0, 0, 0);

// use_match_mismatch=1
scoring_add_mutations(scoring, amino_acids, pam70, 1);
Expand All @@ -339,7 +342,7 @@ void scoring_system_BLOSUM80(scoring_t* scoring)
// Gap open -10, gap extend -1
// no_start_gap_penalty, no_end_gap_penalty = 0
// case_sensitive = 0
scoring_init(scoring, 1, -8, -10, -1, 0, 0, 0, 0, 0, 0);
scoring_init(scoring, 1, -8, -10, -1, 0, 0, 0, 0, 0, 0, 0, 0);

// use_match_mismatch=1
scoring_add_mutations(scoring, amino_acids, blosum80, 1);
Expand All @@ -353,7 +356,7 @@ void scoring_system_BLOSUM62(scoring_t* scoring)
// Gap open -10, gap extend -1
// no_start_gap_penalty, no_end_gap_penalty = 0
// case_sensitive = 0
scoring_init(scoring, 1, -4, -10, -1, 0, 0, 0, 0, 0, 0);
scoring_init(scoring, 1, -4, -10, -1, 0, 0, 0, 0, 0, 0, 0, 0);

// use_match_mismatch=1
scoring_add_mutations(scoring, amino_acids, blosum62, 1);
Expand All @@ -370,7 +373,7 @@ void scoring_system_DNA_hybridization(scoring_t* scoring)
// Gap open -10, gap extend -10
// no_start_gap_penalty, no_end_gap_penalty = 0
// case_sensitive = 0
scoring_init(scoring, 0, 0, -10, -10, 0, 0, 0, 0, 0, 0);
scoring_init(scoring, 0, 0, -10, -10, 0, 0, 0, 0, 0, 0, 0, 0);

// use_match_mismatch=0
scoring_add_mutations(scoring, dna_bases, sub_matrix, 0);
Expand All @@ -388,5 +391,11 @@ void scoring_system_default(scoring_t* scoring)
// case_sensitive = 0
scoring_init(scoring, match_default, mismatch_default,
gap_open_default, gap_extend_default,
0, 0, 0, 0, 0, 0);
0, 0, 0, 0, 0, 0, 0, 0);
}

//LCS
void scoring_system_lcs(scoring_t* scoring)
{
scoring_init(scoring, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0);
}
5 changes: 4 additions & 1 deletion src/alignment_scoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ typedef struct

// Turn at most one of these on at a time to prevent gaps/mismatches
char no_gaps_in_a, no_gaps_in_b, no_mismatches;

char gaps_only_at_ends_in_a, gaps_only_at_ends_in_b;

// If swap_score not set, should we use match/mismatch values?
char use_match_mismatch;
Expand All @@ -37,7 +39,7 @@ typedef struct
void scoring_init(scoring_t* scoring, int match, int mismatch,
int gap_open, int gap_extend,
char no_start_gap_penalty, char no_end_gap_penalty,
char no_gaps_in_a, char no_gaps_in_b,
char no_gaps_in_a, char no_gaps_in_b, char gaps_only_at_ends_in_a, char gaps_only_at_ends_in_b,
char no_mismatches, char case_sensitive);

void scoring_add_wildcard(scoring_t* scoring, char c, int s);
Expand All @@ -56,5 +58,6 @@ void scoring_system_BLOSUM80(scoring_t *scoring);
void scoring_system_BLOSUM62(scoring_t *scoring);
void scoring_system_DNA_hybridization(scoring_t *scoring);
void scoring_system_default(scoring_t *scoring); // DNA/RNA default
void scoring_system_lcs(scoring_t *scoring);

#endif
11 changes: 11 additions & 0 deletions src/print_lcs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "print_lcs.h"

void print_lcs(alignment_t *result)
{
int i = 0;
printf("LCS: \n");
for(; i < strlen(result->result_a); i++)
if(result->result_a[i] == result->result_b[i])
printf("%c", result->result_a[i]);
printf("\n");
}
5 changes: 5 additions & 0 deletions src/print_lcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "alignment.h"
#include <stdio.h>
#include <string.h>

void print_lcs(alignment_t *result);
Loading