From fbaefae48d8451848e126e378d2ff20068c0002f Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Tue, 19 Dec 2023 16:51:26 +0100 Subject: [PATCH] Added maximal score to alignment, fixed local alignment bug, preferred smaller Isobaric/Rotated sets --- src/align/alignment.rs | 6 ++++-- src/align/mass_alignment.rs | 22 +++++++++++++++------- src/align/scoring.rs | 5 +++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/align/alignment.rs b/src/align/alignment.rs index b699252..d01a4dd 100644 --- a/src/align/alignment.rs +++ b/src/align/alignment.rs @@ -15,9 +15,11 @@ use crate::{LinearPeptide, MolecularFormula}; pub struct Alignment { /// The absolute score of this alignment pub absolute_score: isize, + /// The maximal score of this alignment: the average score of the sequence slices on sequence a and b if they were aligned to themself, rounded down. + /// Think of it like this: `align(sequence_a.sequence[start_a..len_a], sequence_a.sequence[start_a..len_a])`. + pub maximal_score: isize, /// The normalised score, normalised for the alignment length and for the used alphabet. - /// The normalisation is as follows `score / max_score` where max score is the average score of the sequence slices on sequence a and b if they were aligned to themself. - /// Think of it like this: `align(sequence_a.sequence[start_a..len_a], sequence_a.sequence[start_a..len_a])` + /// The normalisation is as follows `absolute_score / max_score`. pub normalised_score: f64, /// The path or steps taken for the alignment pub path: Vec, diff --git a/src/align/mass_alignment.rs b/src/align/mass_alignment.rs index 806c671..8c07055 100644 --- a/src/align/mass_alignment.rs +++ b/src/align/mass_alignment.rs @@ -140,7 +140,7 @@ pub fn align( let high_score = high.0; while ty == Type::Global || !(high.1 == 0 && high.2 == 0) { let value = matrix[high.1][high.2].clone(); - if value.step_a == 0 && value.step_b == 0 { + if value.step_a == 0 && value.step_b == 0 || ty == Type::Local && value.score < 0 { break; } high = ( @@ -152,7 +152,7 @@ pub fn align( } let path: Vec = path.into_iter().rev().collect(); - let max_score = seq_a.sequence + let max_score = (seq_a.sequence [high.1..high.1 + path.iter().map(|p| p.step_a as usize).sum::()] .iter() .map(|a| alphabet[a.aminoacid as usize][a.aminoacid as usize] as isize) @@ -160,10 +160,12 @@ pub fn align( + seq_b.sequence[high.2..high.2 + path.iter().map(|p| p.step_b as usize).sum::()] .iter() .map(|a| alphabet[a.aminoacid as usize][a.aminoacid as usize] as isize) - .sum::(); + .sum::()) + / 2; Alignment { absolute_score: high_score, - normalised_score: high_score as f64 / max_score as f64 * 2.0, + normalised_score: high_score as f64 / max_score as f64, + maximal_score: max_score, path, start_a: high.1, start_b: high.2, @@ -199,7 +201,13 @@ fn score_pair( 1, ) } - (false, true) => Piece::new(score + ISOMASS as isize, ISOMASS, MatchType::Isobaric, 1, 1), // TODO: I/L/J is now also scored as isobaric, which is correct but the score is considerably lower then in previous iterations + (false, true) => Piece::new( + score + ISOBARIC as isize, + ISOBARIC, + MatchType::Isobaric, + 1, + 1, + ), // TODO: I/L/J is now also scored as isobaric, which is correct but the score is considerably lower then in previous iterations (false, false) => Piece::new( score + MISMATCH as isize, MISMATCH, @@ -231,9 +239,9 @@ fn score( }); #[allow(clippy::cast_possible_wrap)] let local = if rotated { - SWITCHED * a.len() as i8 + BASE_SPECIAL + ROTATED * a.len() as i8 } else { - ISOMASS * (a.len() + b.len()) as i8 / 2 + BASE_SPECIAL + ISOBARIC * (a.len() + b.len()) as i8 / 2 }; Some(Piece::new( score + local as isize, diff --git a/src/align/scoring.rs b/src/align/scoring.rs index 023127e..77285b0 100644 --- a/src/align/scoring.rs +++ b/src/align/scoring.rs @@ -22,8 +22,9 @@ pub enum MatchType { pub const MISMATCH: i8 = -1; pub const MASS_MISMATCH_PENALTY: i8 = -1; -pub const SWITCHED: i8 = 3; -pub const ISOMASS: i8 = 2; +pub const BASE_SPECIAL: i8 = 1; +pub const ROTATED: i8 = 3; +pub const ISOBARIC: i8 = 2; pub const GAP_START_PENALTY: i8 = -5; pub const GAP_EXTEND_PENALTY: i8 = -1;