@@ -25,7 +25,7 @@ pub fn smith_waterman<const W: usize>(needle: &str, haystack: &str) -> (u16, u16
2525
2626 let mut left_gap_penalty_mask = true ;
2727 let mut delimiter_bonus_enabled_mask = false ;
28- let mut is_delimiter_mask = false ;
28+ let mut prev_is_delimiter_mask = false ;
2929
3030 for j in 0 ..haystack. len ( ) {
3131 let is_prefix = j == 0 ;
@@ -35,6 +35,7 @@ pub fn smith_waterman<const W: usize>(needle: &str, haystack: &str) -> (u16, u16
3535 let capital_mask = cased_haystack_simd. is_ascii_uppercase ( ) ;
3636 let haystack_simd = cased_haystack_simd. to_ascii_lowercase ( ) ;
3737
38+ let is_delimiter_mask = [ b' ' , b'/' , b',' , b'_' , b'-' , b':' ] . contains ( & haystack_simd) ;
3839 let matched_casing_mask = needle_cased_mask == capital_mask;
3940
4041 // Give a bonus for prefix matches
@@ -49,7 +50,7 @@ pub fn smith_waterman<const W: usize>(needle: &str, haystack: &str) -> (u16, u16
4950 let match_mask = needle_char == haystack_simd;
5051 let diag_score = if match_mask {
5152 diag + match_score
52- + if is_delimiter_mask && delimiter_bonus_enabled_mask { DELIMITER_BONUS } else { 0 }
53+ + if prev_is_delimiter_mask && delimiter_bonus_enabled_mask && !is_delimiter_mask { DELIMITER_BONUS } else { 0 }
5354 // ignore capitalization on the prefix
5455 + if !is_prefix && capital_mask { CAPITALIZATION_BONUS } else { 0 }
5556 + if matched_casing_mask { MATCHING_CASE_BONUS } else { 0 }
@@ -83,9 +84,9 @@ pub fn smith_waterman<const W: usize>(needle: &str, haystack: &str) -> (u16, u16
8384 left_gap_penalty_mask = max_score != left_score || diag_mask;
8485
8586 // Update delimiter mask
86- is_delimiter_mask = [ b' ' , b'/' , b',' , b'_' , b'-' , b':' ] . contains ( & haystack_simd ) ;
87+ prev_is_delimiter_mask = is_delimiter_mask ;
8788 // Only enable delimiter bonus if we've seen a non-delimiter char
88- delimiter_bonus_enabled_mask |= !is_delimiter_mask ;
89+ delimiter_bonus_enabled_mask |= !prev_is_delimiter_mask ;
8990
9091 // Store the scores for the next iterations
9192 up_score_simd = max_score;
@@ -235,8 +236,13 @@ mod tests {
235236 assert_eq ! ( get_score( "b" , "a--b" ) , CHAR_SCORE + DELIMITER_BONUS ) ;
236237 assert_eq ! ( get_score( "c" , "a--bc" ) , CHAR_SCORE ) ;
237238 assert_eq ! ( get_score( "a" , "-a--bc" ) , CHAR_SCORE ) ;
239+ }
240+
241+ #[ test]
242+ fn test_score_no_delimiter_for_delimiter_chars ( ) {
238243 assert_eq ! ( get_score( "-" , "a-bc" ) , CHAR_SCORE ) ;
239- assert_eq ! ( get_score( "-" , "a--bc" ) , CHAR_SCORE + DELIMITER_BONUS ) ;
244+ assert_eq ! ( get_score( "-" , "a--bc" ) , CHAR_SCORE ) ;
245+ assert ! ( get_score( "a_b" , "a_bb" ) > get_score( "a_b" , "a__b" ) ) ;
240246 }
241247
242248 #[ test]
0 commit comments