Skip to content

Commit bfc2193

Browse files
committed
feat(test): More test coverage for measures
1 parent 0976dbc commit bfc2193

File tree

3 files changed

+121
-10
lines changed

3 files changed

+121
-10
lines changed

.github/workflows/coverage.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,6 @@ jobs:
2121
target/
2222
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
2323

24-
- name: Install system dependencies
25-
run: |
26-
sudo apt-get update
27-
sudo apt-get install -y software-properties-common python3-dev
28-
2924
- name: Set up Python
3025
uses: actions/setup-python@v6
3126
with:

tests/python/test_bindings.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,19 @@ def test_word_ngram_edge_cases(self):
125125
features_comma = extractor_comma.apply("foo,bar")
126126
expected_comma = ["# foo1", "foo bar1", "bar #1"]
127127
assert Counter(features_comma) == Counter(expected_comma)
128+
129+
def test_word_ngrams_in_db(self):
    """A string inserted into a WordNgrams-backed HashDb is returned by a cosine search at threshold 1.0."""
    text = "foo bar"
    # Word bigrams split on spaces and padded with "#".
    database = HashDb(WordNgrams(n=2, splitter=" ", padder="#"))
    database.insert(text)
    matches = Searcher(database, Cosine()).search(text, 1.0)
    assert matches == [text]
136+
137+
def test_invalid_extractor_in_db(self):
    """Constructing a HashDb from a plain string must raise TypeError with the extractor message."""
    expected_message = "Extractor must be CharacterNgrams, WordNgrams, or CustomExtractor"
    with pytest.raises(TypeError, match=expected_message):
        HashDb("not an extractor")
140+
141+
def test_ranked_search_error_on_invalid_threshold(self):
    """A ranked search with threshold 1.1 must raise SearchError naming the invalid threshold."""
    # The dot is escaped because `match` is treated as a regular expression.
    expect_threshold_error = pytest.raises(SearchError, match=r"Invalid threshold: 1\.1")
    with expect_threshold_error:
        self.searcher.ranked_search("test", 1.1)

tests/test_measures.rs

Lines changed: 105 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
use lasso::Rodeo;
2-
use simstring_rust::{
3-
CharacterNgrams, Cosine, Database, Dice, ExactMatch, HashDb, Jaccard, Measure, Overlap,
4-
};
5-
use std::sync::Arc;
1+
use lasso::{Rodeo, Spur};
2+
use rustc_hash::FxHashSet;
3+
use simstring_rust::database::{Database, HashDb, StringId};
4+
use simstring_rust::extractors::{CharacterNgrams, FeatureExtractor};
5+
use simstring_rust::measures::{Cosine, Dice, ExactMatch, Jaccard, Measure, Overlap};
6+
use std::sync::{Arc, Mutex};
67

78
fn approx_eq(a: f64, b: f64) -> bool {
89
(a - b).abs() < 1e-9
@@ -350,3 +351,102 @@ fn test_overlap_edge_cases() {
350351
assert_eq!(measure.similarity(&x, &empty), 0.0);
351352
assert_eq!(measure.similarity(&empty, &x), 0.0);
352353
}
354+
355+
/// With `alpha == 0.0`, `Cosine::max_feature_size` should return
/// `db.max_feature_len()`, which `MockDatabase` fixes at 100.
#[test]
fn test_cosine_zero_alpha_max_feature_size() {
    let db = MockDatabase;
    let upper_bound = Cosine.max_feature_size(5, 0.0, &db);
    assert_eq!(upper_bound, 100);
}
362+
363+
/// Cosine similarity is expected to be 0.0 whenever at least one side is empty,
/// including the case where both sides are empty.
#[test]
fn test_cosine_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature: &[Spur] = &[Spur::default()];
    let cosine = Cosine;

    // Both sides empty.
    assert_eq!(cosine.similarity(no_features, no_features), 0.0);
    // Exactly one side empty, in either argument position.
    assert_eq!(cosine.similarity(no_features, one_feature), 0.0);
    assert_eq!(cosine.similarity(one_feature, no_features), 0.0);
}
373+
374+
/// With `alpha == 0.0`, `Dice::max_feature_size` is expected to yield 100,
/// the value `MockDatabase::max_feature_len` reports.
#[test]
fn test_dice_zero_alpha_max_feature_size() {
    let db = MockDatabase;
    let upper_bound = Dice.max_feature_size(5, 0.0, &db);
    assert_eq!(upper_bound, 100);
}
380+
381+
/// Dice similarity on empty inputs: two empty feature sets score 1.0, while an
/// empty set against a non-empty one scores 0.0 in either argument order.
#[test]
fn test_dice_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature: &[Spur] = &[Spur::default()];
    let dice = Dice;

    // Both sides empty.
    assert_eq!(dice.similarity(no_features, no_features), 1.0);
    // Exactly one side empty.
    assert_eq!(dice.similarity(no_features, one_feature), 0.0);
    assert_eq!(dice.similarity(one_feature, no_features), 0.0);
}
393+
394+
/// A negative threshold (`alpha = -1.0`) must make
/// `Jaccard::minimum_common_feature_count` return 0.
#[test]
fn test_jaccard_negative_alpha_min_common_features() {
    let required_overlap = Jaccard.minimum_common_feature_count(5, 5, -1.0);
    assert_eq!(required_overlap, 0);
}
400+
401+
/// Jaccard similarity on empty inputs: two empty feature sets score 1.0, while
/// an empty set against a non-empty one scores 0.0 in either argument order.
#[test]
fn test_jaccard_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature: &[Spur] = &[Spur::default()];
    let jaccard = Jaccard;

    // Both sides empty.
    assert_eq!(jaccard.similarity(no_features, no_features), 1.0);
    // Exactly one side empty.
    assert_eq!(jaccard.similarity(no_features, one_feature), 0.0);
    assert_eq!(jaccard.similarity(one_feature, no_features), 0.0);
}
413+
414+
/// Overlap similarity on empty inputs: two empty feature sets score 1.0, while
/// an empty set against a non-empty one scores 0.0 in either argument order.
#[test]
fn test_overlap_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature: &[Spur] = &[Spur::default()];
    let overlap = Overlap;

    // Both sides empty.
    assert_eq!(overlap.similarity(no_features, no_features), 1.0);
    // Exactly one side empty.
    assert_eq!(overlap.similarity(no_features, one_feature), 0.0);
    assert_eq!(overlap.similarity(one_feature, no_features), 0.0);
}
426+
427+
struct MockDatabase;
428+
impl Database for MockDatabase {
429+
fn insert(&mut self, _text: String) {}
430+
fn clear(&mut self) {}
431+
fn lookup_strings(&self, _size: usize, _feature: Spur) -> Option<&FxHashSet<StringId>> {
432+
None
433+
}
434+
fn get_string(&self, _id: StringId) -> Option<&str> {
435+
None
436+
}
437+
fn get_features(&self, _id: StringId) -> Option<&Vec<Spur>> {
438+
None
439+
}
440+
fn feature_extractor(&self) -> &dyn FeatureExtractor {
441+
unimplemented!()
442+
}
443+
fn max_feature_len(&self) -> usize {
444+
100
445+
}
446+
fn interner(&self) -> Arc<Mutex<Rodeo>> {
447+
unimplemented!()
448+
}
449+
fn total_strings(&self) -> usize {
450+
0
451+
}
452+
}

0 commit comments

Comments
 (0)