|
1 | | -use lasso::Rodeo; |
2 | | -use simstring_rust::{ |
3 | | - CharacterNgrams, Cosine, Database, Dice, ExactMatch, HashDb, Jaccard, Measure, Overlap, |
4 | | -}; |
5 | | -use std::sync::Arc; |
| 1 | +use lasso::{Rodeo, Spur}; |
| 2 | +use rustc_hash::FxHashSet; |
| 3 | +use simstring_rust::database::{Database, HashDb, StringId}; |
| 4 | +use simstring_rust::extractors::{CharacterNgrams, FeatureExtractor}; |
| 5 | +use simstring_rust::measures::{Cosine, Dice, ExactMatch, Jaccard, Measure, Overlap}; |
| 6 | +use std::sync::{Arc, Mutex}; |
6 | 7 |
|
7 | 8 | fn approx_eq(a: f64, b: f64) -> bool { |
8 | 9 | (a - b).abs() < 1e-9 |
@@ -350,3 +351,102 @@ fn test_overlap_edge_cases() { |
350 | 351 | assert_eq!(measure.similarity(&x, &empty), 0.0); |
351 | 352 | assert_eq!(measure.similarity(&empty, &x), 0.0); |
352 | 353 | } |
| 354 | + |
/// With `alpha == 0.0`, `Cosine::max_feature_size` is expected to return the
/// database's `max_feature_len()`, which `MockDatabase` fixes at 100.
#[test]
fn test_cosine_zero_alpha_max_feature_size() {
    let mock_db = MockDatabase;
    let upper_bound = Cosine.max_feature_size(5, 0.0, &mock_db);
    assert_eq!(upper_bound, 100);
}
| 362 | + |
/// Cosine similarity is expected to be 0.0 whenever either feature slice is
/// empty, including the case where both are empty.
#[test]
fn test_cosine_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature = &[Spur::default()];
    let cosine = Cosine;

    // Both sides empty.
    assert_eq!(cosine.similarity(no_features, no_features), 0.0);
    // Exactly one side empty, in either argument position.
    assert_eq!(cosine.similarity(no_features, one_feature), 0.0);
    assert_eq!(cosine.similarity(one_feature, no_features), 0.0);
}
| 373 | + |
/// With `alpha == 0.0`, `Dice::max_feature_size` is expected to report 100,
/// the value `MockDatabase::max_feature_len()` returns.
#[test]
fn test_dice_zero_alpha_max_feature_size() {
    let mock_db = MockDatabase;
    let upper_bound = Dice.max_feature_size(5, 0.0, &mock_db);
    assert_eq!(upper_bound, 100);
}
| 380 | + |
/// Dice similarity on empty feature slices: two empty inputs score 1.0, and
/// an empty input against a non-empty one scores 0.0 in either order.
#[test]
fn test_dice_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature = &[Spur::default()];
    let dice = Dice;

    // Both sides empty -> 1.0.
    assert_eq!(dice.similarity(no_features, no_features), 1.0);
    // Exactly one side empty -> 0.0, regardless of argument position.
    assert_eq!(dice.similarity(no_features, one_feature), 0.0);
    assert_eq!(dice.similarity(one_feature, no_features), 0.0);
}
| 393 | + |
/// A negative threshold (`alpha == -1.0`) is expected to make
/// `Jaccard::minimum_common_feature_count` return 0.
#[test]
fn test_jaccard_negative_alpha_min_common_features() {
    let min_common = Jaccard.minimum_common_feature_count(5, 5, -1.0);
    assert_eq!(min_common, 0);
}
| 400 | + |
/// Jaccard similarity on empty feature slices: two empty inputs score 1.0,
/// and an empty input against a non-empty one scores 0.0 in either order.
#[test]
fn test_jaccard_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature = &[Spur::default()];
    let jaccard = Jaccard;

    // Both sides empty -> 1.0.
    assert_eq!(jaccard.similarity(no_features, no_features), 1.0);
    // Exactly one side empty -> 0.0, regardless of argument position.
    assert_eq!(jaccard.similarity(no_features, one_feature), 0.0);
    assert_eq!(jaccard.similarity(one_feature, no_features), 0.0);
}
| 413 | + |
/// Overlap similarity on empty feature slices: two empty inputs score 1.0,
/// and an empty input against a non-empty one scores 0.0 in either order.
#[test]
fn test_overlap_similarity_empty_inputs() {
    let no_features: &[Spur] = &[];
    let one_feature = &[Spur::default()];
    let overlap = Overlap;

    // Both sides empty -> 1.0.
    assert_eq!(overlap.similarity(no_features, no_features), 1.0);
    // Exactly one side empty -> 0.0, regardless of argument position.
    assert_eq!(overlap.similarity(no_features, one_feature), 0.0);
    assert_eq!(overlap.similarity(one_feature, no_features), 0.0);
}
| 426 | + |
| 427 | +struct MockDatabase; |
| 428 | +impl Database for MockDatabase { |
| 429 | + fn insert(&mut self, _text: String) {} |
| 430 | + fn clear(&mut self) {} |
| 431 | + fn lookup_strings(&self, _size: usize, _feature: Spur) -> Option<&FxHashSet<StringId>> { |
| 432 | + None |
| 433 | + } |
| 434 | + fn get_string(&self, _id: StringId) -> Option<&str> { |
| 435 | + None |
| 436 | + } |
| 437 | + fn get_features(&self, _id: StringId) -> Option<&Vec<Spur>> { |
| 438 | + None |
| 439 | + } |
| 440 | + fn feature_extractor(&self) -> &dyn FeatureExtractor { |
| 441 | + unimplemented!() |
| 442 | + } |
| 443 | + fn max_feature_len(&self) -> usize { |
| 444 | + 100 |
| 445 | + } |
| 446 | + fn interner(&self) -> Arc<Mutex<Rodeo>> { |
| 447 | + unimplemented!() |
| 448 | + } |
| 449 | + fn total_strings(&self) -> usize { |
| 450 | + 0 |
| 451 | + } |
| 452 | +} |
0 commit comments