33import numpy as np
44from scipy .sparse .csr import csr_matrix
55from string_grouper .string_grouper import DEFAULT_MIN_SIMILARITY , \
6- DEFAULT_MAX_N_MATCHES , DEFAULT_REGEX , \
7- DEFAULT_NGRAM_SIZE , DEFAULT_N_PROCESSES , DEFAULT_IGNORE_CASE , \
6+ DEFAULT_REGEX , DEFAULT_NGRAM_SIZE , DEFAULT_N_PROCESSES , DEFAULT_IGNORE_CASE , \
87 StringGrouperConfig , StringGrouper , StringGrouperNotFitException , \
9- match_most_similar , group_similar_strings , match_strings ,\
8+ match_most_similar , group_similar_strings , match_strings , \
109 compute_pairwise_similarities
1110from unittest .mock import patch
12- from scipy .sparse .csgraph ._flow import csr_matrix
1311
1412def mock_symmetrize_matrix (A : csr_matrix ) -> csr_matrix :
1513 return A
@@ -383,7 +381,7 @@ def test_get_matches_single(self):
383381 left_side = ['foo' , 'foo' , 'bar' , 'baz' , 'foo' , 'foo' ]
384382 right_side = ['foo' , 'foo' , 'bar' , 'baz' , 'foo' , 'foo' ]
385383 left_index = [0 , 0 , 1 , 2 , 3 , 3 ]
386- right_index = [0 , 3 , 1 , 2 , 0 , 3 ]
384+ right_index = [3 , 0 , 1 , 2 , 3 , 0 ]
387385 similarity = [1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 ]
388386 expected_df = pd .DataFrame ({'left_index' : left_index , 'left_side' : left_side ,
389387 'similarity' : similarity ,
@@ -399,8 +397,8 @@ def test_get_matches_1_series_1_id_series(self):
399397 left_side_id = ['A0' , 'A0' , 'A1' , 'A2' , 'A3' , 'A3' ]
400398 left_index = [0 , 0 , 1 , 2 , 3 , 3 ]
401399 right_side = ['foo' , 'foo' , 'bar' , 'baz' , 'foo' , 'foo' ]
402- right_side_id = ['A0 ' , 'A3 ' , 'A1' , 'A2' , 'A0 ' , 'A3 ' ]
403- right_index = [0 , 3 , 1 , 2 , 0 , 3 ]
400+ right_side_id = ['A3 ' , 'A0 ' , 'A1' , 'A2' , 'A3 ' , 'A0 ' ]
401+ right_index = [3 , 0 , 1 , 2 , 3 , 0 ]
404402 similarity = [1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 ]
405403 expected_df = pd .DataFrame ({'left_index' : left_index , 'left_side' : left_side , 'left_id' : left_side_id ,
406404 'similarity' : similarity ,
0 commit comments