@@ -25,23 +25,25 @@ def awesome_cossim_topn(
2525 """
2626 This function will return a matrix C in CSR format, where
2727 C = [sorted top n results > lower_bound for each row of A * B].
28- If return_best_topn=True it will also return best_topn (the
29- true maximum number of elements > lower_bound per row of A * B).
28+ If return_best_topn=True then best_topn
29+ (the true maximum number of elements > lower_bound per row of A * B)
30+ will also be returned in a tuple together with C as (C, best_topn).
3031
3132 Input:
3233 A and B: two CSR matrices
33- ntop: n top results
34- lower_bound: a threshold that the element of A*B must greater than
35- use_threads: use multi-thread or not
34+ ntop: top n results
35+ lower_bound: a threshold that the element of A*B must be greater than
36+ use_threads: whether to use multi-threading
3637 n_jobs: number of threads, must be >= 1
37- ntop_is_flexible: if True, memory management will be handed over to C/C++ if
38- python's attempt at allocating memory fails.
39- mem_manager_is_C: (this is mainly for testing purposes) if True, will force
40- memory management to be handed over to C/C++. Should be
41- used only when ntop >= number of columns of B or
42- ntop_is_flexible=True. Defaults to False.
43- return_best_topn: if True, will return best_topn together with C as a tuple:
44- (C, best_topn)
38+ ntop_is_flexible: (default: False) if True, memory management will be handed
39+ over to C/C++ whenever Python's attempt at allocating
40+ memory fails.
41+ mem_manager_is_C: (default: False) this is mainly for testing purposes. If
42+ True, will force memory management to be handed over to
43+ C/C++. Should be used only when ntop >= number of columns
44+ of B or ntop_is_flexible=True.
45+ return_best_topn: (default: False) if True, will return best_topn together
46+ with C as a tuple: (C, best_topn)
4547
4648 Output:
4749 C: result matrix (returned alone, if return_best_topn=False)
@@ -80,11 +82,13 @@ def awesome_cossim_topn(
8082 return output
8183
8284 # filled matrices from here on
83- indptr = np .empty (M + 1 , dtype = idx_dtype )
85+ indptr = np .empty (M + 1 , dtype = idx_dtype )
8486 try :
8587 indices = np .empty (nnz_max , dtype = idx_dtype )
8688 data = np .empty (nnz_max , dtype = A .dtype )
89+
8790 if mem_manager_is_C : raise MemoryError # This is mainly for testing purposes
91+
8892 except MemoryError :
8993 # if mem_manager_is_C: print('Exception raised! Continuing ...', flush=True)
9094 if ntop_is_flexible or ntop >= N :
@@ -107,7 +111,6 @@ def awesome_cossim_topn(
107111 lower_bound ,
108112 indptr
109113 )
110-
111114 else :
112115
113116 indices , data , best_topn = ct_thread .sparse_dot_free_threaded (
@@ -120,14 +123,19 @@ def awesome_cossim_topn(
120123 lower_bound ,
121124 indptr , n_jobs
122125 )
123-
124126 else :
127+
125128 if mem_manager_is_C :
126- raise Exception ('When mem_manager_is_C=True, set ntop >= N, or set ntop_is_flexible=True' )
129+ raise Exception (
130+ 'When mem_manager_is_C=True, set ntop >= B.shape[1], or set ntop_is_flexible=True'
131+ )
127132 else :
128- raise Exception ('Not enough memory! Data array is too large. Try reducing the value of ntop.' )
129-
133+ raise Exception (
134+ 'Not enough memory! Data array is too large. Try reducing the value of ntop '
135+ 'or set ntop_is_flexible=True.'
136+ )
130137 else :
138+ # no exception was raised, so use the old function (it is expected to be the fastest)
131139
132140 best_topn_arr = np .full (1 , 0 , dtype = idx_dtype )
133141
@@ -144,7 +152,6 @@ def awesome_cossim_topn(
144152 lower_bound ,
145153 indptr , indices , data , best_topn_arr
146154 )
147-
148155 else :
149156 if n_jobs < 1 :
150157 err_str = 'Whenever you select the multi-thread mode, n_job must be greater than or equal to 1!'
@@ -161,9 +168,9 @@ def awesome_cossim_topn(
161168 lower_bound ,
162169 indptr , indices , data , best_topn_arr , n_jobs
163170 )
164-
165171 best_topn = best_topn_arr [0 ]
166172
173+ # prepare and return the output:
167174 output = csr_matrix ((data , indices , indptr ), shape = (M , N ))
168175 if return_best_topn :
169176 return output , best_topn
0 commit comments