1-
21import numpy as np
2+ import scipy
33from scipy import linalg
4+ from scipy import sparse
5+ from scipy import ndimage
6+ import scipy .sparse .linalg .eigen .arpack
7+ from scipy .sparse .linalg .eigen .arpack import eigen , eigen_symmetric
8+ #from pyamg.graph import lloyd_cluster
9+ import pyamg
10+ from pyamg import smoothed_aggregation_solver
11+ from scipy .sparse .linalg import lobpcg
12+ from diffusions import _build_laplacian
413
514def spectral_embedding (adjacency ):
615 """ A diffusion reordering, but that works for negative values.
@@ -15,9 +24,44 @@ def spectral_embedding(adjacency):
1524 lambdas , diffusion_map = linalg .eigh (lap )
1625 return lambdas , diffusion_map .T [- 2 ::- 1 ]* d
1726
27+ def spectral_embedding_sparse (adjacency , k_max = 14 , mode = 'amg' , take_first = True ):
28+ """ A diffusion reordering, but that works for negative values.
29+ """
30+ # Normalize the graph: the sum of each set of edges must be one
31+ diag_weights = np .array (adjacency .sum (axis = 1 ))
32+ diag_mask = (diag_weights == 0 )
33+ diag_weights [diag_mask ] = 1
34+ dd = np .sign (diag_weights )/ np .sqrt (np .abs (diag_weights ))
35+ if mode == 'bf' :
36+ lambdas , diffusion_map = eigen_symmetric (adjacency , k = k_max , which = 'LA' )
37+ print lambdas
38+ if take_first :
39+ res = diffusion_map .T [::- 1 ]* dd .ravel ()
40+ else :
41+ res = diffusion_map .T [- 2 ::- 1 ]* dd .ravel ()
42+ elif mode == 'amg' :
43+ print 'amg'
44+ sh = adjacency .shape [0 ]
45+ adjacency = adjacency .copy ()
46+ diag = sparse .coo_matrix ((diag_weights .ravel (), (range (sh ), range (sh ))))
47+ adjacency = - adjacency + diag
48+ ml = smoothed_aggregation_solver (adjacency .tocsr ())
49+ X = scipy .rand (adjacency .shape [0 ], k_max )
50+ X [:, 0 ] = 1. / np .sqrt (adjacency .shape [0 ])
51+ M = ml .aspreconditioner ()
52+ lambdas , diffusion_map = lobpcg (adjacency , X , M = M , tol = 1.e-12 , largest = False )
53+ print lambdas
54+ if take_first :
55+ res = diffusion_map .T * dd .ravel ()
56+ else :
57+ res = diffusion_map .T [1 :] * dd .ravel ()
58+ print res .shape , dd .shape
59+ return res
60+
1861
1962
20- def modularity_embedding (adjacency ):
63+
64+ def modularity_embedding (adjacency , kmax = 10 ):
2165 """ Proceedings of the fifth SIAM international conference on data
2266 mining, Smyth, A spectral clustering approach to finding
2367 communities in graphs.
@@ -33,7 +77,28 @@ def modularity_embedding(adjacency):
3377 #weights.flat[::n+1] = 1
3478 weights = abs_adjacency / abs_adjacency .sum (axis = 0 )
3579 lambdas , maps = linalg .eig (weights )
36- return maps .T
80+ indices = np .argsort (lambdas )[::- 1 ]
81+ print lambdas [:10 ]
82+ return maps .T [indices ]
83+
84+ def modularity_embedding_sparse (adjacency , kmax = 10 ):
85+ """ Proceedings of the fifth SIAM international conference on data
86+ mining, Smyth, A spectral clustering approach to finding
87+ communities in graphs.
88+
89+ Return the eigenvalues of the Q matrice
90+ """
91+ if isinstance (adjacency , sparse .csc .csc_matrix ):
92+ adjacency = np .array (adjacency .todense ())
93+ abs_adjacency = np .abs (adjacency )
94+ weights = abs_adjacency / abs_adjacency .sum (axis = 0 )
95+ weights = sparse .csc_matrix (weights )
96+ lambdas , maps = eigen (weights , \
97+ k = kmax , which = 'LR' )
98+ print lambdas
99+ return maps .T #[1:]
100+
101+
37102
38103
39104def newman_clustering (adjacency , eps = 1e-8 ):
@@ -64,20 +129,28 @@ def q_score(adjacency, labels):
64129 """ Returns the Q score of a clustering.
65130 """
66131 q = 0
67- n_features = adjacency .shape [0 ]
68- weights = np .abs (adjacency )
69- weights .flat [::n_features + 1 ] = 0
70- total_weights = weights .sum ()
132+ """
133+ if isinstance(adjacency, sparse.csc.csc_matrix):
134+ adjacency = np.array(adjacency.todense())
135+ """
136+ weights = adjacency
137+ total_weights = 0.5 * weights .sum ()
71138 for label in np .unique (labels ):
72- q += weights [label == labels ].T [label == labels ].sum ()/ total_weights
73- q -= (weights [label == labels ].sum ()/ total_weights )** 2
74- return q
139+ inds = np .nonzero (labels == label )[0 ]
140+ a = 0.5 * (weights [inds ][:, inds ]).sum ()
141+ b = weights [inds ].sum () - a
142+ q += a / total_weights
143+ q -= 0.5 * (b / total_weights )
144+ #q += weights[label == labels].T[label == labels].sum()/total_weights
145+ #q -= (weights[label == labels].sum()/total_weights)**2
146+ return 2 * q
75147
76148
77149def best_k_means (k , maps , adjacency , n_bst = 10 ):
78150 from nipy .neurospin .clustering .clustering import _kmeans
79151 best_score = - np .inf
80152 for _ in range (n_bst ):
153+ print "doing kmeans"
81154 _ , labels , _ = _kmeans (maps , nbclusters = k )
82155 score = q_score (adjacency , labels )
83156 if score > best_score :
@@ -86,14 +159,15 @@ def best_k_means(k, maps, adjacency, n_bst=10):
86159 return best_labels , best_score
87160
88161
89- def communities_clustering (adjacency , k_best = None , n_bst = 10 ):
162+ def communities_clustering (adjacency , k_best = None , n_bst = 2 ):
90163 adjacency = np .abs (adjacency )
91164 n_features = adjacency .shape [0 ]
92165 adjacency .flat [::n_features + 1 ] = 0
93166 maps = modularity_embedding (adjacency )
94167 scores = dict ()
95168 if k_best is None :
96- for k in range (2 , .3 * n_features ):
169+ #for k in range(2, .3*n_features):
170+ for k in range (2 , 6 ):
97171 this_maps = maps [:k - 1 ].T .copy ()
98172 labels , score = best_k_means (k , this_maps , adjacency , n_bst = n_bst )
99173 scores [k ] = score
@@ -104,3 +178,64 @@ def communities_clustering(adjacency, k_best=None, n_bst=10):
104178 n_bst = 5 * n_bst )
105179 print 'Final : k=%i, score=%s' % (k_best , score )
106180 return labels
181+
182+ def communities_clustering_sparse (adjacency , k_best = None , k_min = 2 , k_max = 8 , n_bst = 4 , mode = 'bf' , take_first = False ):
183+ maps = spectral_embedding_sparse (adjacency , k_max = k_max + 2 , mode = mode , \
184+ take_first = take_first )
185+ scores = dict ()
186+ res = dict ()
187+ if k_best is None :
188+ for k in range (k_min , k_max + 1 ):
189+ this_maps = maps [:k - 1 ].T .copy ()
190+ labels , score = best_k_means (k , this_maps , adjacency , n_bst = n_bst )
191+ scores [k ] = score
192+ print scores [k ]
193+ res [k ] = labels
194+ #k_best = scores.keys()[np.argmax(scores.values())]
195+ else :
196+ this_maps = maps [:k_best - 1 ].T .copy ()
197+ res , scores = best_k_means (k_best , this_maps , adjacency ,
198+ n_bst = 4 * n_bst )
199+ print 'Final : k=%i, score=%s' % (k_best , score )
200+ return res , scores
201+
202+ def separate_in_regions (data , mask , k_best = None , k_min = 2 , k_max = 8 , \
203+ center = None , only_connex = True , \
204+ take_first = True , beta = 10 , mode = 'bf' ):
205+ labs , nb_labels = ndimage .label (mask )
206+ if nb_labels > 1 :
207+ if center is None :
208+ sizes = np .array (ndimage .sum (mask , labs , range (1 , nb_labels + 1 )))
209+ ind_max = np .argmax (sizes ) + 1
210+ else :
211+ ind_max = labs [tuple (center )]
212+ mask = labs == ind_max
213+ lap , w = _build_laplacian (np .atleast_3d (data ), mask = np .atleast_3d (mask ), \
214+ normed = True , beta = beta )
215+ return lap , w
216+ print lap .shape
217+ res , scores = communities_clustering_sparse (lap , k_best = k_best , \
218+ k_min = k_min , k_max = k_max , take_first = take_first , mode = mode )
219+ if not only_connex :
220+ if k_best == None :
221+ labels = dict ()
222+ for k in range (k_min , k_max + 1 ):
223+ _ = np .copy (labs )
224+ _ [_ > 0 ] += k_best
225+ _ [mask > 0 ] = res [k ] + 1
226+ labels [k ] = _
227+ else :
228+ labels = np .copy (labs )
229+ labels [labels > 0 ] += k_best
230+ labels [mask > 0 ] = res + 1
231+ else :
232+ if k_best == None :
233+ labels = dict ()
234+ for k in range (k_min , k_max + 1 ):
235+ _ = np .copy (mask ).astype (np .int )
236+ _ [mask > 0 ] = res [k ] + 1
237+ labels [k ] = _
238+ else :
239+ labels = np .copy (mask ).astype (np .int )
240+ labels [mask > 0 ] = res + 1
241+ return labels , scores
0 commit comments