Skip to content

Commit fc7ffaf

Browse files
author
Emmanuelle Gouillart
committed
Modifications to spectral embedding code: sparse implementation.
Still need to understand better what we're doing.
1 parent 6346409 commit fc7ffaf

1 file changed

Lines changed: 147 additions & 12 deletions

File tree

spectral_embedding.py

Lines changed: 147 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1-
21
import numpy as np
2+
import scipy
33
from scipy import linalg
4+
from scipy import sparse
5+
from scipy import ndimage
6+
import scipy.sparse.linalg.eigen.arpack
7+
from scipy.sparse.linalg.eigen.arpack import eigen, eigen_symmetric
8+
#from pyamg.graph import lloyd_cluster
9+
import pyamg
10+
from pyamg import smoothed_aggregation_solver
11+
from scipy.sparse.linalg import lobpcg
12+
from diffusions import _build_laplacian
413

514
def spectral_embedding(adjacency):
615
""" A diffusion reordering, but that works for negative values.
@@ -15,9 +24,44 @@ def spectral_embedding(adjacency):
1524
lambdas, diffusion_map = linalg.eigh(lap)
1625
return lambdas, diffusion_map.T[-2::-1]*d
1726

27+
def spectral_embedding_sparse(adjacency, k_max=14, mode='amg', take_first=True):
28+
""" A diffusion reordering, but that works for negative values.
29+
"""
30+
# Normalize the graph: the sum of each set of edges must be one
31+
diag_weights = np.array(adjacency.sum(axis=1))
32+
diag_mask = (diag_weights == 0)
33+
diag_weights[diag_mask] = 1
34+
dd = np.sign(diag_weights)/np.sqrt(np.abs(diag_weights))
35+
if mode == 'bf':
36+
lambdas, diffusion_map = eigen_symmetric(adjacency, k=k_max, which='LA')
37+
print lambdas
38+
if take_first:
39+
res = diffusion_map.T[::-1]*dd.ravel()
40+
else:
41+
res = diffusion_map.T[-2::-1]*dd.ravel()
42+
elif mode == 'amg':
43+
print 'amg'
44+
sh = adjacency.shape[0]
45+
adjacency = adjacency.copy()
46+
diag = sparse.coo_matrix((diag_weights.ravel(), (range(sh), range(sh))))
47+
adjacency = - adjacency + diag
48+
ml = smoothed_aggregation_solver(adjacency.tocsr())
49+
X = scipy.rand(adjacency.shape[0], k_max)
50+
X[:, 0] = 1. / np.sqrt(adjacency.shape[0])
51+
M = ml.aspreconditioner()
52+
lambdas, diffusion_map = lobpcg(adjacency, X, M=M, tol=1.e-12, largest=False)
53+
print lambdas
54+
if take_first:
55+
res = diffusion_map.T * dd.ravel()
56+
else:
57+
res = diffusion_map.T[1:] * dd.ravel()
58+
print res.shape, dd.shape
59+
return res
60+
1861

1962

20-
def modularity_embedding(adjacency):
63+
64+
def modularity_embedding(adjacency, kmax=10):
2165
""" Proceedings of the fifth SIAM international conference on data
2266
mining, Smyth, A spectral clustering approach to finding
2367
communities in graphs.
@@ -33,7 +77,28 @@ def modularity_embedding(adjacency):
3377
#weights.flat[::n+1] = 1
3478
weights = abs_adjacency/abs_adjacency.sum(axis=0)
3579
lambdas, maps = linalg.eig(weights)
36-
return maps.T
80+
indices = np.argsort(lambdas)[::-1]
81+
print lambdas[:10]
82+
return maps.T[indices]
83+
84+
def modularity_embedding_sparse(adjacency, kmax=10):
85+
""" Proceedings of the fifth SIAM international conference on data
86+
mining, Smyth, A spectral clustering approach to finding
87+
communities in graphs.
88+
89+
Return the eigenvalues of the Q matrice
90+
"""
91+
if isinstance(adjacency, sparse.csc.csc_matrix):
92+
adjacency = np.array(adjacency.todense())
93+
abs_adjacency = np.abs(adjacency)
94+
weights = abs_adjacency/abs_adjacency.sum(axis=0)
95+
weights = sparse.csc_matrix(weights)
96+
lambdas, maps = eigen(weights, \
97+
k=kmax, which='LR')
98+
print lambdas
99+
return maps.T#[1:]
100+
101+
37102

38103

39104
def newman_clustering(adjacency, eps=1e-8):
@@ -64,20 +129,28 @@ def q_score(adjacency, labels):
64129
""" Returns the Q score of a clustering.
65130
"""
66131
q = 0
67-
n_features = adjacency.shape[0]
68-
weights = np.abs(adjacency)
69-
weights.flat[::n_features+1] = 0
70-
total_weights = weights.sum()
132+
"""
133+
if isinstance(adjacency, sparse.csc.csc_matrix):
134+
adjacency = np.array(adjacency.todense())
135+
"""
136+
weights = adjacency
137+
total_weights = 0.5 * weights.sum()
71138
for label in np.unique(labels):
72-
q += weights[label == labels].T[label == labels].sum()/total_weights
73-
q -= (weights[label == labels].sum()/total_weights)**2
74-
return q
139+
inds = np.nonzero(labels == label)[0]
140+
a = 0.5 * (weights[inds][:, inds]).sum()
141+
b = weights[inds].sum() - a
142+
q += a/total_weights
143+
q -= 0.5*(b/total_weights)
144+
#q += weights[label == labels].T[label == labels].sum()/total_weights
145+
#q -= (weights[label == labels].sum()/total_weights)**2
146+
return 2 * q
75147

76148

77149
def best_k_means(k, maps, adjacency, n_bst=10):
78150
from nipy.neurospin.clustering.clustering import _kmeans
79151
best_score = -np.inf
80152
for _ in range(n_bst):
153+
print "doing kmeans"
81154
_, labels, _ = _kmeans(maps, nbclusters=k)
82155
score = q_score(adjacency, labels)
83156
if score > best_score:
@@ -86,14 +159,15 @@ def best_k_means(k, maps, adjacency, n_bst=10):
86159
return best_labels, best_score
87160

88161

89-
def communities_clustering(adjacency, k_best=None, n_bst=10):
162+
def communities_clustering(adjacency, k_best=None, n_bst=2):
90163
adjacency = np.abs(adjacency)
91164
n_features = adjacency.shape[0]
92165
adjacency.flat[::n_features+1] = 0
93166
maps = modularity_embedding(adjacency)
94167
scores = dict()
95168
if k_best is None:
96-
for k in range(2, .3*n_features):
169+
#for k in range(2, .3*n_features):
170+
for k in range(2, 6):
97171
this_maps = maps[:k-1].T.copy()
98172
labels, score = best_k_means(k, this_maps, adjacency, n_bst=n_bst)
99173
scores[k] = score
@@ -104,3 +178,64 @@ def communities_clustering(adjacency, k_best=None, n_bst=10):
104178
n_bst=5*n_bst)
105179
print 'Final : k=%i, score=%s' % (k_best, score)
106180
return labels
181+
182+
def communities_clustering_sparse(adjacency, k_best=None, k_min=2, k_max=8, n_bst=4, mode='bf', take_first=False):
183+
maps = spectral_embedding_sparse(adjacency, k_max=k_max+2, mode=mode, \
184+
take_first=take_first)
185+
scores = dict()
186+
res = dict()
187+
if k_best is None:
188+
for k in range(k_min, k_max + 1):
189+
this_maps = maps[:k - 1].T.copy()
190+
labels, score = best_k_means(k, this_maps, adjacency, n_bst=n_bst)
191+
scores[k] = score
192+
print scores[k]
193+
res[k] = labels
194+
#k_best = scores.keys()[np.argmax(scores.values())]
195+
else:
196+
this_maps = maps[:k_best - 1].T.copy()
197+
res, scores = best_k_means(k_best, this_maps, adjacency,
198+
n_bst=4*n_bst)
199+
print 'Final : k=%i, score=%s' % (k_best, score)
200+
return res, scores
201+
202+
def separate_in_regions(data, mask, k_best=None, k_min=2, k_max=8, \
203+
center=None, only_connex=True, \
204+
take_first=True, beta=10, mode='bf'):
205+
labs, nb_labels = ndimage.label(mask)
206+
if nb_labels > 1:
207+
if center is None:
208+
sizes = np.array(ndimage.sum(mask, labs, range(1, nb_labels + 1)))
209+
ind_max = np.argmax(sizes) + 1
210+
else:
211+
ind_max = labs[tuple(center)]
212+
mask = labs == ind_max
213+
lap, w = _build_laplacian(np.atleast_3d(data), mask=np.atleast_3d(mask), \
214+
normed=True, beta=beta)
215+
return lap, w
216+
print lap.shape
217+
res, scores = communities_clustering_sparse(lap, k_best=k_best, \
218+
k_min=k_min, k_max=k_max, take_first=take_first, mode=mode)
219+
if not only_connex:
220+
if k_best==None:
221+
labels = dict()
222+
for k in range(k_min, k_max + 1):
223+
_ = np.copy(labs)
224+
_[_ > 0] += k_best
225+
_[mask > 0] = res[k] + 1
226+
labels[k] = _
227+
else:
228+
labels = np.copy(labs)
229+
labels[labels > 0] += k_best
230+
labels[mask > 0] = res + 1
231+
else:
232+
if k_best==None:
233+
labels = dict()
234+
for k in range(k_min, k_max + 1):
235+
_ = np.copy(mask).astype(np.int)
236+
_[mask > 0] = res[k] + 1
237+
labels[k] = _
238+
else:
239+
labels = np.copy(mask).astype(np.int)
240+
labels[mask > 0] = res + 1
241+
return labels, scores

0 commit comments

Comments
 (0)