Open
Description
Reported by @gidiko:
Using the following snippet in python:
import networkx as nx
from skylark import sketch
import numpy as np
num_nodes = 10000
num_attach_edges = 5
sketch_size = 10
graph = nx.random_graphs.barabasi_albert_graph(num_nodes, num_attach_edges)
sparse_matrix = nx.to_scipy_sparse_matrix(graph)
print 'num_nodes=%d, num_edges=%d' % (graph.number_of_nodes(), graph.number_of_edges())
cwt_sketcher = sketch.CWT(num_nodes, sketch_size)
cwt_dense_sketched_matrix = np.zeros((sketch_size, num_nodes))
cwt_dense_sketched_matrix = cwt_sketcher.apply(sparse_matrix, cwt_dense_sketched_matrix)
print '||cwt_sketched_matrix||_F = %f' % np.linalg.norm(cwt_dense_sketched_matrix)
jlt_sketcher = sketch.JLT(num_nodes, sketch_size)
jlt_dense_sketched_matrix = np.zeros((sketch_size, num_nodes))
jlt_dense_sketched_matrix = jlt_sketcher.apply(sparse_matrix, jlt_dense_sketched_matrix)
print '||jlt_sketched_matrix||_F = %f' % np.linalg.norm(jlt_dense_sketched_matrix)
gives
num_nodes=10000, num_edges=49975
||cwt_sketched_matrix||_F = 0.000000
||jlt_sketched_matrix||_F = 0.000000
We should first identify if this behavior comes from the C++ layer or the Python layer (which also includes a scipy adaptation step)
Interestingly explicit type and matrix ordering information play an important role. Ideally extra tests and subsequent adaptations should be pushed to the Python-layer side so that no additional tricks would be needed on the user side.
Anyway the following sequence of fragments seem to work for the purpose of sketching sparse matrices (graphs) in Python, sample output also included:
import networkx as nx
from skylark import sketch
import numpy as np
import El
import scipy.sparse as sparse
num_nodes = 10000
num_attach_edges = 5
sketch_size = 10
graph = nx.random_graphs.barabasi_albert_graph(num_nodes, num_attach_edges)
adjacency_matrix = nx.to_scipy_sparse_matrix(graph)
sketcher = sketch.JLT(num_nodes, sketch_size)
sketched_matrix = np.zeros((num_nodes, sketch_size))
sketched_matrix = sketcher.apply(adjacency_matrix.astype(np.float64), sketched_matrix, dim=1)
sketched_matrix
array([[ 8.11888046, 3.66619092, -0.69312617, ..., -5.27543667,
3.64034862, -2.53173489],
[-0.06671235, 3.61801307, 1.63408647, ..., -1.24440895,
1.58554545, 3.32967843],
[-0.71950559, 1.44892792, -2.43286975, ..., -3.25777655,
-0.0842889 , 1.00385429],
...,
[-0.47169948, -0.08080012, -0.82346422, ..., 0.57822103,
-1.01447011, -1.78904265],
[ 0.16467788, 0.05643725, -1.13017304, ..., -0.71708611,
-0.43244396, 0.88299947],
[ 0.41101845, -0.39103277, -1.8149693 , ..., -0.06841895,
0.58573791, 1.14902866]])
adjacency_matrix = nx.to_scipy_sparse_matrix(graph)
adjacency_matrix = sparse.csc_matrix(adjacency_matrix)
sketcher = sketch.JLT(num_nodes, sketch_size)
sketched_matrix = El.Matrix(El.dTag)
El.Zeros(sketched_matrix, num_nodes, sketch_size)
sketched_matrix = sketcher.apply(adjacency_matrix.astype(np.float64), sketched_matrix, dim=1)
sketched_matrix = sketched_matrix.ToNumPy()
sketched_matrix
array([[-4.30805621, 7.4245152 , -6.5035574 , ..., 0.2839466 ,
-0.15883759, 2.63956836],
[-3.49545669, -0.13695825, 4.02011208, ..., -0.5727807 ,
-1.31282783, 1.08634027],
[ 3.44727912, -2.49833119, -0.46271317, ..., -0.59458873,
0.20161868, -1.33262505],
...,
[ 0.21068334, 0.84575439, 0.38971438, ..., -0.32079043,
-0.32991477, 0.6029707 ],
[-0.47248113, 1.27882794, -0.07796452, ..., -0.1517788 ,
0.30718014, 0.97679054],
[ 0.59970901, 0.02808683, -0.24872374, ..., 0.38333562,
-0.72996882, -0.09309003]])
And then switching to CWT(?):
adjacency_matrix = nx.to_scipy_sparse_matrix(graph)
sketcher = sketch.CWT(num_nodes, sketch_size)
sketched_matrix = np.zeros((num_nodes, sketch_size))
sketched_matrix = sketcher.apply(adjacency_matrix.astype(np.float64), sketched_matrix, dim=1)
sketched_matrix
array([[-1., -5., -1., ..., -3., 5., 1.],
[ 3., 1., 0., ..., -1., 6., 0.],
[-1., 3., -1., ..., -3., 1., 2.],
...,
[ 0., 0., 0., ..., -1., 0., 0.],
[ 0., -1., 2., ..., 0., 0., 1.],
[ 0., 0., 1., ..., 0., -1., 1.]])
Metadata
Assignees
Labels
No labels