-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcomputeBipartiteClusteringCoefficients.py
94 lines (71 loc) · 3.04 KB
/
computeBipartiteClusteringCoefficients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
##############################################################################
#
# computeBipartiteClusteringCoefficients.py - use NetworkX to compute bipartite c.c.
#
#
# File: computeBipartiteClusteringCoefficients.py
# Author: Alex Stivala
# Created: May 2022
#
##############################################################################
"""
Load bipartite netowrk in Pajek bipartite network format
and compute Bipartite clustering coefficients. Using NetworkX
as igraph does not have bipartite functions (as of igraph 1.2.10) [using R]
and tnet [also R] is too slow and/or runs out of memory trying to compute them
while Python and NetworkX on the same network takes less than a second (and no noticable
memory usagE).
Using NetworkX version 2.6.3 and Python version 3.9.10
For NetworkX see https://networkx.org/
"""
import sys
import networkx as nx
from networkx.algorithms import bipartite
#-----------------------------------------------------------------------------
#
# Main
#
#-----------------------------------------------------------------------------
def usage(progname):
"""
print usage msg and exit
"""
sys.stderr.write("usage: " + progname + " pajek_bipartite_net_filename\n")
sys.exit(1)
def main():
"""
See usage message in module header block
"""
if len(sys.argv) != 2:
usage(sys.argv[0])
pajek_filename = sys.argv[1]
# read_pajek() does not work for bipartite Pajek format so have to read and build
# graph from file here
# We assume the nodes are numbered 1..N (But renumber them from 0..N-1 for Python)
pajek_text = open(pajek_filename).readlines()
# first line e..g
# *vertices 16000 12000
# give total number of nodes and number of type A nodes (rest are type B)
firstline = pajek_text[0].rstrip().lower().split(' ')
if len(firstline) != 3 or firstline[0] != '*vertices':
sys.stderr.write('expecting *vertices num_nodes num_A_nodes as first line\n')
sys.exit(1)
num_nodes = int(firstline[1])
num_A_nodes = int(firstline[2])
G = nx.Graph()
G.add_nodes_from(range(0, num_A_nodes), bipartite = 0)
G.add_nodes_from(range(num_A_nodes, num_nodes), bipartite = 1)
# edges are after '*edges'
edges_line = [(i, text) for (i, text) in enumerate(pajek_text) if text.lower() == "*edges\n"][0][0]
for edgeline in pajek_text[(edges_line+1):]:
i, j = edgeline.rstrip().split(' ')
G.add_edge(int(i)-1, int(j)-1)
print (nx.info(G))
assert nx.is_bipartite(G)
#node set level not graph level: print ('Average bipartite clustering:', nx.bipartite.average_clustering(G))
#node level not graph level: print ('Latapy clustering:', nx.bipartite.latapy_clustering(G)
print ('Robins-Alexander bipartite clustering: ', bipartite.robins_alexander_clustering(G))
# node level not graph level: print ('Square clustering:', nx.square_clustering(G))
if __name__ == "__main__":
main()