Skip to content

Commit bf6785a

Browse files
author
Mark Needham
committed
pull in explanations as well
1 parent 9b76211 commit bf6785a

15 files changed

+38893
-169
lines changed

generate_notebook.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,18 @@ def find_tag(file, tag):
3636
sys.exit(1)
3737

3838
algorithm_name = sys.argv[1]
39-
algorithm_description = find_tag(sys.argv[2], "introduction")
39+
algorithm_file = sys.argv[2]
4040
cypher_file = sys.argv[3]
4141

42+
algorithm_description = find_tag(algorithm_file, "introduction")
43+
4244
stream_graph_tag = "stream-sample-graph"
4345
if len(sys.argv) >= 5:
4446
stream_graph_tag = sys.argv[4]
4547

48+
explanation_tag = "stream-sample-graph-explanation"
49+
if len(sys.argv) >= 6:
50+
explanation_tag = sys.argv[5]
4651

4752
heading_text = """\
4853
# {0}
@@ -99,6 +104,8 @@ def find_tag(file, tag):
99104
100105
df''' % streaming_query_content
101106

107+
streaming_graph_explanation_text = find_tag(algorithm_file, explanation_tag)
108+
102109
nb = nbf.v4.new_notebook()
103110
nb['cells'] = [nbf.v4.new_markdown_cell(heading_text),
104111
nbf.v4.new_code_cell(imports),
@@ -107,7 +114,8 @@ def find_tag(file, tag):
107114
nbf.v4.new_markdown_cell(create_graph_text),
108115
nbf.v4.new_code_cell(create_graph),
109116
nbf.v4.new_markdown_cell(streaming_graph_text),
110-
nbf.v4.new_code_cell(run_algorithm)]
117+
nbf.v4.new_code_cell(run_algorithm),
118+
nbf.v4.new_markdown_cell(streaming_graph_explanation_text)]
111119

112120
output_file = 'notebooks/{0}.ipynb'.format(algorithm_name.replace(" ", ""))
113121

generate_notebooks.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ python generate_notebook.py \
2525
"Unweighted Connected Components" \
2626
"https://github.com/neo4j-contrib/neo4j-graph-algorithms/raw/3.2/doc/asciidoc/connected-components.adoc" \
2727
"https://github.com/neo4j-contrib/neo4j-graph-algorithms/raw/3.2/doc/asciidoc/scripts/connected-components.cypher" \
28-
"unweighted-stream-sample-graph"
28+
"unweighted-stream-sample-graph" \
29+
"unweighted-stream-sample-graph-explanation"
2930

3031
python generate_notebook.py \
3132
"Weighted Connected Components" \
3233
"https://github.com/neo4j-contrib/neo4j-graph-algorithms/raw/3.2/doc/asciidoc/connected-components.adoc" \
3334
"https://github.com/neo4j-contrib/neo4j-graph-algorithms/raw/3.2/doc/asciidoc/scripts/connected-components.cypher" \
34-
"weighted-stream-sample-graph"
35+
"weighted-stream-sample-graph" \
36+
"weighted-stream-sample-graph-explanation"
3537

3638
python generate_notebook.py \
3739
"Strongly Connected Components" \

notebooks/AllPairsShortestPath.ipynb

Lines changed: 71 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -124,93 +124,93 @@
124124
" <tbody>\n",
125125
" <tr>\n",
126126
" <th>0</th>\n",
127-
" <td>F</td>\n",
128-
" <td>F</td>\n",
127+
" <td>B</td>\n",
128+
" <td>B</td>\n",
129129
" <td>0.0</td>\n",
130130
" </tr>\n",
131131
" <tr>\n",
132132
" <th>1</th>\n",
133+
" <td>B</td>\n",
133134
" <td>D</td>\n",
134-
" <td>F</td>\n",
135-
" <td>50.0</td>\n",
135+
" <td>40.0</td>\n",
136136
" </tr>\n",
137137
" <tr>\n",
138138
" <th>2</th>\n",
139139
" <td>B</td>\n",
140-
" <td>F</td>\n",
141-
" <td>90.0</td>\n",
140+
" <td>E</td>\n",
141+
" <td>70.0</td>\n",
142142
" </tr>\n",
143143
" <tr>\n",
144144
" <th>3</th>\n",
145-
" <td>D</td>\n",
146-
" <td>D</td>\n",
147-
" <td>0.0</td>\n",
145+
" <td>B</td>\n",
146+
" <td>F</td>\n",
147+
" <td>110.0</td>\n",
148148
" </tr>\n",
149149
" <tr>\n",
150150
" <th>4</th>\n",
151-
" <td>B</td>\n",
152-
" <td>B</td>\n",
151+
" <td>A</td>\n",
152+
" <td>A</td>\n",
153153
" <td>0.0</td>\n",
154154
" </tr>\n",
155155
" <tr>\n",
156156
" <th>5</th>\n",
157-
" <td>D</td>\n",
158-
" <td>E</td>\n",
159-
" <td>30.0</td>\n",
157+
" <td>A</td>\n",
158+
" <td>B</td>\n",
159+
" <td>50.0</td>\n",
160160
" </tr>\n",
161161
" <tr>\n",
162162
" <th>6</th>\n",
163-
" <td>B</td>\n",
164-
" <td>D</td>\n",
165-
" <td>40.0</td>\n",
163+
" <td>A</td>\n",
164+
" <td>C</td>\n",
165+
" <td>50.0</td>\n",
166166
" </tr>\n",
167167
" <tr>\n",
168168
" <th>7</th>\n",
169-
" <td>B</td>\n",
170-
" <td>E</td>\n",
171-
" <td>70.0</td>\n",
169+
" <td>A</td>\n",
170+
" <td>D</td>\n",
171+
" <td>90.0</td>\n",
172172
" </tr>\n",
173173
" <tr>\n",
174174
" <th>8</th>\n",
175175
" <td>A</td>\n",
176-
" <td>A</td>\n",
177-
" <td>0.0</td>\n",
176+
" <td>E</td>\n",
177+
" <td>120.0</td>\n",
178178
" </tr>\n",
179179
" <tr>\n",
180180
" <th>9</th>\n",
181181
" <td>A</td>\n",
182182
" <td>F</td>\n",
183-
" <td>100.0</td>\n",
183+
" <td>160.0</td>\n",
184184
" </tr>\n",
185185
" <tr>\n",
186186
" <th>10</th>\n",
187-
" <td>A</td>\n",
188-
" <td>B</td>\n",
189-
" <td>50.0</td>\n",
187+
" <td>D</td>\n",
188+
" <td>D</td>\n",
189+
" <td>0.0</td>\n",
190190
" </tr>\n",
191191
" <tr>\n",
192192
" <th>11</th>\n",
193-
" <td>A</td>\n",
194-
" <td>C</td>\n",
195-
" <td>50.0</td>\n",
193+
" <td>D</td>\n",
194+
" <td>E</td>\n",
195+
" <td>30.0</td>\n",
196196
" </tr>\n",
197197
" <tr>\n",
198198
" <th>12</th>\n",
199-
" <td>A</td>\n",
200199
" <td>D</td>\n",
201-
" <td>50.0</td>\n",
200+
" <td>F</td>\n",
201+
" <td>70.0</td>\n",
202202
" </tr>\n",
203203
" <tr>\n",
204204
" <th>13</th>\n",
205-
" <td>A</td>\n",
206-
" <td>E</td>\n",
207-
" <td>80.0</td>\n",
205+
" <td>C</td>\n",
206+
" <td>C</td>\n",
207+
" <td>0.0</td>\n",
208208
" </tr>\n",
209209
" <tr>\n",
210210
" <th>14</th>\n",
211-
" <td>E</td>\n",
212-
" <td>F</td>\n",
213-
" <td>20.0</td>\n",
211+
" <td>C</td>\n",
212+
" <td>D</td>\n",
213+
" <td>40.0</td>\n",
214214
" </tr>\n",
215215
" <tr>\n",
216216
" <th>15</th>\n",
@@ -221,53 +221,53 @@
221221
" <tr>\n",
222222
" <th>16</th>\n",
223223
" <td>C</td>\n",
224-
" <td>F</td>\n",
225-
" <td>90.0</td>\n",
224+
" <td>E</td>\n",
225+
" <td>70.0</td>\n",
226226
" </tr>\n",
227227
" <tr>\n",
228228
" <th>17</th>\n",
229-
" <td>C</td>\n",
230-
" <td>C</td>\n",
231-
" <td>0.0</td>\n",
229+
" <td>E</td>\n",
230+
" <td>F</td>\n",
231+
" <td>40.0</td>\n",
232232
" </tr>\n",
233233
" <tr>\n",
234234
" <th>18</th>\n",
235235
" <td>C</td>\n",
236-
" <td>D</td>\n",
237-
" <td>40.0</td>\n",
236+
" <td>F</td>\n",
237+
" <td>110.0</td>\n",
238238
" </tr>\n",
239239
" <tr>\n",
240240
" <th>19</th>\n",
241-
" <td>C</td>\n",
242-
" <td>E</td>\n",
243-
" <td>70.0</td>\n",
241+
" <td>F</td>\n",
242+
" <td>F</td>\n",
243+
" <td>0.0</td>\n",
244244
" </tr>\n",
245245
" </tbody>\n",
246246
"</table>\n",
247247
"</div>"
248248
],
249249
"text/plain": [
250250
" source target distance\n",
251-
"0 F F 0.0\n",
252-
"1 D F 50.0\n",
253-
"2 B F 90.0\n",
254-
"3 D D 0.0\n",
255-
"4 B B 0.0\n",
256-
"5 D E 30.0\n",
257-
"6 B D 40.0\n",
258-
"7 B E 70.0\n",
259-
"8 A A 0.0\n",
260-
"9 A F 100.0\n",
261-
"10 A B 50.0\n",
262-
"11 A C 50.0\n",
263-
"12 A D 50.0\n",
264-
"13 A E 80.0\n",
265-
"14 E F 20.0\n",
251+
"0 B B 0.0\n",
252+
"1 B D 40.0\n",
253+
"2 B E 70.0\n",
254+
"3 B F 110.0\n",
255+
"4 A A 0.0\n",
256+
"5 A B 50.0\n",
257+
"6 A C 50.0\n",
258+
"7 A D 90.0\n",
259+
"8 A E 120.0\n",
260+
"9 A F 160.0\n",
261+
"10 D D 0.0\n",
262+
"11 D E 30.0\n",
263+
"12 D F 70.0\n",
264+
"13 C C 0.0\n",
265+
"14 C D 40.0\n",
266266
"15 E E 0.0\n",
267-
"16 C F 90.0\n",
268-
"17 C C 0.0\n",
269-
"18 C D 40.0\n",
270-
"19 C E 70.0"
267+
"16 C E 70.0\n",
268+
"17 E F 40.0\n",
269+
"18 C F 110.0\n",
270+
"19 F F 0.0"
271271
]
272272
},
273273
"execution_count": 4,
@@ -295,6 +295,11 @@
295295
"\n",
296296
"df"
297297
]
298+
},
299+
{
300+
"cell_type": "markdown",
301+
"metadata": {},
302+
"source": []
298303
}
299304
],
300305
"metadata": {},

notebooks/BetweennessCentrality.ipynb

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"metadata": {},
66
"source": [
77
"# Betweenness Centrality\n",
8-
"_Betweenness Centrality_ is a measure of centrality in a graph based on calculating geodesic (shortest) paths between nodes.\n",
9-
"There is at least one shortest path between every pair of nodes in a connected graph.\n",
10-
"The shortest path can be based on the number of relationships that the path passes through in an unweighted network or the sum of the weights of the relationships in a weighted network.\n",
8+
"_Betweenness Centrality_ is a way of detecting the amount of influence a node has over the flow of information in a graph.\n",
119
"\n",
12-
"The betweenness centrality for each node is the number of these shortest paths that pass through that node.\n",
13-
"The nodes that most frequently lie on these shortest paths will have a higher betweenness centrality score.\n",
10+
"![Betweenness Centrality](../images/betweenness_centrality.png)\n",
11+
"\n",
12+
"It is often used to find nodes that serve as a bridge from one part of a graph to another.\n",
13+
"In the above example Alice is the main connection in the graph.\n",
14+
"If Alice were removed, all connections in the graph would be cut off.\n",
15+
"This makes Alice \"important\" because she ensures that no nodes are isolated.\n",
1416
"\n",
1517
"First we'll import the Neo4j driver and Pandas libraries:\n"
1618
]
@@ -135,22 +137,22 @@
135137
" </tr>\n",
136138
" <tr>\n",
137139
" <th>2</th>\n",
138-
" <td>Mark</td>\n",
140+
" <td>Bridget</td>\n",
139141
" <td>0.0</td>\n",
140142
" </tr>\n",
141143
" <tr>\n",
142144
" <th>3</th>\n",
143-
" <td>Michael</td>\n",
145+
" <td>Doug</td>\n",
144146
" <td>0.0</td>\n",
145147
" </tr>\n",
146148
" <tr>\n",
147149
" <th>4</th>\n",
148-
" <td>Bridget</td>\n",
150+
" <td>Mark</td>\n",
149151
" <td>0.0</td>\n",
150152
" </tr>\n",
151153
" <tr>\n",
152154
" <th>5</th>\n",
153-
" <td>Doug</td>\n",
155+
" <td>Michael</td>\n",
154156
" <td>0.0</td>\n",
155157
" </tr>\n",
156158
" </tbody>\n",
@@ -161,10 +163,10 @@
161163
" user centrality\n",
162164
"0 Alice 4.0\n",
163165
"1 Charles 2.0\n",
164-
"2 Mark 0.0\n",
165-
"3 Michael 0.0\n",
166-
"4 Bridget 0.0\n",
167-
"5 Doug 0.0"
166+
"2 Bridget 0.0\n",
167+
"3 Doug 0.0\n",
168+
"4 Mark 0.0\n",
169+
"5 Michael 0.0"
168170
]
169171
},
170172
"execution_count": 4,
@@ -190,6 +192,14 @@
190192
"\n",
191193
"df"
192194
]
195+
},
196+
{
197+
"cell_type": "markdown",
198+
"metadata": {},
199+
"source": [
200+
"We can see that Alice is the main broker in this network and Charles is a minor broker.\n",
201+
"The others don't have any influence because all the shortest paths between pairs of people go via Alice or Charles."
202+
]
193203
}
194204
],
195205
"metadata": {},

notebooks/ClosenessCentrality.ipynb

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
"metadata": {},
66
"source": [
77
"# Closeness Centrality\n",
8-
"The _Closeness Centrality_ of a node measures the distance from that node to all other nodes.\n",
8+
"_Closeness Centrality_ is a way of detecting nodes that are able to spread information very efficiently through a graph.\n",
9+
"\n",
10+
"The _Closeness Centrality_ of a node measures its average distance to all other nodes.\n",
911
"Nodes with a high closeness score have the shortest distances to all other nodes.\n",
10-
"The premise of this algorithm is that nodes with short distance to other nodes can spread information very efficiently through the network.\n",
11-
"This is important for the availability of knowledge and resources.\n",
1212
"\n",
1313
"First we'll import the Neo4j driver and Pandas libraries:\n"
1414
]
@@ -184,6 +184,15 @@
184184
"\n",
185185
"df"
186186
]
187+
},
188+
{
189+
"cell_type": "markdown",
190+
"metadata": {},
191+
"source": [
192+
"\"C\" is the best connected node in this graph although \"B\" and \"D\" aren't far behind.\n",
193+
"\"A\" and \"E\" don't have close ties to many other nodes so their scores are lower.\n",
194+
"A score of 1 would indicate that a node has a direct connection to all other nodes."
195+
]
187196
}
188197
],
189198
"metadata": {},

0 commit comments

Comments
 (0)