Skip to content

Commit c035ca9

Browse files
committed
Neo4J: docker-compose and scripts
1 parent fc02ac1 commit c035ca9

7 files changed

Lines changed: 290626 additions & 0 deletions

File tree

neo4j/docker-compose.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Version from: June 2020

version: "3.5"
services:
  # Helper container built from ./scripts; it is told where the database
  # lives through COORDINATOR_NODE.
  scripts:
    build: scripts
    depends_on:
      - coordinator
    environment:
      - COORDINATOR_NODE=coordinator
    networks:
      - cluster

  coordinator:  # first node
    image: neo4j:3.5
    ports:
      # HOST:CONTAINER pairs are quoted so they are always parsed as strings.
      - "7474:7474"  # HTTP
      - "7687:7687"  # Bolt
    environment:
      - NEO4J_AUTH=none
    networks:
      - cluster
    volumes:
      - data:/data  # persistent; remove for ephemeral tasks


  # The Open-Source version of Neo4j does *not* support multiple nodes;
  # hence, we won't support clusters in this setup.


networks:
  cluster:
    name: cluster

volumes:
  data: {}

neo4j/scripts/Dockerfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Image for the helper scripts: httpie, Ruby (with the faraday gem), jq and
# a few network debugging tools, with the scripts copied to /scripts.
FROM ubuntu:20.04

# Keep apt from asking interactive questions during the build.
ENV DEBIAN_FRONTEND=noninteractive

# net-tools and dnsutils are only installed for network debugging.
# The apt package index is deleted in the same layer so it does not end up
# in the final image.
RUN apt-get update \
    && apt-get -y install \
        httpie ruby ruby-dev \
        jq \
        build-essential \
        net-tools dnsutils \
    && rm -rf /var/lib/apt/lists/*

RUN gem install faraday

COPY . /scripts
WORKDIR /scripts

neo4j/scripts/aggregate.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
# Runs a few example queries and aggregations against the Neo4j HTTP API
# (db/data/... endpoints) using httpie and jq.
#
# Environment:
#   COORDINATOR_NODE  host name of the Neo4j node (required)

# Fail early with a clear message instead of sending requests to "http://:7474".
: "${COORDINATOR_NODE:?COORDINATOR_NODE must be set to the Neo4j host name}"
export DB_HOST="http://${COORDINATOR_NODE}:7474"

## Example queries
echo "Available relationship types:"
http GET "$DB_HOST/db/data/relationship/types"
echo "Some node:"
http GET "$DB_HOST/db/data/node/20"
http GET "$DB_HOST/db/data/node/20/properties"
http GET "$DB_HOST/db/data/node/20/relationships/all"

## Run aggregations
echo "Aggregations:"
CYPHER="db/data/cypher"
http POST "$DB_HOST/$CYPHER" query="MATCH (n) RETURN DISTINCT labels(n), count(n)"
http POST "$DB_HOST/$CYPHER" query="MATCH (p:Actor)-[:ACTED_IN]->(m:Movie {name: 'The Matrix'}) RETURN p" | jq ".data[][] | {label: .metadata.labels, name: .data.name}" -c

# One degree of separation is actor-movie-actor, i.e. two ACTED_IN hops.
echo "Limited 'Kevin Bacon' number (here up to 3 degrees from Kevin Bacon = 6 hops)"
http POST "$DB_HOST/$CYPHER" query='MATCH (Actor {name:"Kevin Bacon"})-[:ACTED_IN*1..6]-(other:Actor) return COUNT(DISTINCT other)'

echo "A*:"
http POST "$DB_HOST/$CYPHER" query='MATCH (bacon:Actor {name: "Kevin Bacon"}), (neo:Actor {name: "Keanu Reeves"}), path=shortestPath((bacon)-[:ACTED_IN*]-(neo)) RETURN length(path)' # 2 hops = same movie!

# Total number of actors; used below as the denominator for the ratios.
TOTAL=$(http POST "$DB_HOST/$CYPHER" query='MATCH (p:Actor) RETURN COUNT(DISTINCT p)' | jq ".data[][]")
# Without a numeric total the queries below would contain "/ .0" and be invalid Cypher.
if ! [[ "$TOTAL" =~ ^[0-9]+$ ]]; then
  echo "Could not determine the total number of actors (got: '$TOTAL')" >&2
  exit 1
fi

# "${TOTAL}.0" turns the count into a float literal so the division is not integer division.
echo "Reverse A* up to 2 degrees = 4 hops (much slower):"
http POST "$DB_HOST/$CYPHER" query="MATCH paths=shortestPath((bacon:Actor {name: \"Kevin Bacon\"})-[:ACTED_IN*1..4]-(other:Actor)) WHERE bacon <> other RETURN COUNT(paths) / ${TOTAL}.0" # 29% within 2 degrees
echo "Any connection to Kevin Bacon (rest is islands; runtime ~30s):"
time http POST "$DB_HOST/$CYPHER" query="MATCH paths=shortestPath((bacon:Actor {name: \"Kevin Bacon\"})-[:ACTED_IN*]-(other:Actor)) WHERE bacon <> other RETURN COUNT(paths) / ${TOTAL}.0" # 89.847% connected at all (89.838% connected within 6 degrees)

neo4j/scripts/create_users.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"statements": [
3+
{
4+
"statement": "CREATE (mr:Person {name: 'Martijn', from: 'Netherlands', occupation: 'software developer'})"
5+
},
6+
{
7+
"statement": "MATCH (mr:Person) WHERE mr.name = 'Martijn' CREATE (on:Person {name: 'Onno', from: 'Netherlands'}), (mr)-[:KNOWS {since: 2018}]->(on), (on)-[:KNOWS]->(mr)"
8+
},
9+
{
10+
"statement": "MATCH (mr:Person {name: 'Martijn'}), (fe:Cat {name: 'Felix'}) CREATE (mr)-[:living_together]->(fe)"
11+
}
12+
]
13+
}
14+

neo4j/scripts/importer.rb

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Notes by turiphro:
2+
# - The tsv dataset from the first edition wasn't available anymore;
3+
# the neo4j dump is outdated and hard to import from a separate Docker container.
4+
# Therefore, I extracted an old version of the dataset from the 1GB+ archive (6GB+ data) at
5+
# https://archive.org/download/freebase-data-dump-2010-07-16/freebase-datadump-tsv.tar.bz2
6+
# - Importer script inspired by importer.rb from 7DB in 7wks 1st edition,
7+
# but refactored to send Cypher to the Neo4j server (since the REST API was removed in 4.0+).
8+
9+
require "faraday"
require "json"


# Base URL of the Neo4j HTTP API, read from the DB_HOST environment variable.
REST_URL = ENV["DB_HOST"]
REST_CYPHER_ENDPOINT = "db/data/cypher"

abort "DB_HOST is not set (expected the base URL of the Neo4j HTTP API)" if REST_URL.nil? || REST_URL.empty?
abort "Usage: ruby importer.rb <tsv-file>" if ARGV.empty?

puts REST_URL
conn = Faraday.new(:url => REST_URL) do |builder|
  builder.adapter :net_http
end

# Adds the actor node, the movie node and the relationship between them.
# The names are passed as query parameters instead of being spliced into the
# query text, so quotes, backslashes, '&', '+' or '%' in a name can neither
# break the statement nor be interpreted as Cypher.
CYPHER_QUERY = <<~'CYPHER'
  MERGE (actor:Actor {name: $actor})
  MERGE (movie:Movie {name: $movie})
  MERGE (actor)-[:ACTED_IN]->(movie)
CYPHER

count = 0
# File.foreach closes the file when the block finishes.
File.foreach(ARGV[0], :encoding => "UTF-8") do |line|
  # chomp first: otherwise the line terminator ends up in the movie name.
  _, _, actor, movie = line.chomp.split("\t")
  # Lines with fewer than four columns leave actor/movie nil; skip them too.
  next if actor.nil? || actor.empty? || movie.nil? || movie.empty?

  payload = JSON.generate(
    :query  => CYPHER_QUERY,
    :params => { :actor => actor, :movie => movie }
  )
  response = conn.post(REST_CYPHER_ENDPOINT, payload, "Content-Type" => "application/json")

  unless response.success?
    # Report the failure and carry on with the remaining lines.
    warn "Import failed (HTTP #{response.status}) for actor=#{actor.inspect} movie=#{movie.inspect}"
    next
  end

  puts " #{count} relationships loaded" if (count += 1) % 1000 == 0
end

0 commit comments

Comments
 (0)