Skip to content

Commit 092126e

Browse files
committed
MongoDB: docker-compose and scripts
1 parent ef6f80b commit 092126e

12 files changed

Lines changed: 100292 additions & 5 deletions

README.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,23 @@ This is a collection of ready-to-go experiments with databases.
55

66
Every database has a Hello World setup with
77
[Docker compose](https://docs.docker.com/compose/) to run on any OS,
8-
usually in a distributed (multi-host) fashion.
8+
usually in a distributed (multi-host) fashion. The hosts run in a local
9+
Docker network, which is probably the closest you can get to a simulation
10+
of an actual cluster of hosts on a local machine.
11+
912
Start any database with:
1013
`docker-compose up`
1114

1215
Some databases might need to scale to multiple nodes (`--scale node=N`).
1316

1417
All databases have some example scripts that try to highlight some of the
15-
strengths and peculiarities:
18+
strengths and peculiarities, and each database provides the following scripts:
1619

17-
- `docker-compose run scripts ./populate.sh`: (big) data generation, to have some data
20+
- `docker-compose run scripts ./populate.sh`: (big'ish) data generation, to have some data
1821
- `docker-compose run scripts ./aggregate.sh`: perform some exemplar aggregation on the data
1922

20-
Read the corresponding README for the database for specific instructions.
23+
Read the corresponding README of each database for specific instructions.
2124

22-
This database lab was inspired by the "7 Databases in 7 Weeks" book, but will
25+
This database lab was inspired by the "7 Databases in 7 Weeks" book, but might
2326
grow in database scope over time. I tried to pin all Docker containers to
2427
specific versions; feel free to update or expand the examples where desired!

mongo/docker-compose.yml

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
# Based on:
# https://github.com/Ameausoone/mongo-docker-compose/blob/fddaeac58b7df373b8f939d805ee7d270065fd6e/docker-compose.yml
#
# This is a pretty distributed setup:
#                                          /--- mongors1n1
#    clients --> mongos1 --x-> mongocfg1 -x---- mongors1n2
#                        \ |              |\--- mongors1n3
#                         -x-> mongocfg2 -x
#                        / |              |/--- mongors2n1
# other clts --> mongos2 --x-> mongocfg3 -x---- mongors2n2
#                                          \--- mongors2n3
# In other words:
# Clients connect to either of the two mongos endpoints, which look up
# the partitioning metadata held by the config replica set and route
# each request to the current master of the right shard.
# In each group, one server can go down without problems.
#
# The containers only start the processes; the replica sets and shards
# still have to be wired together afterwards (see scripts/initiate.sh).

version: "2"
services:

  # Client container: carries the example scripts and a mongo shell.
  scripts:
    build: scripts
    depends_on:
      - mongos1  # could also connect to mongos2
      - mongos2
    environment:
      - COORDINATOR_NODE=mongos1
      - COORDINATOR_PORT=27017
    command: mongo --host mongos1

  # --- Shard 1: replica set "mongors1" ---------------------------------

  mongors1n1:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors1 --shardsvr --nojournal --oplogSize 16 --noauth --rest
    environment:
      TERM: xterm
    expose:
      - "27018"
      - "28018"  # REST
    ports:
      # Port mappings are quoted so YAML always reads them as strings.
      - "29000:28018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors1n1vol:/data/db

  mongors1n2:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors1 --shardsvr --nojournal --oplogSize 16 --noauth --rest
    environment:
      TERM: xterm
    expose:
      - "27018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors1n2vol:/data/db

  mongors1n3:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors1 --shardsvr --nojournal --oplogSize 16 --noauth --rest
    environment:
      TERM: xterm
    expose:
      - "27018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors1n3vol:/data/db

  # --- Shard 2: replica set "mongors2" ---------------------------------

  mongors2n1:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors2 --shardsvr --nojournal --oplogSize 16 --noauth --rest
    environment:
      TERM: xterm
    expose:
      - "27018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors2n1vol:/data/db

  mongors2n2:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors2 --shardsvr --nojournal --oplogSize 16 --noauth
    environment:
      TERM: xterm
    expose:
      - "27018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors2n2vol:/data/db

  mongors2n3:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --replSet mongors2 --shardsvr --nojournal --oplogSize 16 --noauth
    environment:
      TERM: xterm
    expose:
      - "27018"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongors2n3vol:/data/db

  # --- Config servers: replica set "mongocfg1" -------------------------
  # NOTE: "mongocfg1" is both the replica set name and the first host name.

  mongocfg1:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --configsvr --replSet mongocfg1 --noauth
    environment:
      TERM: xterm
    expose:
      - "27019"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongocfg1vol:/data/configdb

  mongocfg2:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --configsvr --replSet mongocfg1 --noauth
    environment:
      TERM: xterm
    expose:
      - "27019"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongocfg2vol:/data/configdb

  mongocfg3:
    image: mongo:3.4
    command: mongod --noprealloc --smallfiles --configsvr --replSet mongocfg1 --noauth
    environment:
      TERM: xterm
    expose:
      - "27019"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - mongocfg3vol:/data/configdb

  # --- Query routers (the endpoints clients talk to) -------------------

  mongos1:
    image: mongo:3.4
    depends_on:
      - mongocfg1
      - mongocfg2
      - mongocfg3
    command: mongos --configdb mongocfg1/mongocfg1:27019,mongocfg2:27019,mongocfg3:27019 --port 27017 --noauth
    ports:
      - "27017:27017"
    volumes:
      - /etc/localtime:/etc/localtime:ro

  mongos2:
    image: mongo:3.4
    depends_on:
      - mongocfg1
      - mongocfg2
      - mongocfg3
    command: mongos --configdb mongocfg1/mongocfg1:27019,mongocfg2:27019,mongocfg3:27019 --port 27017 --noauth
    ports:
      # Same container port as mongos1, published on a different host port.
      - "27018:27017"
    volumes:
      - /etc/localtime:/etc/localtime:ro


# One named volume per data-bearing node.
volumes:
  mongors1n1vol:
  mongors1n2vol:
  mongors1n3vol:
  mongors2n1vol:
  mongors2n2vol:
  mongors2n3vol:
  mongocfg1vol:
  mongocfg2vol:
  mongocfg3vol:

mongo/scripts/Dockerfile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Image for the 'scripts' service: the mongo:3.4 image (which ships the
# mongo shell) plus Ruby and the 'mongo' gem for the Ruby example.
FROM mongo:3.4

# Compiler toolchain and Ruby headers; presumably needed to build native
# extensions during `gem install` below.
# The apt package lists are removed in the same layer so they do not
# end up in the final image.
RUN apt-get update \
    && apt-get -y install \
        gcc g++ make \
        ruby ruby-dev \
    && rm -rf /var/lib/apt/lists/*

RUN gem install mongo

# The example scripts are run from /scripts.
COPY . /scripts
WORKDIR /scripts
12+

mongo/scripts/aggregate.js

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Aggregate on data in MongoDB database
2+
// Uses Mongo shell commands (not a standalone script)
3+
4+
use geography
5+
6+
7+
/* Simple queries */
8+
9+
print("--------------------------")
10+
print(".find(Amsterdam):")
11+
db.cities.find({name: "Amsterdam"})
12+
13+
amsterdam = db.cities.find({name: /^Amst[aoeu]rdam$/, country: "NL"})[0]
14+
print(".find(above amsterdam):")
15+
db.cities.find({'location.longitude': {$gt: amsterdam.location.longitude}}).count()
16+
17+
var range = {}
18+
range['$lt'] = 1e6
19+
range['$gt'] = 1e5
20+
print(".find(mid-size cities ^P):")
21+
db.cities.find({name: /^P/, population: range}, {name: 1, _id: 0})
22+
23+
print("--------------------------")
24+
print("Nested queries:")
25+
db.cities.find(
26+
{
27+
$or: [
28+
{famous_for: /light/, 'mayor.party': {$exists: true}},
29+
{name: /^Londonder/}
30+
],
31+
population: {$type: 'number'}
32+
},
33+
{name: 1, famous_for: 1, mayor: 1, population: 1, _id: 0}
34+
)
35+
36+
print("--------------------------")
37+
print("Updating")
38+
db.cities.update(
39+
{_id: amsterdam['_id']},
40+
{
41+
$set: {'age': 550},
42+
$inc: {'population': 1000},
43+
$push: {'famous_for': "Anne Frankhuis"}
44+
}
45+
)
46+
db.cities.findOne({_id: amsterdam._id})
47+
48+
print("--------------------------")
49+
print("Adding indices for speed improvements")
50+
51+
db.cities.createIndex({location: "2d"})
52+
db.cities.find({location: {$near: [45.52, -122.67]}}).limit(5)
53+
54+
db.phones.dropIndexes()
55+
// explain the plan, and include the results (executionStats)
56+
explained_before = db.phones.find({display: "+1 867-5309000"}).explain("executionStats")
57+
print(explained_before)
58+
db.phones.createIndex(
59+
{display: 1},
60+
{unique: true, dropDups: true}
61+
)
62+
db.phones.createIndex(
63+
{'components.area': 1, 'components.number': 1},
64+
{background: 1} // async
65+
)
66+
print(db.phones.getIndices())
67+
explained_after = db.phones.find({display: "+1 867-5309000"}).explain("executionStats")
68+
print("Before: " + explained_before.executionStats.executionTimeMillis + "ms")
69+
print("After: " + explained_after.executionStats.executionTimeMillis + "ms")
70+
71+
72+
/* MapReduce & Aggregation */
73+
74+
// Simple aggregation built-ins
75+
print("--------------------------")
76+
print("Numbers before 5550005")
77+
db.phones.distinct(
78+
'components.number', // field to return in array
79+
{'components.number': {$lt: 5550005}} // find
80+
)
81+
print("number of phones above 5599999 (slow, iterating on all)")
82+
const start_grp = new Date()
83+
db.phones.group({ // NOTE: officially doesn't work for sharded setups
84+
initial: {count: 0, max: 0}, // initial output
85+
reduce: function(phone, output) {
86+
// function to run on each item (item, ongoing_result)
87+
output.count++;
88+
output.max = Math.max(output.max, phone.components.number);
89+
},
90+
cond: {'components.number': {$gt: 5599999}}, // find
91+
key: {'components.area': true} // group by
92+
})
93+
print("Query (group built-in) took " + (new Date() - start_grp) + "ms")
94+
95+
// Aggregation framework
96+
// Same query as before, but much quicker (20x on this machine)
97+
const start_aggr = new Date()
98+
db.phones.aggregate([
99+
{$match: {'components.number': {$gt: 5599999}}},
100+
{$group: {
101+
_id: '$components.area',
102+
count: {$sum: 1},
103+
max: {$max: '$components.number'}
104+
}}
105+
])
106+
print("Query (aggregation framework) took " + (new Date() - start_aggr) + "ms")
107+
108+
// MapReduce framework
109+
const start_mr = new Date()
110+
load('map.js') // could also be saved server-side
111+
load('reduce.js')
112+
db.phones.mapReduce(
113+
map_count,
114+
reduce_count,
115+
{
116+
query: {'components.number': {$gt: 5599999}},
117+
out: {inline: 1}
118+
}
119+
).results
120+
print("Query (MapReduce framework) took " + (new Date() - start_mr) + "ms")

mongo/scripts/aggregate.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Minimal example of querying the cluster from Ruby.
# The address to connect to is taken from the COORDINATOR_NODE and
# COORDINATOR_PORT environment variables.
require 'mongo'

# ENV.fetch raises a KeyError that names the missing variable, instead of
# the NoMethodError on nil that plain ENV[...] concatenation would give.
address = "#{ENV.fetch('COORDINATOR_NODE')}:#{ENV.fetch('COORDINATOR_PORT')}"
client = Mongo::Client.new([address], :database => 'geography')

db = client.database
# `p` prints the array and ends the line, so the output of the loop below
# does not get glued onto it.
p db.collection_names

# Show the display string of (at most) five phones with a high number.
collection = client[:phones]
collection.find({
  'components.number' => {'$gt' => 5599999},
  'display' => {'$exists' => true}
}).limit(5).each do |doc|
  puts doc[:display]
end

# For advanced aggregation/MapReduce, have a look at the aggregate.js script.

mongo/scripts/aggregate.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
# Designed to be run inside container 'scripts'
#
# Runs the example queries twice: once through the mongo shell and once
# from Ruby. Both connect to $COORDINATOR_NODE:$COORDINATOR_PORT, and both
# script files are looked up in the current directory.

# Run commands inside the Mongo client:
echo "----------- MONGO SHELL -----------"
# Expansions are quoted so an empty or odd value cannot be word-split
# into extra arguments.
mongo --host "$COORDINATOR_NODE" --port "$COORDINATOR_PORT" < aggregate.js

# Or connect from your favourite language:
echo "----------- RUBY SCRIPT -----------"
ruby aggregate.rb

mongo/scripts/initiate.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
# Based on:
# https://github.com/singram/mongo-docker-compose/blob/master/initiate
# This is pretty ugly, so I hope it's possible to write nice configs (or have
# auto-discovery of hosts) at some point.
#
# Wires the cluster together in three steps:
#   1. initiate both shard replica sets (mongors1, mongors2),
#   2. initiate the config server replica set,
#   3. register both shards through the mongos endpoints.
# The fixed sleeps between the steps give the previous step time to settle.
# Needs to run somewhere the mongors*/mongocfg*/mongos* host names resolve.

echo "This script is going to take a minute or so."
echo "============================================="

sleep 5

for (( rs = 1; rs < 3; rs++ )); do
    echo "> Initializing replica set ${rs} (between mongorsNnM's)"
    # n1 initiates the set and is re-registered under its service name,
    # n2 is added as a regular member and n3 as an arbiter.
    command="rs.initiate(); sleep(1000); cfg = rs.conf(); cfg.members[0].host = \"mongors${rs}n1:27018\"; sleep(1000);rs.reconfig(cfg); rs.add(\"mongors${rs}n2:27018\"); rs.addArb(\"mongors${rs}n3:27018\"); rs.status();"
    # Quoted: the command contains "[0]", which the shell would otherwise
    # treat as a glob pattern.
    echo "${command}" | mongo --host "mongors${rs}n1" --port 27018
    sleep 1
done

sleep 10

echo "> Initializing replica set for mongoconfig (between mongocfgN's)"
command="rs.initiate(); sleep(1000); cfg = rs.conf(); cfg.members[0].host = \"mongocfg1:27019\"; rs.reconfig(cfg); rs.add(\"mongocfg2:27019\"); rs.add(\"mongocfg3:27019\"); rs.status();"
echo "${command}" | mongo --host mongocfg1 --port 27019

sleep 15

echo "> Adding shards to mongos endpoints (mongosS -> mongorsNnM)"
# The same command is sent to both mongos endpoints, in order.
add_shards="sh.addShard('mongors1/mongors1n1:27018,mongors1n2:27018'); sh.addShard('mongors2/mongors2n1:27018,mongors2n2:27018');sh.status()"
for router in mongos1 mongos2; do
    echo "${add_shards}" | mongo --host "${router}" --port 27017
done

sleep 10

0 commit comments

Comments
 (0)