16 changes: 13 additions & 3 deletions Makefile
@@ -1,25 +1,35 @@
 all: build/glib_hash_table build/stl_unordered_map build/boost_unordered_map build/google_sparse_hash_map build/google_dense_hash_map build/qt_qhash build/python_dict build/ruby_hash
 
 build/glib_hash_table: src/glib_hash_table.c Makefile src/template.c
+	if [ ! -d build ]; then mkdir build; fi
 	gcc -ggdb -O2 -lm `pkg-config --cflags --libs glib-2.0` src/glib_hash_table.c -o build/glib_hash_table
 
 build/stl_unordered_map: src/stl_unordered_map.cc Makefile src/template.c
+	if [ ! -d build ]; then mkdir build; fi
 	g++ -O2 -lm src/stl_unordered_map.cc -o build/stl_unordered_map -std=c++0x
 
 build/boost_unordered_map: src/boost_unordered_map.cc Makefile src/template.c
+	if [ ! -d build ]; then mkdir build; fi
 	g++ -O2 -lm src/boost_unordered_map.cc -o build/boost_unordered_map
 
 build/google_sparse_hash_map: src/google_sparse_hash_map.cc Makefile src/template.c
+	if [ ! -d build ]; then mkdir build; fi
 	g++ -O2 -lm src/google_sparse_hash_map.cc -o build/google_sparse_hash_map
 
 build/google_dense_hash_map: src/google_dense_hash_map.cc Makefile src/template.c
+	if [ ! -d build ]; then mkdir build; fi
 	g++ -O2 -lm src/google_dense_hash_map.cc -o build/google_dense_hash_map
 
 build/qt_qhash: src/qt_qhash.cc Makefile src/template.c
-	g++ -O2 -lm `pkg-config --cflags --libs QtCore` src/qt_qhash.cc -o build/qt_qhash
+	if [ ! -d build ]; then mkdir build; fi
+	# Replaced this because it didn't compile
+	# g++ -O2 -lm `pkg-config --cflags --libs QtCore` src/qt_qhash.cc -o build/qt_qhash
+	g++ -O2 -lm -fPIC -I/usr/include/qt5 -I/usr/include/qt5/QtCore -L /usr/lib/x86_64-linux-gnu -lQt5Core src/qt_qhash.cc -o build/qt_qhash
 
 build/python_dict: src/python_dict.c Makefile src/template.c
-	gcc -O2 -lm -I/usr/include/python2.6 -lpython2.6 src/python_dict.c -o build/python_dict
+	if [ ! -d build ]; then mkdir build; fi
+	gcc -O2 -lm -I/usr/include/python2.7 -lpython2.7 src/python_dict.c -o build/python_dict
 
 build/ruby_hash: src/ruby_hash.c Makefile src/template.c
-	gcc -O2 -lm -I/usr/include/ruby-1.9.0 -I /usr/include/ruby-1.9.0/x86_64-linux -lruby1.9 src/ruby_hash.c -o build/ruby_hash
+	if [ ! -d build ]; then mkdir build; fi
+	gcc -O2 -lm -I/usr/include/ruby-1.9.1 -I /usr/include/ruby-1.9.1/x86_64-linux -lruby-1.9.1 src/ruby_hash.c -o build/ruby_hash
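A side note on the Qt rule: Debian/Ubuntu's qtbase5-dev ships a pkg-config file (`Qt5Core.pc`), so the hard-coded include and library paths can likely be replaced with a pkg-config call that mirrors the glib rule. A sketch, assuming `pkg-config --exists Qt5Core` succeeds on your system:

```
build/qt_qhash: src/qt_qhash.cc Makefile src/template.c
	if [ ! -d build ]; then mkdir build; fi
	# Assumption: Qt5Core.pc is installed. -fPIC stays, since Qt5 is built
	# with reduced relocations and requires position-independent code.
	g++ -O2 -lm -fPIC `pkg-config --cflags --libs Qt5Core` src/qt_qhash.cc -o build/qt_qhash
```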
38 changes: 0 additions & 38 deletions README

This file was deleted.

61 changes: 61 additions & 0 deletions README.md
@@ -0,0 +1,61 @@
# Hash Table Shootout

A series of benchmarks that run against several hash table implementations.
Written by Nick Welch in 2010 and updated in 2015 by Jim Belton.

## Release Notes
### 2015-11-26
* Fixed a bug whereby all C++ implementations were hashing pointer values rather than strings.
* Vastly sped up the benchmarks, cutting total run time from hours to minutes.
* Simplified the benchmarks' standard output.
* Added `kjv-bench.py`, which benchmarks each hash table implementation by counting the unique words in the King James Version of the Bible (see the sketch below).
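`kjv-bench.py` itself is not part of this diff. As a rough sketch of the workload it times (building a hash table of word counts), the following runs under both Python 2 and 3; the input path `kjv.txt` and the tokenization rule are assumptions, not the script's actual code:

```
import re

# Read the text and normalize case; kjv.txt is a hypothetical input path.
with open('kjv.txt') as f:
    text = f.read().lower()

counts = {}  # the hash table under test (here, Python's own dict)
for word in re.findall(r"[a-z']+", text):
    counts[word] = counts.get(word, 0) + 1

print("%d unique words" % len(counts))
```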

## How to Run the Benchmarks

First, the prerequisites:

* make (~3.81)
* gcc/g++ (~4.8.2)
* python (hard-coded to 2.7; edit the Makefile to use another version)
* ruby (hard-coded to 1.9.1; edit the Makefile to use another version)

Install the hash libraries:

1. Install glib by running: `sudo apt-get install libglib2.0-dev`
2. Install boost by running: `sudo apt-get install libboost-all-dev`
3. Install google sparsehash by running: `sudo apt-get install libsparsehash-dev`
4. Install qt dev by running: `sudo apt-get install qtbase5-dev`
5. Install python dev by running: `sudo apt-get install python-dev`
6. Install ruby dev by running: `sudo apt-get install ruby-dev`

Now, run:

```
$ make
$ python bench.py # Note: This step takes many minutes.
$ python make_chart_data.py < output | python make_html.py
$ python kjv-bench.py # Runs the new KJV benchmark against all the implementations (fast)
```

Your charts are now in `charts.html`.
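For reference, each line of the `output` file consumed by `make_chart_data.py` is a comma-separated record of the form `benchtype,keys,program,bytes,seconds` (see the `outfile.write` call in the `bench.py` diff below). The values in this sample line are invented for illustration:

```
sequential,2000000,glib_hash_table,81788928,0.726000
```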

## Tweaks and Tips

You can tweak some of the values in `bench.py` to make it run faster at the
cost of less granular data (the knobs are listed below), and you might need
to adjust some of the *tickSize* settings in `charts-template.html`.
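Concretely, the knobs at the top of `bench.py` in this change are the following; the explanatory comments are added here and are not in the source:

```
best_out_of = 2          # attempts per program; raise it for steadier numbers
maxkeys  = 40*1000*1000  # largest number of keys each benchmark reaches
interval = 2*1000*1000   # one measurement is taken every 2 million keys
```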

To run the benchmark at the highest priority possible, do this:

```
$ sudo nice -n-20 ionice -c1 -n0 sudo -u $USER python bench.py
```

You might also want to disable any swap files/partitions so that swapping
doesn't influence performance. The programs will die if they try to
allocate too much memory.
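One minimal way to do that on Linux, assuming root access (remember to re-enable swap afterwards):

```
$ sudo swapoff -a    # disable every swap area listed in /etc/fstab
$ python bench.py
$ sudo swapon -a     # turn swap back on once the run finishes
```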

## Copyright Information

Originally written by Nick Welch in 2010; updated and enhanced by Jim Belton in 2015.
This version is copyright Jim Belton and is licensed under the [MIT License](https://opensource.org/licenses/MIT).
92 changes: 48 additions & 44 deletions bench.py
@@ -1,29 +1,22 @@
-import sys, os, subprocess, signal
+import os
+import re
+import signal
+import subprocess
+import sys
 
-programs = [
-    'glib_hash_table',
-    'stl_unordered_map',
-    'boost_unordered_map',
-    'google_sparse_hash_map',
-    'google_dense_hash_map',
-    'qt_qhash',
-    'python_dict',
-    'ruby_hash',
-]
-
-minkeys  = 2*1000*1000
-maxkeys  = 40*1000*1000
-interval = 2*1000*1000
 best_out_of = 2
+programs = os.listdir("build")
+pattern = re.compile(r'(\d+(?:\.\d+)?) (\d+)')
+maxkeys  = 40*1000*1000
+interval = 2*1000*1000
 
 # for the final run, use this:
 #minkeys = 2*1000*1000
 #maxkeys = 40*1000*1000
 #interval = 2*1000*1000
 #best_out_of = 3
 # and use nice/ionice
 # and shut down to the console
-# and swapoff any swap files/partitions
+# and swapoff any swap files/partitions`
 
 outfile = open('output', 'w')
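The `pattern` regex above encodes the revised protocol: each benchmark binary is now run as `./build/<program> <benchtype> <maxkeys> <interval>` and prints one `<seconds> <bytes>` line per interval. A small demonstration of the parse, with a made-up sample line:

```
import re

pattern = re.compile(r'(\d+(?:\.\d+)?) (\d+)')

line = "0.146000 97288192"  # hypothetical one-interval output: seconds, then bytes
match = pattern.match(line)
print((float(match.group(1)), int(match.group(2))))  # -> (0.146, 97288192)
```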

@@ -33,37 +26,48 @@
 benchtypes = ('sequential', 'random', 'delete', 'sequentialstring', 'randomstring', 'deletestring')
 
 for benchtype in benchtypes:
-    nkeys = minkeys
-    while nkeys <= maxkeys:
-        for program in programs:
-            fastest_attempt = 1000000
-            fastest_attempt_data = ''
+    for program in programs:
+        fastest_attempt = []
 
-            for attempt in range(best_out_of):
-                proc = subprocess.Popen(['./build/'+program, str(nkeys), benchtype], stdout=subprocess.PIPE)
+        for attempt in range(best_out_of):
+            points = []
+            proc = subprocess.Popen(['./build/' + program, benchtype, str(maxkeys), str(interval)], stdout=subprocess.PIPE)
 
-                # wait for the program to fill up memory and spit out its "ready" message
-                try:
-                    runtime = float(proc.stdout.readline().strip())
-                except:
-                    runtime = 0
+            for size in range(interval, maxkeys + 1, interval):
+                # wait for the benchmark to output a time and amount of data memory used
+                line = proc.stdout.readline()
+
+                if not line:
+                    sys.stderr.write("%s: %s %s failed to output all results\n" % (__file__, program, benchtype))
+                    break
+
+                match = pattern.match(line)
+
+                if not match:
+                    sys.stderr.write("%s: %s %s output did not contain time and memory: %s" % (__file__, program, benchtype, line))
+                    break
+
+                points.append((float(match.group(1)), int(match.group(2))))
 
-                ps_proc = subprocess.Popen(['ps up %d | tail -n1' % proc.pid], shell=True, stdout=subprocess.PIPE)
-                nbytes = int(ps_proc.stdout.read().split()[4]) * 1024
-                ps_proc.wait()
-
-                os.kill(proc.pid, signal.SIGKILL)
-                proc.wait()
+            # Shutdown the benchmark if needed
+            ps_proc = subprocess.Popen(['ps up %d | tail -n1' % proc.pid], shell=True, stdout=subprocess.PIPE)
+            nbytes = int(ps_proc.stdout.read().split()[4]) * 1024
+            ps_proc.wait()
+            os.kill(proc.pid, signal.SIGKILL)
+            proc.wait()
 
-                if nbytes and runtime: # otherwise it crashed
-                    line = ','.join(map(str, [benchtype, nkeys, program, nbytes, "%0.6f" % runtime]))
-
-                    if runtime < fastest_attempt:
-                        fastest_attempt = runtime
-                        fastest_attempt_data = line
+            # If there is output and at least as much as any previous attempt
+            if len(points) > 0 and len(points) >= len(fastest_attempt):
+                if len(points) > len(fastest_attempt) or points[len(points) - 1][0] < fastest_attempt[len(fastest_attempt) - 1][0]:
+                    fastest_attempt = points
 
-            if fastest_attempt != 1000000:
-                print >> outfile, fastest_attempt_data
-                print fastest_attempt_data
-
-            nkeys += interval
+        if len(fastest_attempt) > 0:
+            for i in range(len(fastest_attempt)):
+                size = (i + 1) * interval
+                outfile.write("%s,%d,%s,%d,%0.6f\n" % (benchtype, size, program, fastest_attempt[i][1], fastest_attempt[i][0]))
+                print "%s %s: %d keys, %fs, %d Mbytes" % (program, benchtype, size, fastest_attempt[i][0],
+                                                          fastest_attempt[i][1] / 1048576)
+        else:
+            print "No run of %s %s succeeded" % (program, benchtype)