Skip to content

Commit 4c066ba

Browse files
author
Jonathan Rocher
committed
Added benchmarking script for pytables and numexpr.
1 parent 492f0cd commit 4c066ba

File tree

1 file changed

+176
-0
lines changed

1 file changed

+176
-0
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#######################################################################
2+
# This script compares the speed of the computation of a polynomial
3+
# for different (numpy.memmap and tables.Expr) out-of-core paradigms.
4+
#
5+
# Author: Francesc Alted
6+
# Date: 2010-02-24
7+
#######################################################################
8+
9+
import os
10+
import sys
11+
from time import time
12+
import numpy as np
13+
import tables as tb
14+
import numexpr as ne
15+
16+
# Polynomial to evaluate, written as an expression string in the variable `x`
expr = ".25*x**3 + .75*x**2 - 1.5*x - 2"
# Number of points at which the expression is evaluated
# (10 million 8-byte floats, i.e. 80 million bytes per array)
N = 10 * 1000 * 1000
# Number of elements written per slice when populating the on-disk inputs
step = 100 * 1000
# Datatype of the input and output arrays
dtype = np.dtype('f8')

# Module-level holder of the x values for the pure numpy & numexpr runs
x = None

# *** The next variables do not need to be changed ***

# Files used by numpy.memmap: input (x) first, output (r) second
fprefix = "numpy.memmap"
mpfnames = [fprefix + "-x.bin", fprefix + "-r.bin"]

# File used by tables.Expr for both input and output
h5fname = "tablesExpr.h5"

# Number of bytes in a megabyte, kept as a float so divisions are not truncated
MB = 1024 * 1024.
34+
35+
36+
def print_filesize(filename, clib=None, clevel=0):
    """Print the total on-disk size of one file or of several files.

    Parameters
    ----------
    filename : str, or list/tuple of str
        A single path, or a collection of paths whose sizes are added up.
    clib : str, optional
        Name of the compression library; only shown when `clevel` > 0.
    clevel : int, optional
        Compression level; a value of 0 (the default) omits the
        compression note from the output.

    Raises
    ------
    OSError
        If any of the files cannot be stat'ed (e.g. it does not exist).
    """
    # Treat a lone path as a one-element collection so both cases share
    # the same summing code.
    if isinstance(filename, (list, tuple)):
        fnames = filename
    else:
        fnames = [filename]

    # `st_size` is the named form of the original magic index `[6]`
    filesize_bytes = sum(os.stat(fname).st_size for fname in fnames)
    filesize_MB = round(filesize_bytes / MB, 1)

    # Build the whole line first so that a single print emits it
    msg = "\t\tTotal file sizes: %d -- (%s MB)" % (filesize_bytes, filesize_MB)
    if clevel > 0:
        msg += " (using %s lvl%s)" % (clib, clevel)
    print(msg)
52+
53+
54+
def populate_x_numpy():
    """Fill the module-level `x` with N evenly spaced values in [-1, 1]."""
    global x
    lower, upper = -1, 1
    x = np.linspace(lower, upper, N)
59+
60+
61+
def populate_x_memmap():
    """Create the numpy.memmap input file and fill it with x values.

    The file `mpfnames[0]` is (re)created with N elements of `dtype` and
    written slice by slice, at most `step` elements at a time, with values
    covering the range [-1, 1].
    """
    # Create container for input
    x = np.memmap(mpfnames[0], dtype=dtype, mode="w+", shape=(N,))

    # Populate x in range [-1, 1]
    total = float(N)
    for start in xrange(0, N, step):
        # Clamp the final slice: when N is not a multiple of `step` the
        # last slice is shorter than `step`, and a full-length chunk would
        # not fit into it.
        stop = min(start + step, N)
        chunk = np.linspace((2*start - N)/total, (2*stop - N)/total,
                            stop - start)
        x[start:stop] = chunk
    # Drop the only reference to the memmap; per the numpy documentation,
    # deleting the object flushes pending changes to disk.
    del x
71+
72+
73+
def populate_x_tables(clib, clevel):
    """Create the HDF5 file and fill its `x` array with values in [-1, 1].

    Parameters
    ----------
    clib : str
        Compression library passed to `tb.Filters` as `complib`.
    clevel : int
        Compression level passed to `tb.Filters` as `complevel`.

    The file `h5fname` is opened in "w" mode, so any previous content is
    discarded. The file is closed even if populating fails part-way.
    """
    f = tb.openFile(h5fname, "w")
    try:
        # Create container for input
        atom = tb.Atom.from_dtype(dtype)
        filters = tb.Filters(complib=clib, complevel=clevel)
        x = f.createCArray(f.root, "x", atom=atom, shape=(N,),
                           filters=filters)

        # Populate x in range [-1, 1]
        total = float(N)
        for start in xrange(0, N, step):
            # Clamp the final slice: when N is not a multiple of `step`
            # the last slice is shorter than `step`, and a full-length
            # chunk would not fit into it.
            stop = min(start + step, N)
            chunk = np.linspace((2*start - N)/total, (2*stop - N)/total,
                                stop - start)
            x[start:stop] = chunk
    finally:
        # Always release the file handle, even when an error occurred
        f.close()
87+
88+
89+
def compute_numpy():
    """Evaluate the polynomial `expr` with plain numpy.

    The expression string is evaluated with Python's `eval`, so the `x` it
    names is the module-level `x`. The result is discarded: only the time
    spent computing it matters to the caller.
    """
    result = eval(expr)
92+
93+
94+
def compute_numexpr():
    """Evaluate the polynomial `expr` with numexpr.

    No variable dictionary is passed, so numexpr has to resolve `x` by
    itself (per its documentation, from the calling scope -- here that
    means the module-level `x`). The result is discarded: only the time
    spent computing it matters to the caller.
    """
    result = ne.evaluate(expr)
97+
98+
99+
def compute_memmap():
    """Evaluate the polynomial on the memmapped input and store it on disk.

    Reads `mpfnames[0]`, writes the result to `mpfnames[1]` (which is
    created anew) and finally prints the combined size of both files.
    """
    # The input must stay bound to the local name `x`: eval() below
    # resolves the names used in `expr` from this function's scope.
    x = np.memmap(mpfnames[0], dtype=dtype, mode='r', shape=(N,))
    # Output file, with the same length and datatype as the input
    out = np.memmap(mpfnames[1], dtype=dtype, mode="w+", shape=(N,))

    # Evaluate the whole expression in one go and copy the result into
    # the output file (assigning to a plain name instead would keep the
    # result in memory only).
    out[...] = eval(expr)

    # Release both memmap arrays before measuring the files
    del x
    del out
    print_filesize(mpfnames)
112+
113+
114+
def compute_tables(clib, clevel):
    """Evaluate the polynomial with tables.Expr, storing the result on disk.

    Parameters
    ----------
    clib : str
        Compression library for the output array (`complib` of
        `tb.Filters`); also reported by `print_filesize`.
    clevel : int
        Compression level for the output array (`complevel` of
        `tb.Filters`); also reported by `print_filesize`.

    The output is written to a new array `r` in `h5fname`. The file is
    closed even if the computation fails, and its size is printed at the
    end.
    """
    f = tb.openFile(h5fname, "a")
    try:
        # Not referenced explicitly below, but required: no variable
        # mapping is given to tb.Expr, so the `x` named in `expr` is
        # expected to be picked up from this scope (see PyTables docs).
        x = f.root.x
        # Create container for output
        atom = tb.Atom.from_dtype(dtype)
        filters = tb.Filters(complib=clib, complevel=clevel)
        r = f.createCArray(f.root, "r", atom=atom, shape=(N,),
                           filters=filters)

        # Do the actual computation and store in output
        ex = tb.Expr(expr)   # parse the expression
        ex.setOutput(r)      # send the result to `r`; without this call
                             # the result would be kept in memory
        ex.eval()            # evaluate!
    finally:
        # Always release the file handle, even when an error occurred
        f.close()
    print_filesize(h5fname, clib, clevel)
131+
132+
133+
if __name__ == '__main__':
    # Benchmark driver: for every backend, time the population of the x
    # values and then the evaluation of the polynomial, printing both.

    tb.print_versions()

    print("Total size for datasets: %s MB" % round(2*N*dtype.itemsize/MB, 1))

    # Get the compression libraries supported. Only "blosc" is tried here;
    # the wider candidate list would be ("zlib", "lzo", "bzip2", "blosc").
    supported_clibs = [clib for clib in ["blosc"]
                       if tb.whichLibVersion(clib)]

    # numpy, numexpr and numpy.memmap runs
    for what in ["numpy", "numexpr", "numpy.memmap"]:
        print("Populating x using %s with %d points..." % (what, N))
        t0 = time()
        if what == "numpy":
            populate_x_numpy()
            compute = compute_numpy
        elif what == "numexpr":
            populate_x_numpy()
            compute = compute_numexpr
        elif what == "numpy.memmap":
            populate_x_memmap()
            compute = compute_memmap
        print("*** Time elapsed populating: %s" % round(time() - t0, 3))
        print("Computing: '%s' using %s" % (expr, what))
        t0 = time()
        compute()
        print("**** Time elapsed computing: %s" % round(time() - t0, 3))

    # tables.Expr runs, one per compression library and level
    for what in ["tables.Expr"]:
        first = True    # Sentinel
        for clib in supported_clibs:
            for clevel in (0, 1, 5, 9):
                # Level 0 means no compression, so it is the same for
                # every library: run it for the first library only.
                if not first and clevel == 0:
                    continue
                print("Populating x using %s with %d points..." % (what, N))
                # Restart the timer here; otherwise the populating time
                # would also include the previous computing phase.
                t0 = time()
                populate_x_tables(clib, clevel)
                print("*** Time elapsed populating: %s"
                      % round(time() - t0, 3))
                print("Computing: '%s' using %s" % (expr, what))
                t0 = time()
                compute_tables(clib, clevel)
                print("**** Time elapsed computing: %s"
                      % round(time() - t0, 3))
                first = False
176+

0 commit comments

Comments
 (0)