Merge remote-tracking branch 'origin/master' into no-mpi
mloubout committed Nov 21, 2018
2 parents d89042e + 7007690 commit 059ff82
Showing 11 changed files with 239 additions and 73 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
@@ -96,7 +96,7 @@ install:
- if [[ $INSTALL_TYPE == 'conda' ]]; then
conda env create -q -f environment.yml python=$TRAVIS_PYTHON_VERSION;
source activate devito;
if [[ $MPI_INSTALL == '1' ]]; then pip install -e .[extras]; else pip install -e .; fi
pip install -e .;
conda list;
fi
- if [[ "$MPI_INSTALL" == '1' ]]; then
@@ -105,6 +105,7 @@ install:

# Install devito with pip
- if [[ $INSTALL_TYPE == 'pip_setup' ]]; then python setup.py install; fi
- if [[ $MPI_INSTALL == '1' ]]; then pip install -e .[extras]; fi

before_script:
- echo -e "Host github.com\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config
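The MPI-specific dependencies are now installed in a dedicated step (`pip install -e .[extras]`) rather than inside the conda branch. This relies on an `extras` group being declared in the project's `setup.py`; a minimal sketch of such a declaration (the dependency list here is an assumption, `mpi4py` being implied by the `MPI_INSTALL` guard):

```python
# setup.py (sketch) -- defines the `[extras]` target used by `pip install -e .[extras]`
from setuptools import setup, find_packages

setup(
    name='devito',
    packages=find_packages(),
    extras_require={
        # hypothetical contents; the authoritative list lives in the repository's setup.py
        'extras': ['mpi4py'],
    },
)
```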
6 changes: 1 addition & 5 deletions devito/__init__.py
@@ -62,11 +62,7 @@ def _at_callback(val):  # noqa
level, mode = val
if level == 'off':
level = False
if configuration['backend'] == 'core' and mode == 'runtime':
warning("Unsupported auto-tuning mode `runtime` with backend `core`")
return at_setup(level, 'preemptive')
else:
return at_setup(level, mode)
return at_setup(level, mode)
configuration.add('autotuning', 'off', at_accepted, callback=_at_callback, # noqa
impacts_jit=False)

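The callback no longer special-cases the `core` backend: a requested `runtime` mode is passed through instead of being coerced to `preemptive`. A standalone sketch of the behavioural difference (illustrative names; `at_setup` stands in for the real setup helper):

```python
def at_setup(level, mode):
    # stand-in for the real `at_setup`, which records the autotuning choice
    return level, mode

# Before: `runtime` was silently downgraded to `preemptive` on the `core` backend
def _at_callback_old(val, backend='core'):
    level, mode = val
    if level == 'off':
        level = False
    if backend == 'core' and mode == 'runtime':
        return at_setup(level, 'preemptive')
    return at_setup(level, mode)

# After: the requested mode is always honoured
def _at_callback_new(val):
    level, mode = val
    if level == 'off':
        level = False
    return at_setup(level, mode)

assert _at_callback_old(('basic', 'runtime')) == ('basic', 'preemptive')
assert _at_callback_new(('basic', 'runtime')) == ('basic', 'runtime')
```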
162 changes: 122 additions & 40 deletions devito/core/autotuning.py
@@ -6,27 +6,50 @@
from operator import mul
import resource

from devito.ir.iet import Iteration, FindNodes, FindSymbols
from devito.ir import Backward, Iteration, FindNodes, FindSymbols
from devito.logger import perf, warning
from devito.parameters import configuration

__all__ = ['autotune']


def autotune(operator, arguments, parameters, tunable):
def autotune(operator, args, level, mode):
"""
Acting as a high-order function, take as input an operator and a list of
operator arguments to perform empirical autotuning. Some of the operator
arguments are marked as tunable.
Operator autotuning.
Parameters
----------
operator : Operator
Input Operator.
args : dict_like
The runtime arguments with which `operator` is run.
level : str
The autotuning aggressiveness (basic, aggressive). A more aggressive
autotuning might eventually result in higher performance, though in
some circumstances it might instead increase the actual runtime.
mode : str
The autotuning mode (preemptive, runtime). In preemptive mode, the
output runtime values supplied by the user to `operator.apply` are
replaced with shadow copies.
"""
key = [level, mode]
accepted = configuration._accepted['autotuning']
if key not in accepted:
raise ValueError("The accepted `(level, mode)` combinations are `%s`; "
"provided `%s` instead" % (accepted, key))

parameters = operator.parameters
tunable = operator._dle_args

# We get passed all the arguments, but the cfunction only requires a subset
at_arguments = OrderedDict([(p.name, arguments[p.name]) for p in parameters])
at_args = OrderedDict([(p.name, args[p.name]) for p in parameters])

# User-provided output data must not be altered
output = [i.name for i in operator.output]
for k, v in arguments.items():
if k in output:
at_arguments[k] = v.copy()
# User-provided output data won't be altered in `preemptive` mode
if mode == 'preemptive':
output = [i.name for i in operator.output]
for k, v in args.items():
if k in output:
at_args[k] = v.copy()

iterations = FindNodes(Iteration).visit(operator.body)
dim_mapper = {i.dim.name: i.dim for i in iterations}
@@ -35,33 +58,27 @@ def autotune(operator, arguments, parameters, tunable):
# runs will finish quickly
steppers = [i for i in iterations if i.dim.is_Time]
if len(steppers) == 0:
stepper = None
timesteps = 1
elif len(steppers) == 1:
stepper = steppers[0]
start = at_arguments[stepper.dim.min_name]
timesteps = stepper.extent(start=start, finish=options['at_squeezer']) - 1
if timesteps < 0:
timesteps = options['at_squeezer'] - timesteps
perf("AutoTuner: Number of timesteps adjusted to %d" % timesteps)
at_arguments[stepper.dim.min_name] = start
at_arguments[stepper.dim.max_name] = timesteps
if stepper.dim.is_Stepping:
at_arguments[stepper.dim.parent.min_name] = start
at_arguments[stepper.dim.parent.max_name] = timesteps
timesteps = init_time_bounds(stepper, at_args)
if timesteps is None:
return args
else:
warning("AutoTuner: Couldn't understand loop structure; giving up")
return arguments
return args

# Attempted block sizes ...
mapper = OrderedDict([(i.argument.symbolic_size.name, i) for i in tunable])
mapper = OrderedDict([(i.tunable.name, i) for i in tunable])
# ... Defaults (basic mode)
blocksizes = [OrderedDict([(i, v) for i in mapper]) for v in options['at_blocksize']]
# ... Always try the entire iteration space (degenerate block)
itershape = [mapper[i].iteration.symbolic_extent.subs(arguments) for i in mapper]
itershape = [mapper[i].iteration.symbolic_extent.subs(args) for i in mapper]
blocksizes.append(OrderedDict([(i, mapper[i].iteration.extent(0, j-1))
for i, j in zip(mapper, itershape)]))
# ... More attempts if auto-tuning in aggressive mode
if configuration['autotuning'].level == 'aggressive':
if level == 'aggressive':
blocksizes = more_heuristic_attempts(blocksizes)

# How many temporaries are allocated on the stack?
@@ -75,15 +92,18 @@
# square blocks are tested
timings = OrderedDict()
for bs in blocksizes:
# Can we safely autotune over the given time range?
check_time_bounds(stepper, at_args, args)

illegal = False
for k, v in at_arguments.items():
for k, v in at_args.items():
if k in bs:
val = bs[k]
start = mapper[k].original_dim.symbolic_start.subs(arguments)
end = mapper[k].original_dim.symbolic_end.subs(arguments)
start = mapper[k].original_dim.symbolic_start.subs(args)
end = mapper[k].original_dim.symbolic_end.subs(args)

if val <= mapper[k].iteration.extent(start, end):
at_arguments[k] = val
at_args[k] = val
else:
# Block size cannot be larger than actual dimension
illegal = True
@@ -93,7 +113,7 @@

# Make sure we remain within stack bounds, otherwise skip block size
dim_sizes = {}
for k, v in at_arguments.items():
for k, v in at_args.items():
if k in bs:
dim_sizes[mapper[k].argument.symbolic_size] = bs[k]
elif k in dim_mapper:
@@ -113,31 +133,93 @@

# Use AutoTuner-specific profiler structs
timer = operator.profiler.timer.reset()
at_arguments[operator.profiler.name] = timer
at_args[operator.profiler.name] = timer

operator.cfunction(*list(at_arguments.values()))
operator.cfunction(*list(at_args.values()))
elapsed = sum(getattr(timer._obj, i) for i, _ in timer._obj._fields_)
timings[tuple(bs.items())] = elapsed
perf("AutoTuner: Block shape <%s> took %f (s) in %d timesteps" %
(','.join('%d' % i for i in bs.values()), elapsed, timesteps))

# Prepare for the next autotuning run
update_time_bounds(stepper, at_args, timesteps, mode)

try:
best = dict(min(timings, key=timings.get))
perf("AutoTuner: Selected block shape %s" % best)
except ValueError:
warning("AutoTuner: Couldn't find legal block shapes")
return arguments
return args

# Build the new argument list
tuned = OrderedDict()
for k, v in arguments.items():
tuned[k] = best[k] if k in mapper else v
args = {k: best.get(k, v) for k, v in args.items()}

# In `runtime` mode, some timesteps have been executed already, so we
# get to adjust the time range
finalize_time_bounds(stepper, at_args, args, mode)

# Reset profiling data
assert operator.profiler.name in args
args[operator.profiler.name] = operator.profiler.timer.reset()

return args


def init_time_bounds(stepper, at_args):
if stepper is None:
return
dim = stepper.dim.root
if stepper.direction is Backward:
at_args[dim.max_name] = at_args[dim.max_name]
at_args[dim.min_name] = at_args[dim.max_name] - options['at_squeezer']
if at_args[dim.max_name] < at_args[dim.min_name]:
warning("AutoTuner: too few time iterations; giving up")
return False
else:
at_args[dim.min_name] = at_args[dim.min_name]
at_args[dim.max_name] = at_args[dim.min_name] + options['at_squeezer']
if at_args[dim.min_name] > at_args[dim.max_name]:
warning("AutoTuner: too few time iterations; giving up")
return False

return stepper.extent(start=at_args[dim.min_name], finish=at_args[dim.max_name])


def check_time_bounds(stepper, at_args, args):
if stepper is None:
return
dim = stepper.dim.root
if stepper.direction is Backward:
if at_args[dim.min_name] < args[dim.min_name]:
raise ValueError("Too few time iterations")

else:
if at_args[dim.max_name] > args[dim.max_name]:
raise ValueError("Too few time iterations")


# Reset the profiling struct
assert operator.profiler.name in tuned
tuned[operator.profiler.name] = operator.profiler.timer.reset()
def update_time_bounds(stepper, at_args, timesteps, mode):
if mode != 'runtime' or stepper is None:
return
dim = stepper.dim.root
if stepper.direction is Backward:
at_args[dim.max_name] -= timesteps
at_args[dim.min_name] -= timesteps
else:
at_args[dim.min_name] += timesteps
at_args[dim.max_name] += timesteps

return tuned

def finalize_time_bounds(stepper, at_args, args, mode):
if mode != 'runtime' or stepper is None:
return
dim = stepper.dim.root
if stepper.direction is Backward:
args[dim.max_name] = at_args[dim.max_name]
args[dim.min_name] = args[dim.min_name]
else:
args[dim.min_name] = at_args[dim.min_name]
args[dim.max_name] = args[dim.max_name]


def more_heuristic_attempts(blocksizes):
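The bulk of this file's changes introduce the `init/check/update/finalize_time_bounds` helpers: each trial run is confined to a short time window and, in `runtime` mode, the window slides (forward or backward, depending on the stepping direction) so that no timestep is executed twice. A simplified, self-contained sketch of the forward-direction bookkeeping (`SQUEEZER` is a hypothetical stand-in for `options['at_squeezer']`; `time_m`/`time_M` mirror the runtime argument names):

```python
SQUEEZER = 4  # hypothetical stand-in for options['at_squeezer']

def init_window(args, t_min='time_m', t_max='time_M'):
    """Shrink the user-supplied time range to a short trial window."""
    window = dict(args)
    window[t_max] = window[t_min] + SQUEEZER
    if window[t_min] > window[t_max]:
        return None  # too few timesteps to autotune over
    return window

def slide_window(window, timesteps, t_min='time_m', t_max='time_M'):
    """`runtime` mode: advance the window so the next trial runs fresh timesteps."""
    window[t_min] += timesteps
    window[t_max] += timesteps

args = {'time_m': 0, 'time_M': 100}
window = init_window(args)
for _ in range(3):                      # one trial per candidate block shape
    # ... the operator would be run over `window` here ...
    slide_window(window, SQUEEZER + 1)
print(window)                           # {'time_m': 15, 'time_M': 19}
```

In `preemptive` mode the same window is reused for every trial and the user's output data is shadow-copied, so repeated execution over the same timesteps does no harm.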
27 changes: 23 additions & 4 deletions devito/core/operator.py
@@ -70,11 +70,30 @@ def _generate_mpi(self, iet, **kwargs):

return iet

def _autotune(self, args):
if self._dle_flags.get('blocking', False):
return autotune(self, args, self.parameters, self._dle_args)
else:
def _autotune(self, args, setup):
if setup is False or not self._dle_flags.get('blocking'):
return args
elif setup is True:
level = configuration['autotuning'].level or 'basic'
args = autotune(self, args, level, configuration['autotuning'].mode)
elif isinstance(setup, str):
args = autotune(self, args, setup, configuration['autotuning'].mode)
elif isinstance(setup, tuple) and len(setup) == 2:
level, mode = setup
if level is False:
return args
else:
args = autotune(self, args, level, mode)
else:
raise ValueError("Expected bool, str, or 2-tuple, got `%s` instead"
% type(setup))

# Record the tuned values
mapper = self._state.setdefault('tuned', {})
mapper.update({k: v for k, v in args.items()
if k in [i.tunable.name for i in self._dle_args]})

return args


class OperatorDebug(OperatorCore):
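`_autotune` now receives a `setup` value that may be a bool, a level string, or a `(level, mode)` tuple, with the configuration supplying whatever is left unspecified. A standalone sketch of that normalisation (illustrative names; the defaults stand in for `configuration['autotuning']`):

```python
def normalize_at_setup(setup, default_level='basic', default_mode='preemptive'):
    """Reduce the user-facing `setup` value to a `(level, mode)` pair, or None."""
    if setup is False:
        return None                          # autotuning disabled
    if setup is True:
        return default_level, default_mode
    if isinstance(setup, str):
        return setup, default_mode
    if isinstance(setup, tuple) and len(setup) == 2:
        level, mode = setup
        return None if level is False else (level, mode)
    raise ValueError("Expected bool, str, or 2-tuple, got `%s` instead" % type(setup))

assert normalize_at_setup(True) == ('basic', 'preemptive')
assert normalize_at_setup('aggressive') == ('aggressive', 'preemptive')
assert normalize_at_setup(('basic', 'runtime')) == ('basic', 'runtime')
assert normalize_at_setup(False) is None
```

Presumably the value originates from an `autotune` keyword passed to `Operator.apply` (e.g. `op.apply(..., autotune=('aggressive', 'runtime'))`), though that call site is not part of this diff.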
18 changes: 13 additions & 5 deletions devito/data/allocators.py
@@ -65,9 +65,9 @@ def alloc(self, shape, dtype):
if c_pointer is None:
raise RuntimeError("Unable to allocate %d elements in memory", str(size))

c_pointer = ctypes.cast(c_pointer, np.ctypeslib.ndpointer(dtype=dtype,
shape=shape))
pointer = np.ctypeslib.as_array(c_pointer, shape=shape)
c_pointer_cast = ctypes.cast(
c_pointer, np.ctypeslib.ndpointer(dtype=dtype, shape=shape))
pointer = np.ctypeslib.as_array(c_pointer_cast, shape=shape)

return (pointer, memfree_args)

@@ -242,7 +242,14 @@ def _alloc_C_libcall(self, size, ctype):
if not self.available():
raise RuntimeError("Couldn't find `libnuma`'s `numa_alloc_*` to "
"allocate memory")
c_bytesize = ctypes.c_ulong(size * ctypes.sizeof(ctype))

if size == 0:
# work around the fact that the allocator may return NULL when
# the size is 0, and numpy does not like that
c_bytesize = ctypes.c_ulong(1)
else:
c_bytesize = ctypes.c_ulong(size * ctypes.sizeof(ctype))

if self.put_onnode:
c_pointer = self.lib.numa_alloc_onnode(c_bytesize, self._node)
elif self.put_local:
Expand All @@ -253,7 +260,8 @@ def _alloc_C_libcall(self, size, ctype):
# note! even though restype was set above, ctypes returns a
# python integer.
# See https://stackoverflow.com/questions/17840144/
if c_pointer == 0:
# edit: it apparently can return None, also!
if c_pointer == 0 or c_pointer is None:
return None, None
else:
# Convert it back to a void * - this is
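Two fixes here: the raw `c_pointer` is no longer overwritten by the cast (it is still needed by `memfree_args` to release the allocation), and a zero-byte request is rounded up to one byte so the NUMA allocator never hands back NULL for an empty array. A self-contained sketch of the cast pattern, using plain `malloc`/`free` as stand-ins for the allocator's actual libc/libnuma calls:

```python
import ctypes
import ctypes.util

import numpy as np

libc = ctypes.CDLL(ctypes.util.find_library('c'))
libc.malloc.restype = ctypes.c_void_p
libc.free.argtypes = [ctypes.c_void_p]

shape, dtype = (4, 4), np.float32
nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize

c_pointer = ctypes.c_void_p(libc.malloc(ctypes.c_size_t(nbytes)))
if not c_pointer.value:
    raise MemoryError("malloc returned NULL")

# Cast into a typed pointer *without* clobbering `c_pointer`, which must
# survive so the raw allocation can still be freed later
c_pointer_cast = ctypes.cast(
    c_pointer, np.ctypeslib.ndpointer(dtype=dtype, shape=shape))
array = np.ctypeslib.as_array(c_pointer_cast, shape=shape)

array[:] = 1.0        # the buffer is usable through NumPy...
libc.free(c_pointer)  # ...and the original pointer still frees it
```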
4 changes: 4 additions & 0 deletions devito/dle/backends/common.py
@@ -81,6 +81,10 @@ def __repr__(self):
def original_dim(self):
return self.iteration.dim

@property
def tunable(self):
return self.argument.symbolic_size


class AbstractRewriter(object):
"""
9 changes: 5 additions & 4 deletions devito/ir/support/space.py
@@ -6,7 +6,8 @@

from frozendict import frozendict

from devito.tools import PartialOrderTuple, as_tuple, filter_ordered, toposort
from devito.tools import PartialOrderTuple, as_tuple, filter_ordered, toposort, is_integer


__all__ = ['NullInterval', 'Interval', 'IntervalGroup', 'IterationSpace', 'DataSpace',
'Forward', 'Backward', 'Any']
@@ -112,8 +113,8 @@ class Interval(AbstractInterval):
is_Defined = True

def __init__(self, dim, lower, upper):
assert isinstance(lower, int)
assert isinstance(upper, int)
assert is_integer(lower)
assert is_integer(upper)
super(Interval, self).__init__(dim)
self.lower = lower
self.upper = upper
@@ -290,7 +291,7 @@ def zero(self, d=None):
relations=self.relations)

def __getitem__(self, key):
if isinstance(key, (slice, int)):
if isinstance(key, slice) or is_integer(key):
return super(IntervalGroup, self).__getitem__(key)
if not self.is_well_defined:
raise ValueError("Cannot fetch Interval from ill defined Space")
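`Interval` bounds and `IntervalGroup` indices are now validated with `is_integer` rather than `isinstance(..., int)`, so NumPy integer scalars (e.g. the result of `np.int32` arithmetic) are accepted as well. A sketch of what such a helper is assumed to check:

```python
import numpy as np

def is_integer(value):
    # assumed behaviour of `devito.tools.is_integer`: accept plain Python ints
    # as well as NumPy integer scalars
    return isinstance(value, (int, np.integer))

assert is_integer(4)
assert is_integer(np.int32(4))
assert not is_integer(4.0)
```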
