compiler: Yet another batch of compilation tweaks #2396

Merged · 18 commits · Jul 8, 2024

Changes from 1 commit:
compiler: Switch from local to global autopadding
FabioLuporini committed Jul 2, 2024
commit 60f5d43824107de3035e36505ce3db4d8c6bc930
8 changes: 2 additions & 6 deletions devito/passes/iet/linearization.py
@@ -71,12 +71,8 @@ def key1(f, d):
     """
     if f.is_regular:
         # For paddable objects the following holds:
-        # `same dim + same halo + same dtype => same (auto-)padding`
-        # Bundle need the actual function dtype
-        if f.is_Bundle:
-            return (d, f._size_halo[d], f.is_autopaddable, f.c0.dtype)
-        else:
-            return (d, f._size_halo[d], f.is_autopaddable, f.dtype)
+        # `same dim + same halo + same padding_dtype => same (auto-)padding`
+        return (d, f._size_halo[d], f.__padding_dtype__)
     else:
         return False
 
12 changes: 7 additions & 5 deletions devito/types/array.py
@@ -367,6 +367,8 @@ def __args_setup__(cls, *args, **kwargs):
             raise ValueError("Components must be of same type")
         if not issubclass(klss.pop(), AbstractFunction):
             raise ValueError("Component type must be subclass of AbstractFunction")
+        if len({i.__padding_dtype__ for i in components}) != 1:
+            raise ValueError("Components must have the same padding dtype")
 
         return args, kwargs
 
@@ -438,14 +440,14 @@ def ncomp(self):
     def initvalue(self):
         return None
 
-    # CodeSymbol overrides defaulting to self.c0's behaviour
+    # Overrides defaulting to self.c0's behaviour
 
     for i in ['_mem_internal_eager', '_mem_internal_lazy', '_mem_local',
               '_mem_mapped', '_mem_host', '_mem_stack', '_mem_constant',
-              '_mem_shared', '_size_domain', '_size_halo', '_size_owned',
-              '_size_padding', '_size_nopad', '_size_nodomain', '_offset_domain',
-              '_offset_halo', '_offset_owned', '_dist_dimensions', '_C_get_field',
-              'grid', 'symbolic_shape']:
+              '_mem_shared', '__padding_dtype__', '_size_domain', '_size_halo',
+              '_size_owned', '_size_padding', '_size_nopad', '_size_nodomain',
+              '_offset_domain', '_offset_halo', '_offset_owned', '_dist_dimensions',
+              '_C_get_field', 'grid', 'symbolic_shape']:
         locals()[i] = property(lambda self, v=i: getattr(self.c0, v))
 
     @property
45 changes: 28 additions & 17 deletions devito/types/basic.py
@@ -999,30 +999,41 @@ def __padding_setup__(self, **kwargs):
         padding = tuple(kwargs.get('padding', ((0, 0),)*self.ndim))
         return DimensionTuple(*padding, getters=self.dimensions)
 
+    @cached_property
+    def __padding_dtype__(self):
+        v = configuration['autopadding']
+        if not self.is_autopaddable or not v:
+            return None
+        try:
+            if issubclass(v, np.number):
+                return v
+        except TypeError:
+            return np.float32
+
     def __padding_setup_smart__(self, **kwargs):
         nopadding = ((0, 0),)*self.ndim
 
-        if kwargs.get('autopadding', configuration['autopadding']):
-            # The padded Dimension
-            candidates = self.space_dimensions
-            if not candidates:
-                return nopadding
-            d = candidates[-1]
+        if not self.__padding_dtype__:
+            return nopadding
+
+        # The padded Dimension
+        candidates = self.space_dimensions
+        if not candidates:
+            return nopadding
+        d = candidates[-1]
 
-            mmts = configuration['platform'].max_mem_trans_size(self.dtype)
-            remainder = self._size_nopad[d] % mmts
-            if remainder == 0:
-                # Already a multiple of `mmts`, no need to pad
-                return nopadding
+        mmts = configuration['platform'].max_mem_trans_size(self.__padding_dtype__)
+        remainder = self._size_nopad[d] % mmts
+        if remainder == 0:
+            # Already a multiple of `mmts`, no need to pad
+            return nopadding
 
-            dpadding = (0, (mmts - remainder))
-            padding = [(0, 0)]*self.ndim
-            padding[self.dimensions.index(d)] = dpadding
+        dpadding = (0, (mmts - remainder))
+        padding = [(0, 0)]*self.ndim
+        padding[self.dimensions.index(d)] = dpadding
 
-            return tuple(padding)
-        else:
-            return nopadding
+        return tuple(padding)
 
     def __ghost_setup__(self, **kwargs):
         return (0, 0)
 

Contributor (review comment on the `candidates = self.space_dimensions` lines):

Could shorten to:

if not self.space_dimensions:
    return nopadding
d = self.space_dimensions[-1]

Contributor Author:

right, fixing
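For readers following along, here is a standalone sketch of how the new __padding_dtype__ rules resolve, mirroring the property added above. resolve_padding_dtype and its arguments are illustrative names for this sketch, not Devito API.

import numpy as np

def resolve_padding_dtype(autopadding, is_autopaddable):
    # Mirrors __padding_dtype__: no padding dtype if the object cannot be
    # autopadded or autopadding is disabled; the configured numpy dtype if
    # one is given; float32 as the default for a plain truthy flag.
    if not is_autopaddable or not autopadding:
        return None
    try:
        if issubclass(autopadding, np.number):
            return autopadding
    except TypeError:
        return np.float32

print(resolve_padding_dtype(False, True))       # None (autopadding disabled)
print(resolve_padding_dtype(True, True))        # numpy.float32 (default)
print(resolve_padding_dtype(np.float64, True))  # numpy.float64 (explicit choice)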
2 changes: 1 addition & 1 deletion tests/test_linearize.py
@@ -611,4 +611,4 @@ def test_different_dtype():
 
     # Check generated code has different strides for different dtypes
     assert "bL0(x,y) b[(x)*y_stride0 + (y)]" in str(op1)
-    assert "L0(x,y) f[(x)*y_stride1 + (y)]" in str(op1)
+    assert "L0(x,y) f[(x)*y_stride0 + (y)]" in str(op1)
Contributor:
Can we parametrize this test with autopadding, so that the y_stride1 case is also covered for "mixed" padding?

Contributor Author:

It's not possible by construction anymore; autopadding is now, in essence, a global choice. This means that if you have an fp32 and an fp64 function, the data will be padded according to either fp32 or fp64; in the former case, performance will be suboptimal. Regardless, we now always generate only one stride per dimension. See the logic in types/basic.py::AbstractFunction::__padding_dtype__.

That said, I've now parametrized the test as you suggested, but the expected outcome is always y_stride0.
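To illustrate the "one stride per dimension" point, here is a back-of-the-envelope sketch of the padding arithmetic from __padding_setup_smart__, using assumed numbers (a 64-byte memory transaction, i.e. 16 float32 or 8 float64 elements); padded_extent is an illustrative helper, not Devito API.

import numpy as np

def padded_extent(nopad_extent, padding_dtype, transaction_bytes=64):
    # Pad the innermost extent up to a multiple of the maximum memory
    # transaction size, measured in elements of the *global* padding dtype.
    mmts = transaction_bytes // np.dtype(padding_dtype).itemsize
    remainder = nopad_extent % mmts
    if remainder == 0:
        return nopad_extent
    return nopad_extent + (mmts - remainder)

# With a global float32 padding dtype, an fp32 and an fp64 Function with the
# same innermost extent get the same padded extent, hence a single y_stride:
print(padded_extent(101, np.float32))  # 112, regardless of the data dtype
# Per-dtype (local) padding would instead have given 104 for fp64 data:
print(padded_extent(101, np.float64))  # 104

This is consistent with the parametrized test always expecting y_stride0 after the change.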