devitocodes · FabioLuporini · Apr 9, 2024 · Apr 8, 2024 · Apr 8, 2024
diff --git a/devito/passes/clusters/blocking.py b/devito/passes/clusters/blocking.py
@@ -160,21 +160,29 @@ def __init__(self, options):
     def _make_key_hook(self, cluster, level):
         return (is_on_device(cluster.functions, self.gpu_fit),)
 
+    def _has_other_blockable_dim(self, cluster, d):
+        return any(cluster.properties.is_parallel_relaxed(i) and
+                   not self._has_short_trip_count(i)
+                   for i in set(cluster.ispace.itdims) - {d})
+
     def callback(self, clusters, prefix):
         if not prefix:
             return clusters
 
         d = prefix[-1].dim
-        if self._has_short_trip_count(d):
-            return clusters
 
         processed = []
         for c in clusters:
             if not c.properties.is_parallel_relaxed(d):
                 return clusters
 
             if is_on_device(c.functions, self.gpu_fit):
-                if self._has_data_reuse(c):
+                if self._has_short_trip_count(d):
+                    if self._has_other_blockable_dim(c, d):
+                        return clusters
+                    else:
+                        properties = c.properties.block(d, 'small')
+                elif self._has_data_reuse(c):
                     properties = c.properties.block(d)
                 else:
                     properties = c.properties.block(d, 'small')

diff --git a/tests/test_dle.py b/tests/test_dle.py
@@ -6,7 +6,7 @@
 
 from conftest import assert_structure, assert_blocking, _R, skipif
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, SpaceDimension,
-                    CustomDimension, Dimension, SubDimension,
+                    CustomDimension, Dimension, DefaultDimension, SubDimension,
                     PrecomputedSparseTimeFunction, Eq, Inc, ReduceMin, ReduceMax,
                     Operator, configuration, dimensions, info, cos)
 from devito.exceptions import InvalidArgument
@@ -179,120 +179,138 @@ def test_cache_blocking_structure_distributed(mode):
         assert iters[4].dim is z
 
 
-def test_cache_blocking_structure_optrelax():
-    grid = Grid(shape=(8, 8, 8))
+class TestBlockingOptRelax:
 
-    u = TimeFunction(name="u", grid=grid, space_order=2)
-    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
-                             coordinates=np.array([(0.5, 0.5, 0.5)]))
-
-    eqns = [Eq(u.forward, u.dx)]
-    eqns += src.inject(field=u.forward, expr=src)
-
-    op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
-
-    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
-
-    iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
-    assert len(iters) == 5
-    assert iters[0].dim.is_Block
-    assert iters[1].dim.is_Block
-
-
-def test_cache_blocking_structure_optrelax_customdim():
-    grid = Grid(shape=(8, 8, 8))
-    d = CustomDimension(name='d', symbolic_size=2)
-    x, y, z = grid.dimensions
-
-    u = TimeFunction(name="u", grid=grid)
-    f = Function(name="f", grid=grid, dimensions=(d, x, y, z), shape=(2,) + grid.shape)
-
-    eqn = Eq(f, u[d, x, y, z] + u[d, x + 1, y, z])
-
-    op = Operator(eqn, opt=('advanced', {'blockrelax': True}))
-
-    _, _ = assert_blocking(op, {'x0_blk0'})
-    assert_structure(op, ['d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
-                     'd,x0_blk0,y0_blk0,z0_blk0,x,y,z')
-
-
-def test_cache_blocking_structure_leftright_subdims():
-    grid = Grid(shape=(12, 12))
-    nbl = 3
+    def test_basic(self):
+        grid = Grid(shape=(8, 8, 8))
 
-    damp = Function(name='damp', grid=grid)
+        u = TimeFunction(name="u", grid=grid, space_order=2)
+        src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
+                                 coordinates=np.array([(0.5, 0.5, 0.5)]))
 
-    eqns = [Eq(damp, 0.)]
-    for d in damp.dimensions:
-        # Left
-        dl = SubDimension.left(name='%sl' % d.name, parent=d, thickness=nbl)
-        eqns.extend([Inc(damp.subs({d: dl}), 1.)])
-        # right
-        dr = SubDimension.right(name='%sr' % d.name, parent=d, thickness=nbl)
-        eqns.extend([Inc(damp.subs({d: dr}), 1.)])
+        eqns = [Eq(u.forward, u.dx)]
+        eqns += src.inject(field=u.forward, expr=src)
 
-    op = Operator(eqns, opt=('fission', 'blocking', {'blockrelax': 'device-aware'}))
+        op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-    bns, _ = assert_blocking(op,
-                             {'x0_blk0', 'xl0_blk0', 'xr0_blk0', 'x1_blk0', 'x2_blk0'})
-    assert all(IsPerfectIteration().visit(i) for i in bns.values())
-    assert all(len(FindNodes(Iteration).visit(i)) == 4 for i in bns.values())
+        bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
 
+        iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
+        assert len(iters) == 5
+        assert iters[0].dim.is_Block
+        assert iters[1].dim.is_Block
 
-@pytest.mark.parametrize('opt, expected', [('noop', ('ijk', 'ikl')),
-                         (('advanced', {'blockinner': True, 'blockrelax': True}),
-                         ('i0_blk0ijk', 'i0_blk0ikl'))])
-def test_cache_blocking_structure_optrelax_linalg(opt, expected):
-    mat_shape = (4, 4)
+    def test_customdim(self):
+        grid = Grid(shape=(8, 8, 8))
+        d = CustomDimension(name='d', symbolic_size=2)
+        x, y, z = grid.dimensions
 
-    i, j, k, l = dimensions('i j k l')
-    A = Function(name='A', shape=mat_shape, dimensions=(i, j))
-    B = Function(name='B', shape=mat_shape, dimensions=(j, k))
-    C = Function(name='C', shape=mat_shape, dimensions=(j, k))
-    D = Function(name='D', shape=mat_shape, dimensions=(i, k))
-    E = Function(name='E', shape=mat_shape, dimensions=(k, l))
-    F = Function(name='F', shape=mat_shape, dimensions=(i, l))
+        u = TimeFunction(name="u", grid=grid)
+        f = Function(name="f", grid=grid, dimensions=(d, x, y, z),
+                     shape=(2,) + grid.shape)
 
-    eqs = [Inc(D, A*B + A*C), Inc(F, D*E)]
+        eqn = Eq(f, u[d, x, y, z] + u[d, x + 1, y, z])
 
-    A.data[:] = 1
-    B.data[:] = 1
-    C.data[:] = 1
-    E.data[:] = 1
+        op = Operator(eqn, opt=('advanced', {'blockrelax': True}))
 
-    op0 = Operator(eqs, opt=opt)
-    op0.apply()
-    assert_structure(op0, expected)
-    assert np.linalg.norm(D.data) == 32.0
-    assert np.linalg.norm(F.data) == 128.0
+        assert_blocking(op, {'x0_blk0'})
+        assert_structure(op, ['d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
+                         'd,x0_blk0,y0_blk0,z0_blk0,x,y,z')
 
+    def test_defaultdim_alone(self):
+        grid = Grid(shape=(8, 8, 8))
+        d = DefaultDimension(name='d', default_value=2)
+        time = grid.time_dim
+        x, y, z = grid.dimensions
 
-def test_cache_blocking_structure_optrelax_prec_inject():
-    grid = Grid(shape=(10, 10))
-    dt = grid.stepping_dim.spacing
+        u = TimeFunction(name="u", grid=grid)
+        f = Function(name="f", grid=grid, dimensions=(d, x, y, z),
+                     shape=(2,) + grid.shape)
+
+        eqn = Inc(f, u*cos(time*d))
+
+        op = Operator(eqn, opt=('advanced', {'blockrelax': 'device-aware'}))
+
+        assert_blocking(op, {'d0_blk0', 'x0_blk0'})
+        assert_structure(op,
+                         ['t,d0_blk0,d', 't,d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
+                         't,d0_blk0,d,d,x0_blk0,y0_blk0,z0_blk0,x,y,z')
+
+    def test_leftright_subdims(self):
+        grid = Grid(shape=(12, 12))
+        nbl = 3
+
+        damp = Function(name='damp', grid=grid)
+
+        eqns = [Eq(damp, 0.)]
+        for d in damp.dimensions:
+            # Left
+            dl = SubDimension.left(name='%sl' % d.name, parent=d, thickness=nbl)
+            eqns.extend([Inc(damp.subs({d: dl}), 1.)])
+            # Right
+            dr = SubDimension.right(name='%sr' % d.name, parent=d, thickness=nbl)
+            eqns.extend([Inc(damp.subs({d: dr}), 1.)])
+
+        op = Operator(eqns, opt=('fission', 'blocking', {'blockrelax': 'device-aware'}))
+
+        bns, _ = assert_blocking(op, {'x0_blk0', 'xl0_blk0', 'xr0_blk0',
+                                      'x1_blk0', 'x2_blk0'})
+        assert all(IsPerfectIteration().visit(i) for i in bns.values())
+        assert all(len(FindNodes(Iteration).visit(i)) == 4 for i in bns.values())
+
+    @pytest.mark.parametrize('opt, expected', [('noop', ('ijk', 'ikl')),
+                             (('advanced', {'blockinner': True, 'blockrelax': True}),
+                             ('i0_blk0ijk', 'i0_blk0ikl'))])
+    def test_linalg(self, opt, expected):
+        mat_shape = (4, 4)
+
+        i, j, k, l = dimensions('i j k l')
+        A = Function(name='A', shape=mat_shape, dimensions=(i, j))
+        B = Function(name='B', shape=mat_shape, dimensions=(j, k))
+        C = Function(name='C', shape=mat_shape, dimensions=(j, k))
+        D = Function(name='D', shape=mat_shape, dimensions=(i, k))
+        E = Function(name='E', shape=mat_shape, dimensions=(k, l))
+        F = Function(name='F', shape=mat_shape, dimensions=(i, l))
+
+        eqs = [Inc(D, A*B + A*C), Inc(F, D*E)]
+
+        A.data[:] = 1
+        B.data[:] = 1
+        C.data[:] = 1
+        E.data[:] = 1
+
+        op0 = Operator(eqs, opt=opt)
+        op0.apply()
+        assert_structure(op0, expected)
+        assert np.linalg.norm(D.data) == 32.0
+        assert np.linalg.norm(F.data) == 128.0
+
+    def test_prec_inject(self):
+        grid = Grid(shape=(10, 10))
+        dt = grid.stepping_dim.spacing
 
-    u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)
+        u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)
 
-    # The values we put it don't matter, we won't run an Operator
-    points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
-    gridpoints = [(5, 90), (1, 80), (7, 84)]
-    interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
-    sf = PrecomputedSparseTimeFunction(
-        name='s', grid=grid, r=2, npoint=len(points), nt=5,
-        gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
-    )
+        # The values we put in don't matter, we won't run an Operator
+        points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
+        gridpoints = [(5, 90), (1, 80), (7, 84)]
+        interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
+        sf = PrecomputedSparseTimeFunction(
+            name='s', grid=grid, r=2, npoint=len(points), nt=5,
+            gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
+        )
 
-    eqns = sf.inject(field=u.forward, expr=sf * dt**2)
+        eqns = sf.inject(field=u.forward, expr=sf * dt**2)
 
-    op = Operator(eqns, opt=('advanced', {'blockrelax': 'device-aware',
-                                          'openmp': True,
-                                          'par-collapse-ncores': 1}))
+        op = Operator(eqns, opt=('advanced', {'blockrelax': 'device-aware',
+                                              'openmp': True,
+                                              'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
-                     't,p_s0_blk0,p_s,rsx,rsy')
+        assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
+                         't,p_s0_blk0,p_s,rsx,rsy')
 
 
-class TestBlockingParTile(object):
+class TestBlockingParTile:
 
     @pytest.mark.parametrize('par_tile,expected', [
         ((16, 16, 16), ((16, 16, 16), (16, 16, 16))),
@@ -582,7 +600,7 @@ def test_cache_blocking_imperfect_nest_v2(blockinner):
     assert np.allclose(u.data, u2.data, rtol=1e-07)
 
 
-class TestNodeParallelism(object):
+class TestNodeParallelism:
 
     def test_nthreads_generation(self):
         grid = Grid(shape=(10, 10))
@@ -1145,7 +1163,7 @@ def test_parallel_prec_inject(self):
         assert 'omp for collapse' in iterations[1].pragmas[0].value
 
 
-class TestNestedParallelism(object):
+class TestNestedParallelism:
 
     def test_basic(self):
         grid = Grid(shape=(3, 3, 3))