# test_gpu_openacc.py
# Taken from a repository forked from devitocodes/devito.
import pytest
import numpy as np
from conftest import skipif
from devito import Grid, Function, TimeFunction, Eq, Operator, norm, solve
from devito.data import LEFT
from devito.ir.iet import retrieve_iteration_tree
from examples.seismic import TimeAxis, RickerSource, Receiver
class TestCodeGeneration(object):
    """Checks on the OpenACC pragmas found in the generated Operator."""

    def test_basic(self):
        """
        Build a one-equation Operator with ``platform='nvidiaX'`` and
        ``language='openacc'``, then verify the pragma on the iteration tree
        and the data pragmas in the header/footer of ``op.body[1]``.
        """
        grid = Grid(shape=(3, 3, 3))
        u = TimeFunction(name='u', grid=grid)

        op = Operator(Eq(u.forward, u + 1), platform='nvidiaX', language='openacc')

        # Exactly one iteration tree is expected, whose second Iteration
        # carries the parallel-loop pragma
        trees = retrieve_iteration_tree(op)
        assert len(trees) == 1
        loop_pragma = trees[0][1].pragmas[0].value
        assert loop_pragma == 'acc parallel loop collapse(3)'

        # Array section of `u` as it appears in every data clause:
        # u[0:u_vec->size[0]]...[0:u_vec->size[3]]
        u_section = 'u' + ''.join('[0:u_vec->size[%d]]' % i for i in range(4))

        # The header of this node holds the `enter data` pragma ...
        data_node = op.body[1]
        assert data_node.header[0].value == 'acc enter data copyin(%s)' % u_section

        # ... while its footer holds, in order, the `copyout` and the `delete`
        exit_pragmas = data_node.footer[0].contents
        assert exit_pragmas[0].value == 'acc exit data copyout(%s)' % u_section
        assert exit_pragmas[1].value == 'acc exit data delete(%s)' % u_section
class TestOperator(object):
    """
    Tests that build and run Operators; both are decorated with
    ``skipif('nodevice')`` (presumably skipped when no device is available --
    see ``conftest.skipif``).
    """

    @skipif('nodevice')
    def test_op_apply(self):
        """
        Apply ``u.forward = u + 1`` on an int32 TimeFunction up to
        ``time_M=1000`` and check that ``u.data[0]`` equals 1000 everywhere.
        """
        grid = Grid(shape=(3, 3, 3))
        u = TimeFunction(name='u', grid=grid, dtype=np.int32)

        # No platform/language given here, unlike TestCodeGeneration
        op = Operator(Eq(u.forward, u + 1))

        # The generated code must contain OpenACC pragmas
        generated = str(op)
        assert 'acc parallel' in generated

        nsteps = 1000
        op.apply(time_M=nsteps)

        result = np.array(u.data[0, :, :, :])
        assert np.all(result == nsteps)

    @skipif('nodevice')
    def test_iso_ac(self):
        """
        Run a 2D problem with one Ricker source and 101 receivers, solving
        ``m * u.dt2 - u.laplace`` for ``u.forward``, and compare the norm of
        the receiver data against a reference value (490.56, atol 1e-2).
        """
        shape = (101, 101)
        extent = (1000, 1000)
        origin = (0., 0.)

        # Two-layer model: 1.5 in the first 51 columns, 2.5 in the rest
        velocity = np.empty(shape, dtype=np.float32)
        velocity[:, :51] = 1.5
        velocity[:, 51:] = 2.5

        grid = Grid(shape=shape, extent=extent, origin=origin)

        # Time axis
        t0, tn, dt = 0., 1000., 1.6
        time_range = TimeAxis(start=t0, stop=tn, step=dt)

        # Source: placed at half the extent, then last coordinate set to 20
        f0 = 0.010
        src = RickerSource(name='src', grid=grid, f0=f0, npoint=1,
                           time_range=time_range)
        extent_arr = np.array(extent)
        src.coordinates.data[0, :] = extent_arr*.5
        src.coordinates.data[0, -1] = 20.

        # Receivers: evenly spread along the first coordinate, second one at 20
        rec = Receiver(name='rec', grid=grid, npoint=101, time_range=time_range)
        rec.coordinates.data[:, 0] = np.linspace(0, extent_arr[0], num=101)
        rec.coordinates.data[:, 1] = 20.

        u = TimeFunction(name="u", grid=grid, time_order=2, space_order=2)

        # `m` holds the reciprocal of the squared velocity
        m = Function(name='m', grid=grid)
        m.data[:] = 1./(velocity*velocity)

        wave_eq = m * u.dt2 - u.laplace
        update = Eq(u.forward, solve(wave_eq, u.forward))

        injection = src.inject(field=u.forward, expr=src * dt**2 / m)
        recording = rec.interpolate(expr=u.forward)

        op = Operator([update] + injection + recording)

        # The generated code must contain OpenACC pragmas
        generated = str(op)
        assert 'acc parallel' in generated

        op(time=time_range.num-1, dt=dt)

        assert np.isclose(norm(rec), 490.56, atol=1e-2, rtol=0)
class TestMPI(object):
    """
    OpenACC tests marked with ``pytest.mark.parallel(mode=2)`` and
    ``skipif('nodevice')``.
    """

    @skipif('nodevice')
    @pytest.mark.parallel(mode=2)
    def test_basic(self):
        """
        Run two time iterations (``time_M=1``) of a 5-point sum on a 6x6 grid
        initialised to 1 and compare ``u.data[0]`` against hard-coded values,
        chosen according to whether ``LEFT`` is in ``glb_pos_map[x]``.
        """
        grid = Grid(shape=(6, 6))
        x, y = grid.dimensions
        t = grid.stepping_dim

        u = TimeFunction(name='u', grid=grid, space_order=2)
        u.data[:] = 1.

        # Sum of the point itself and its four nearest neighbours
        five_point_sum = (u[t, x, y-1] + u[t, x-1, y] + u[t, x, y] +
                          u[t, x, y+1] + u[t, x+1, y])
        op = Operator(Eq(u.forward, five_point_sum),
                      platform='nvidiaX', language='openacc')

        # The generated code must contain OpenACC pragmas, and the Operator's
        # function table is expected to have four entries
        generated = str(op)
        assert 'acc parallel' in generated
        assert len(op._func_table) == 4

        op(time_M=1)

        # Expected values where LEFT is in `glb_pos_map[x]`; otherwise the
        # same three rows are expected in reverse order
        left_rows = [[11., 16., 17., 17., 16., 11.],
                     [16., 23., 24., 24., 23., 16.],
                     [17., 24., 25., 25., 24., 17.]]

        glb_pos_map = grid.distributor.glb_pos_map
        expected = left_rows if LEFT in glb_pos_map[x] else left_rows[::-1]
        assert np.all(u.data[0] == expected)

    @skipif('nodevice')
    @pytest.mark.parallel(mode=2)
    def test_iso_ac(self):
        """Re-run ``TestOperator.test_iso_ac`` under the parallel marker."""
        serial_suite = TestOperator()
        serial_suite.test_iso_ac()