
Commit 3dbf9e5

Huyuweitqchen authored and committed
[TOP][Example] register pool, global_pool; add mobilenet example (#32)
* register pool, global_pool; add mobilenet example
* tests of pool and global_pool
* use new API of runtime module
* small fix
1 parent 71a7618 commit 3dbf9e5

6 files changed: 308 additions, 84 deletions

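The "new API of runtime module" mentioned in the message is the change visible in the test diffs below: instead of looking up the module's packed member functions by name, callers now invoke methods on the runtime module directly and pass numpy arrays as keyword inputs. A minimal before/after sketch, assuming an "llvm" target and CPU context (the op, shapes, and target here are illustrative, not taken from this commit):

# Illustrative sketch of the runtime-module API change this commit adopts,
# shown on a one-op graph. Target/context/shape are assumptions for the sketch.
import numpy as np
import tvm
import nnvm.symbol as sym
import nnvm.compiler
import nnvm.runtime

x = sym.Variable("x")
y = sym.exp(x)
dshape = (1, 3, 32, 32)
graph, lib, _ = nnvm.compiler.build(y, "llvm", {"x": dshape})
m = nnvm.runtime.create(graph, lib, tvm.cpu(0))

data = np.random.uniform(size=dshape).astype("float32")

# Old style (removed in this commit): fetch packed functions by name.
#   set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
#   set_input("x", tvm.nd.array(data))
#   run()
#   out = tvm.nd.empty(dshape, "float32"); get_output(0, out)

# New style: call module methods directly.
m.run(x=data)
out = m.get_output(0, tvm.nd.empty(dshape, "float32"))
np.testing.assert_allclose(out.asnumpy(), np.exp(data), atol=1e-5, rtol=1e-5)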
Lines changed: 117 additions & 0 deletions (new file: the MobileNet GPU example)
@@ -0,0 +1,117 @@
"""Forward propagation of MobileNet on GPU."""
import numpy as np
import time
import os

import tvm
import topi
import nnvm.symbol as sym
import nnvm.compiler
import nnvm.runtime
from tvm.contrib import nvcc

TASK = "mobilenet"

target = 'cuda'
ctx = tvm.gpu(0)

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx", options=["-arch=sm_60"])
    return ptx

def write_code(code, fname):
    with open(fname, "w") as f:
        f.write(code)

@tvm.register_func
def tvm_callback_cuda_postproc(code):
    if not os.path.exists("perf"):
        os.mkdir("perf")
    write_code(code, "perf/%s_generated.cu" % TASK)
    return code

dtype = 'float32'
epsilon = 1e-10 + 1e-5

def conv_block(data, name, channels, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1)):
    # convolution + bn + relu
    conv = sym.conv2d(data=data, channels=channels, kernel_size=kernel_size, strides=strides,
                      padding=padding, use_bias=False, layout='NCHW', name=name + '_conv')
    bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + '_bn')
    act = sym.relu(data=bn, name=name + '_relu')
    return act

def separable_conv_block(data, name, depthwise_channels, pointwise_channels, kernel_size=(3, 3), downsample=False, padding=(1, 1)):
    if downsample:
        strides = (2, 2)
    else:
        strides = (1, 1)
    # depthwise convolution + bn + relu
    conv1 = sym.conv2d(data=data, channels=depthwise_channels, groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
                       padding=padding, use_bias=False, layout='NCHW', name=name + '_conv1')
    bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + '_bn1')
    act1 = sym.relu(data=bn1, name=name + '_relu1')
    # pointwise convolution + bn + relu
    conv2 = sym.conv2d(data=act1, channels=pointwise_channels, kernel_size=(1, 1), strides=(1, 1),
                       padding=(0, 0), use_bias=False, layout='NCHW', name=name + '_conv2')
    bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + '_bn2')
    act2 = sym.relu(data=bn2, name=name + '_relu2')
    return act2

def mobile_net(num_classes=1000, alpha=1.0, is_shallow=False):
    data = sym.Variable("data")
    body = conv_block(data, 'conv_block_1', int(32*alpha), strides=(2, 2))
    body = separable_conv_block(body, 'separable_conv_block_1', int(32*alpha), int(64*alpha))
    body = separable_conv_block(body, 'separable_conv_block_2', int(64*alpha), int(128*alpha), downsample=True)
    body = separable_conv_block(body, 'separable_conv_block_3', int(128*alpha), int(128*alpha))
    body = separable_conv_block(body, 'separable_conv_block_4', int(128*alpha), int(256*alpha), downsample=True)
    body = separable_conv_block(body, 'separable_conv_block_5', int(256*alpha), int(256*alpha))
    body = separable_conv_block(body, 'separable_conv_block_6', int(256*alpha), int(512*alpha), downsample=True)
    if is_shallow:
        body = separable_conv_block(body, 'separable_conv_block_7', int(512*alpha), int(1024*alpha), downsample=True)
        body = separable_conv_block(body, 'separable_conv_block_8', int(1024*alpha), int(1024*alpha))
    else:
        for i in range(7, 12):
            body = separable_conv_block(body, 'separable_conv_block_%d' % i, int(512*alpha), int(512*alpha))
        body = separable_conv_block(body, 'separable_conv_block_12', int(512*alpha), int(1024*alpha), downsample=True)
        body = separable_conv_block(body, 'separable_conv_block_13', int(1024*alpha), int(1024*alpha))
    pool = sym.global_avg_pool2d(data=body, name='pool')
    flatten = sym.flatten(data=pool, name='flatten')
    fc = sym.dense(data=flatten, units=num_classes, use_bias=False, name='fc')
    softmax = sym.softmax(data=fc, name='softmax')
    return softmax


batch_size = 1
num_classes = 1000
image_shape = (3, 224, 224)
data_shape = (batch_size,) + image_shape
out_shape = (batch_size, num_classes)

net = mobile_net(num_classes=num_classes, alpha=1.0, is_shallow=False)

# build graph
with nnvm.compiler.build_config(opt_level=2):
    graph, lib, _ = nnvm.compiler.build(net, target, {'data': data_shape})
# prepare params (random weights; this example measures speed, not accuracy)
params = {}
names = graph.index.input_names
shapes = [graph.json_attr("shape")[graph.index.entry_id(x)] for x in names]
for i in range(len(names)):
    params[names[i]] = tvm.nd.array(np.random.uniform(-0.1, 0.1, size=shapes[i]).astype(dtype), ctx=ctx)
# create runtime module
module = nnvm.runtime.create(graph, lib, ctx)
# set input
module.set_input(**params)
# warm-up run
print("run")
module.run()
ctx.sync()
start = time.time()
for i in range(1000):
    module.run()
ctx.sync()
# total elapsed seconds over 1000 runs is numerically the average milliseconds per run
print("average time cost of 1000 runs = %g ms" % ((time.time() - start)))
# get output
out = module.get_output(0, tvm.nd.empty(out_shape, dtype))
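A quick sanity check one might append to the script above (not part of the commit): since the last layer is a softmax, every output row should sum to roughly one, even with random weights.

# Sanity-check sketch (not in the original example); assumes the script above
# has already produced `out`, `out_shape`, and `batch_size`.
probs = out.asnumpy()
assert probs.shape == out_shape
np.testing.assert_allclose(probs.sum(axis=1), np.ones(batch_size), rtol=1e-3)
print("top-1 class index:", int(np.argmax(probs, axis=1)[0]))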

nnvm/include/nnvm/top/nn.h

Lines changed: 0 additions & 7 deletions
@@ -202,7 +202,6 @@ struct Pool2DParam : public dmlc::Parameter<Pool2DParam> {
  TShape pool_size;
  TShape strides;
  TShape padding;
- int groups;
  int layout;
  bool ceil_mode;

@@ -214,12 +213,6 @@ struct Pool2DParam : public dmlc::Parameter<Pool2DParam> {
  DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
    .describe("If padding is non-zero, then the input is implicitly zero-padded"
              "on both sides for padding number of points");
- DMLC_DECLARE_FIELD(groups).set_default(1)
-   .describe("Controls the connections between inputs and outputs."
-             "At groups=1, all inputs are convolved to all outputs."
-             "At groups=2, the operation becomes equivalent to having two convolution"
-             "layers side by side, each seeing half the input channels, and producing"
-             "half the output channels, and both subsequently concatenated.");
  DMLC_DECLARE_FIELD(layout)
    .add_enum("NCHW", kNCHW)
    .add_enum("NHWC", kNHWC)

nnvm/python/nnvm/top/nn.py

Lines changed: 89 additions & 2 deletions
@@ -18,6 +18,7 @@ def compute_relu(attrs, inputs, _):
reg.register_schedule("relu", _fschedule_broadcast)
reg.register_pattern("relu", OpPattern.ELEMWISE)

+
# leaky_relu
@reg.register_compute("leaky_relu")
def compute_leaky_relu(attrs, inputs, _):
@@ -27,6 +28,7 @@ def compute_leaky_relu(attrs, inputs, _):
reg.register_schedule("leaky_relu", _fschedule_broadcast)
reg.register_pattern("leaky_relu", OpPattern.ELEMWISE)

+
# flatten
@reg.register_compute("flatten")
def compute_flatten(attrs, inputs, _):
@@ -73,11 +75,10 @@ def schedule_dense(_, outs, target):
    # naive schedule
    return tvm.create_schedule([x.op for x in outs])

-# register extern for now, change me when fusion is enabled.
reg.register_pattern("dense", OpPattern.OUT_ELEMWISE_FUSABLE)


-# conv
+# conv2d
@reg.register_compute("conv2d")
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
@@ -113,3 +114,89 @@ def schedule_conv2d(attrs, outs, target):
    return tvm.create_schedule([x.op for x in outs])

reg.register_pattern("conv2d", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
+# max_pool2d
+@reg.register_compute("max_pool2d")
+def compute_max_pool2d(attrs, inputs, _):
+    """Compute definition of max_pool2d"""
+    pool_size = attrs.get_int_tuple("pool_size")
+    strides = attrs.get_int_tuple("strides")
+    padding = attrs.get_int_tuple("padding")
+    layout = attrs["layout"]
+    ceil_mode = attrs["ceil_mode"]
+    assert layout == "NCHW", "only support nchw for now"
+    assert ceil_mode == "False", "not support ceil_mode now"
+    return topi.nn.pool(inputs[0], pool_size, strides, padding, pool_type='max')
+
+@reg.register_schedule("max_pool2d")
+def schedule_max_pool2d(_, outs, target):
+    """Schedule definition of max_pool2d"""
+    if target == "cuda":
+        return topi.cuda.schedule_pool(outs)
+    # naive schedule
+    return tvm.create_schedule([x.op for x in outs])
+
+reg.register_pattern("max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
+# avg_pool2d
+@reg.register_compute("avg_pool2d")
+def compute_avg_pool2d(attrs, inputs, _):
+    """Compute definition of avg_pool2d"""
+    pool_size = attrs.get_int_tuple("pool_size")
+    strides = attrs.get_int_tuple("strides")
+    padding = attrs.get_int_tuple("padding")
+    layout = attrs["layout"]
+    ceil_mode = attrs["ceil_mode"]
+    assert layout == "NCHW", "only support nchw for now"
+    assert ceil_mode == "False", "not support ceil_mode now"
+    return topi.nn.pool(inputs[0], pool_size, strides, padding, pool_type='avg')
+
+@reg.register_schedule("avg_pool2d")
+def schedule_avg_pool2d(_, outs, target):
+    """Schedule definition of avg_pool2d"""
+    if target == "cuda":
+        return topi.cuda.schedule_pool(outs)
+    # naive schedule
+    return tvm.create_schedule([x.op for x in outs])
+
+reg.register_pattern("avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
+# global_max_pool2d
+@reg.register_compute("global_max_pool2d")
+def compute_global_max_pool2d(attrs, inputs, _):
+    """Compute definition of global_max_pool2d"""
+    layout = attrs["layout"]
+    assert layout == "NCHW", "only support nchw for now"
+    return topi.nn.global_pool(inputs[0], pool_type='max')
+
+@reg.register_schedule("global_max_pool2d")
+def schedule_global_max_pool2d(_, outs, target):
+    """Schedule definition of global_max_pool2d"""
+    if target == "cuda":
+        return topi.cuda.schedule_global_pool(outs)
+    # naive schedule
+    return tvm.create_schedule([x.op for x in outs])
+
+reg.register_pattern("global_max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
+# global_avg_pool2d
+@reg.register_compute("global_avg_pool2d")
+def compute_global_avg_pool2d(attrs, inputs, _):
+    """Compute definition of global_avg_pool2d"""
+    layout = attrs["layout"]
+    assert layout == "NCHW", "only support nchw for now"
+    return topi.nn.global_pool(inputs[0], pool_type='avg')
+
+@reg.register_schedule("global_avg_pool2d")
+def schedule_global_avg_pool2d(_, outs, target):
+    """Schedule definition of global_avg_pool2d"""
+    if target == "cuda":
+        return topi.cuda.schedule_global_pool(outs)
+    # naive schedule
+    return tvm.create_schedule([x.op for x in outs])
+
+reg.register_pattern("global_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)

nnvm/tests/python/compiler/test_top_level1.py

Lines changed: 20 additions & 56 deletions
@@ -16,7 +16,6 @@ def test_relu():
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
        data = np.random.uniform(size=dshape).astype(dtype)
        m.run(x=data)
        data = (data < 0) * data * 0.3 + (data>0) * data - 0.2
@@ -34,17 +33,10 @@ def test_exp():
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
-        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
-        # set input
-        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
-        set_input("x", data)
-        # execute
-        run()
-        # get output
-        out = tvm.nd.empty(oshape, dtype)
-        get_output(0, out)
-        y_np = np.exp(data.asnumpy())
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
+        y_np = np.exp(data)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

@@ -58,17 +50,10 @@ def test_log():
        with nnvm.compiler.build_config(opt_level=1):
            graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
-        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
-        # set input
-        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
-        set_input("x", data)
-        # execute
-        run()
-        # get output
-        out = tvm.nd.empty(oshape, dtype)
-        get_output(0, out)
-        y_np = np.log(data.asnumpy())
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
+        y_np = np.log(data)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

@@ -82,17 +67,10 @@ def test_tanh():
        with nnvm.compiler.build_config(opt_level=1):
            graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
-        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
-        # set input
-        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
-        set_input("x", data)
-        # execute
-        run()
-        # get output
-        out = tvm.nd.empty(oshape, dtype)
-        get_output(0, out)
-        y_np = np.sinh(data.asnumpy()) / np.cosh(data.asnumpy())
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
+        y_np = np.sinh(data) / np.cosh(data)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

@@ -105,17 +83,10 @@ def test_sigmoid():
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
-        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
-        # set input
-        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
-        set_input("x", data)
-        # execute
-        run()
-        # get output
-        out = tvm.nd.empty(oshape, dtype)
-        get_output(0, out)
-        y_np = 1.0 / (1.0 + np.exp(-data.asnumpy()))
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
+        y_np = 1.0 / (1.0 + np.exp(-data))
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

@@ -129,17 +100,10 @@ def test_softmax():
        with nnvm.compiler.build_config(opt_level=1):
            graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
-        # get member functions
-        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
-        # set input
-        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
-        set_input("x", data)
-        # execute
-        run()
-        # get output
-        out = tvm.nd.empty(oshape, dtype)
-        get_output(0, out)
-        y_np = topi.testing.softmax_python(data.asnumpy())
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
+        y_np = topi.testing.softmax_python(data)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

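The commit message also mentions tests of pool and global_pool; those are not among the diffs captured above. In the same style as the refactored tests in this file, a check for the newly registered global_avg_pool2d could look roughly like the sketch below (shapes, the numpy reference, and tolerances are mine, not from the diff).

# Sketch only: a test for global_avg_pool2d in the style of the tests above;
# assumes the same imports (sym, np, tvm, nnvm.compiler, nnvm.runtime, ctx_list).
def test_global_avg_pool2d():
    x = sym.Variable("x")
    y = sym.global_avg_pool2d(data=x)
    dtype = "float32"
    dshape = (1, 16, 8, 8)
    oshape = (1, 16, 1, 1)
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = nnvm.runtime.create(graph, lib, ctx)
        data = np.random.uniform(size=dshape).astype(dtype)
        m.run(x=data)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        # reference: average over the spatial dimensions of an NCHW tensor
        y_np = np.mean(data, axis=(2, 3), keepdims=True)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)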
0 commit comments
