
Commit a9400b5 (1 parent: f08d5d7)

[TOPI] Using x86 schedules for ARM conv2d.

File tree: 3 files changed, +98 -1 lines changed

python/tvm/relay/op/strategy/arm_cpu.py

Lines changed: 36 additions & 0 deletions
@@ -54,10 +54,16 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
     if groups == 1:
         if layout == "NCHW":
             if kernel_layout == "OIHW":
+                # ARM conv2d spatial pack schedule.
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
                     wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_spatial_pack),
                     name="conv2d_nchw_spatial_pack.arm_cpu")
+                # Intel x86 conv2d schedule.
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.x86.conv2d_nchw),
+                    wrap_topi_schedule(topi.x86.schedule_conv2d_nchw),
+                    name="conv2d_nchw.x86")
                 # check if winograd algorithm is applicable
                 _, _, kh, kw = get_const_tuple(kernel.shape)
                 pt, pl, pb, pr = topi.nn.get_pad_tuple(padding, (kh, kw))
@@ -100,11 +106,13 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
     elif is_depthwise_conv2d(data.shape, layout, kernel.shape, kernel_layout, groups):
         if layout == "NCHW":
             assert kernel_layout == "OIHW" or re.match(r"OIHW\d*o", kernel_layout)
+            # ARM conv2d depthwise schedule
             if kernel_layout == "OIHW":
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nchw),
                     wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw),
                     name="depthwise_conv2d_nchw.arm_cpu")
+
             # TODO:
             # This schedule has incorrect result on some hardware platforms (like NV Jetson TX2)
             # Let us comment it out but not remove.
@@ -115,6 +123,14 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
             #     wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw_spatial_pack),
             #     name="depthwise_conv2d_nchw_spatial_pack.arm_cpu",
             #     plevel=15)
+
+            # Intel x86 depthwise conv2d schedule.
+            channel_multiplier = get_const_tuple(inputs[1].shape)[1]
+            if channel_multiplier == 1 and dilation_h == 1 and dilation_w == 1:
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.x86.depthwise_conv2d_nchw),
+                    wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nchw),
+                    name="depthwise_conv2d_nchw.x86")
         elif layout == "NHWC":
             assert kernel_layout == "HWOI"
             logger.warning("depthwise_conv2d with layout NHWC is not optimized for arm cpu.")
@@ -138,6 +154,26 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                            format(layout))
     return strategy
 
+@conv2d_NCHWc_strategy.register("arm_cpu")
+def conv2d_NCHWc_strategy_arm_cpu(attrs, inputs, out_type, target):
+    """conv2d_NCHWc adopted from x86"""
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_conv2d(topi.x86.conv2d_NCHWc, True, True),
+        wrap_topi_schedule(topi.x86.schedule_conv2d_NCHWc),
+        name="conv2d_NCHWc.x86")
+    return strategy
+
+@depthwise_conv2d_NCHWc_strategy.register("arm_cpu")
+def depthwise_conv2d_NCHWc_strategy_arm_cpu(attrs, inputs, out_type, target):
+    """depthwise_conv2d_NCHWc adopted from x86"""
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_conv2d(topi.x86.depthwise_conv2d_NCHWc, True, True),
+        wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_NCHWc),
+        name="depthwise_conv2d_NCHWc.x86")
+    return strategy
+
 def wrap_compute_conv2d_winograd_nnpack(topi_compute):
     """wrap topi compute for conv2d_winograd NNPack"""
     def _compute_conv2d_nnpack(attrs, inputs, out_type):
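With these registrations, an ARM CPU target now sees the x86 implementations alongside the native spatial-pack schedule, and AutoTVM selects among them by measured performance. A minimal sketch of exercising the new strategy follows; the shapes, dtype, and cross-compilation triple are illustrative choices, not part of the commit:

    import tvm
    from tvm import relay

    # An illustrative NCHW/OIHW conv2d workload.
    data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
    weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="float32")
    out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1))
    mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))

    # conv2d_strategy_arm_cpu now offers both "conv2d_nchw_spatial_pack.arm_cpu"
    # and "conv2d_nchw.x86" as candidate implementations; at opt_level=3 the
    # alter-op-layout pass can additionally rewrite the op to contrib_conv2d_nchwc,
    # which dispatches through the new conv2d_NCHWc_strategy_arm_cpu.
    target = "llvm -device=arm_cpu -mtriple=aarch64-linux-gnu"
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target)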

topi/python/topi/arm_cpu/conv2d_alter_op.py

Lines changed: 61 additions & 0 deletions
@@ -59,6 +59,11 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
     data, kernel = tinfos
     out_dtype = out_type.dtype
 
+    # Extract data types
+    data_tensor, kernel_tensor = tinfos
+    data_dtype = data_tensor.dtype
+    kernel_dtype = kernel_tensor.dtype
+
     idxd = tvm.tir.indexdiv
 
     if topi_tmpl == "conv2d_nchw_spatial_pack.arm_cpu":
@@ -169,4 +174,60 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
 
         return relay.nn.conv2d(*inputs, **new_attrs)
 
+    if topi_tmpl == "conv2d_NCHWc.x86":
+        # Converting NCHW to NCHWc.
+        assert data_layout == "NCHW" and kernel_layout == "OIHW"
+        if cfg.is_fallback:
+            _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding,
+                                out_dtype, False, data_layout)
+        batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape)
+        out_channel, _, kh, kw = get_const_tuple(kernel_tensor.shape)
+        ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
+
+        # update new attrs
+        new_attrs['channels'] = out_channel
+        new_attrs['data_layout'] = 'NCHW%dc' % ic_bn
+        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
+        new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn)
+        new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
+
+        # Store altered operator's config
+        new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+                                  dtype=data_dtype)
+        new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn,
+                                     kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype)
+        new_workload = autotvm.task.args_to_workload(
+            [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"],
+             new_attrs["out_layout"], out_dtype], topi_tmpl)
+        dispatch_ctx.update(target, new_workload, cfg)
+        return relay.nn.contrib_conv2d_nchwc(*inputs, **new_attrs)
+
+    if topi_tmpl == "depthwise_conv2d_NCHWc.x86":
+        # Converting NCHW to NCHWc.
+        assert data_layout == "NCHW" and kernel_layout == "OIHW"
+        if cfg.is_fallback:
+            _get_default_config(cfg, data_tensor, kernel_tensor, strides, padding,
+                                out_dtype, True, data_layout)
+
+        batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape)
+        out_channel, channel_multiplier, kh, kw = get_const_tuple(kernel_tensor.shape)
+        ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
+        assert channel_multiplier == 1
+
+        # update new attrs
+        new_attrs['channels'] = out_channel
+        new_attrs['data_layout'] = 'NCHW%dc' % ic_bn
+        new_attrs['kernel_layout'] = 'OIHW1i%do' % oc_bn
+        new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
+
+        # Store altered operator's config.
+        new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+                                  dtype=data_dtype)
+        new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype)
+        new_workload = autotvm.task.args_to_workload(
+            [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'],
+             new_attrs['out_layout'], out_dtype], topi_tmpl)
+        dispatch_ctx.update(target, new_workload, cfg)
+        return relay.nn.contrib_depthwise_conv2d_nchwc(*inputs, **new_attrs)
+
     return None
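The layout strings written into new_attrs describe blocked formats: 'NCHW%dc' packs the channel axis into inner blocks of ic_bn (or oc_bn) elements, and 'OIHW%di%do' blocks both kernel channel axes, exactly as the inline comment "(oc, ic, h, w) -> (OC, IC, h, w, ic, oc)" states. A numpy sketch of the two repackings, with illustrative block sizes in place of the tuned cfg values:

    import numpy as np

    n, ic, oc, h, w, kh, kw = 1, 16, 16, 56, 56, 3, 3
    ic_bn, oc_bn = 8, 8  # illustrative; normally cfg["tile_ic"] / cfg["tile_oc"]

    # Data: NCHW -> NCHW{ic_bn}c, i.e. (n, c, h, w) -> (n, C, h, w, c_block)
    data = np.random.rand(n, ic, h, w).astype("float32")
    data_nchwc = data.reshape(n, ic // ic_bn, ic_bn, h, w).transpose(0, 1, 3, 4, 2)
    assert data_nchwc.shape == (n, ic // ic_bn, h, w, ic_bn)

    # Kernel: OIHW -> OIHW{ic_bn}i{oc_bn}o, i.e. (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
    kernel = np.random.rand(oc, ic, kh, kw).astype("float32")
    packed = kernel.reshape(oc // oc_bn, oc_bn, ic // ic_bn, ic_bn, kh, kw) \
                   .transpose(0, 2, 4, 5, 3, 1)
    assert packed.shape == (oc // oc_bn, ic // ic_bn, kh, kw, ic_bn, oc_bn)

The depthwise branch is the same idea with channel_multiplier asserted to be 1, which is why its kernel layout is the degenerate 'OIHW1i%do'.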

topi/python/topi/x86/conv2d.py

Lines changed: 1 addition & 1 deletion
@@ -169,7 +169,7 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_layo
 
     cfg.define_split("tile_ic", in_channel, num_outputs=2)
     cfg.define_split("tile_oc", num_filter, num_outputs=2)
-    cfg.define_split("tile_ow", ow, num_outputs=2, filter=lambda y: y.size[-1] <= 64)
+    cfg.define_split("tile_ow", ow, num_outputs=2, filter=lambda y: y.size[-1] <= 64, policy="verbose")
     if is_kernel_1x1:
         cfg.define_knob("tile_oh", [1, 2] if oh > 1 else [1])
     else:
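The one-line x86 change widens the tile_ow tuning space for the NCHWc schedule. With the default "factors" policy, define_split only proposes inner tile sizes that divide ow; the "verbose" policy (to my understanding, the union of the "factors" and "power2" candidate sets) also proposes power-of-two tiles, useful on ARM when no divisor of the output width matches the hardware's preferred vector width. A standalone sketch, with an illustrative ow of 28:

    from tvm.autotvm.task.space import ConfigSpace

    cfg = ConfigSpace()
    # "factors" alone would draw inner tiles from the divisors of 28 (1, 2, 4, 7, 14, 28);
    # "verbose" additionally proposes power-of-two tiles such as 8 and 16.
    cfg.define_split("tile_ow", 28, num_outputs=2,
                     filter=lambda y: y.size[-1] <= 64, policy="verbose")
    print(cfg.space_map["tile_ow"])  # inspect the enlarged candidate set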
