forked from apache/tvm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcross_compilation_and_rpc.py
263 lines (235 loc) · 9.62 KB
/
cross_compilation_and_rpc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
.. _tutorial-cross-compilation-and-rpc:
Cross Compilation and RPC
=========================
**Author**: `Ziheng Jiang <https://github.com/ZihengJiang/>`_, `Lianmin Zheng <https://github.com/merrymercy/>`_
This tutorial introduces cross compilation and remote device
execution with RPC in TVM.
With cross compilation and RPC, you can **compile a program on your
local machine then run it on the remote device**. It is useful when
the remote device resource are limited, like Raspberry Pi and mobile
platforms. In this tutorial, we will use the Raspberry Pi for a CPU example
and the Firefly-RK3399 for an OpenCL example.
"""
######################################################################
# Build TVM Runtime on Device
# ---------------------------
#
# The first step is to build the TVM runtime on the remote device.
#
# .. note::
#
# All instructions in both this section and the next section should be
# executed on the target device, e.g. Raspberry Pi. We assume the target
# is running Linux.
#
# Since we do compilation on the local machine, the remote device is only used
# for running the generated code. We only need to build the TVM runtime on
# the remote device.
#
# .. code-block:: bash
#
# git clone --recursive https://github.com/apache/incubator-tvm tvm
# cd tvm
# make runtime -j2
#
# After building the runtime successfully, we need to set environment variables
# in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM
# directory is in :code:`~/tvm`):
#
# .. code-block:: bash
#
# export PYTHONPATH=$PYTHONPATH:~/tvm/python
#
# To update the environment variables, execute :code:`source ~/.bashrc`.
######################################################################
# Set Up RPC Server on Device
# ---------------------------
# To start an RPC server, run the following command on your remote device
# (Which is Raspberry Pi in this example).
#
# .. code-block:: bash
#
# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
#
# If you see the line below, it means the RPC server started
# successfully on your device.
#
# .. code-block:: bash
#
# INFO:root:RPCServer: bind to 0.0.0.0:9090
#
######################################################################
# Declare and Cross Compile Kernel on Local Machine
# -------------------------------------------------
#
# .. note::
#
# Now we go back to the local machine, which has a full TVM installed
# (with LLVM).
#
# Here we will declare a simple kernel on the local machine:
import numpy as np
import tvm
from tvm import te
from tvm import rpc
from tvm.contrib import util
n = tvm.runtime.convert(1024)
A = te.placeholder((n,), name='A')
B = te.compute((n,), lambda i: A[i] + 1.0, name='B')
s = te.create_schedule(B.op)
######################################################################
# Then we cross compile the kernel.
# The target should be 'llvm -mtriple=armv7l-linux-gnueabihf' for
# Raspberry Pi 3B, but we use 'llvm' here to make this tutorial runnable
# on our webpage building server. See the detailed note in the following block.
local_demo = True
if local_demo:
target = 'llvm'
else:
target = 'llvm -mtriple=armv7l-linux-gnueabihf'
func = tvm.build(s, [A, B], target=target, name='add_one')
# save the lib at a local temp folder
temp = util.tempdir()
path = temp.relpath('lib.tar')
func.export_library(path)
######################################################################
# .. note::
#
# To run this tutorial with a real remote device, change :code:`local_demo`
# to False and replace :code:`target` in :code:`build` with the appropriate
# target triple for your device. The target triple which might be
# different for different devices. For example, it is
# :code:`'llvm -mtriple=armv7l-linux-gnueabihf'` for Raspberry Pi 3B and
# :code:`'llvm -mtriple=aarch64-linux-gnu'` for RK3399.
#
# Usually, you can query the target by running :code:`gcc -v` on your
# device, and looking for the line starting with :code:`Target:`
# (Though it may still be a loose configuration.)
#
# Besides :code:`-mtriple`, you can also set other compilation options
# like:
#
# * -mcpu=<cpuname>
# Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture.
# * -mattr=a1,+a2,-a3,...
# Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU.
# To get the list of available attributes, you can do:
#
# .. code-block:: bash
#
# llc -mtriple=<your device target triple> -mattr=help
#
# These options are consistent with `llc <http://llvm.org/docs/CommandGuide/llc.html>`_.
# It is recommended to set target triple and feature set to contain specific
# feature available, so we can take full advantage of the features of the
# board.
# You can find more details about cross compilation attributes from
# `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_.
######################################################################
# Run CPU Kernel Remotely by RPC
# ------------------------------
# We show how to run the generated CPU kernel on the remote device.
# First we obtain an RPC session from remote device.
if local_demo:
remote = rpc.LocalSession()
else:
# The following is my environment, change this to the IP address of your target device
host = '10.77.1.162'
port = 9090
remote = rpc.connect(host, port)
######################################################################
# Upload the lib to the remote device, then invoke a device local
# compiler to relink them. Now `func` is a remote module object.
remote.upload(path)
func = remote.load_module('lib.tar')
# create arrays on the remote device
ctx = remote.cpu()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
# the function will run on the remote device
func(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
######################################################################
# When you want to evaluate the performance of the kernel on the remote
# device, it is important to avoid the overhead of network.
# :code:`time_evaluator` will returns a remote function that runs the
# function over number times, measures the cost per run on the remote
# device and returns the measured cost. Network overhead is excluded.
time_f = func.time_evaluator(func.entry_name, ctx, number=10)
cost = time_f(a, b).mean
print('%g secs/op' % cost)
#########################################################################
# Run OpenCL Kernel Remotely by RPC
# ---------------------------------
# For remote OpenCL devices, the workflow is almost the same as above.
# You can define the kernel, upload files, and run via RPC.
#
# .. note::
#
# Raspberry Pi does not support OpenCL, the following code is tested on
# Firefly-RK3399. You may follow this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_
# to setup the OS and OpenCL driver for RK3399.
#
# Also we need to build the runtime with OpenCL enabled on rk3399 board. In the TVM
# root directory, execute
#
# .. code-block:: bash
#
# cp cmake/config.cmake .
# sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
# make runtime -j4
#
# The following function shows how we run an OpenCL kernel remotely
def run_opencl():
# NOTE: This is the setting for my rk3399 board. You need to modify
# them according to your environment.
target_host = "llvm -mtriple=aarch64-linux-gnu"
opencl_device_host = '10.77.1.145'
opencl_device_port = 9090
# create schedule for the above "add one" compute declaration
s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
s[B].bind(xo, te.thread_axis("blockIdx.x"))
s[B].bind(xi, te.thread_axis("threadIdx.x"))
func = tvm.build(s, [A, B], "opencl", target_host=target_host)
remote = rpc.connect(opencl_device_host, opencl_device_port)
# export and upload
path = temp.relpath('lib_cl.tar')
func.export_library(path)
remote.upload(path)
func = remote.load_module('lib_cl.tar')
# run
ctx = remote.cl()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
func(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
print("OpenCL test passed!")
######################################################################
# Summary
# -------
# This tutorial provides a walk through of cross compilation and RPC
# features in TVM.
#
# - Set up an RPC server on the remote device.
# - Set up the target device configuration to cross compile the kernels on the
# local machine.
# - Upload and run the kernels remotely via the RPC API.