Skip to content

Commit

Permalink
[VTA][OpenCL] intelfocl (#6126)
Browse files Browse the repository at this point in the history
* intelfocl support

* disable tsim test

* bugfix to vta autotvm

* disable tsim test in task_python_vta_tsim.sh

* fix integration test

* update vta submodule and re-enable tsim tests

* remove unnecessary comments
  • Loading branch information
zhanghaohit authored Apr 20, 2021
1 parent fbdffeb commit d0a0194
Show file tree
Hide file tree
Showing 23 changed files with 676 additions and 52 deletions.
8 changes: 8 additions & 0 deletions cmake/modules/VTA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ elseif(PYTHON)
find_library(__cma_lib NAMES cma PATH /usr/lib)
elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules
file(GLOB FPGA_RUNTIME_SRCS ${VTA_HW_PATH}/src/de10nano/*.cc ${VTA_HW_PATH}/src/*.cc)
elseif(${VTA_TARGET} STREQUAL "intelfocl") # Intel OpenCL for FPGA rules
file(GLOB FOCL_SRC ${VTA_HW_PATH}/src/oclfpga/*.cc)
list(APPEND FPGA_RUNTIME_SRCS ${FOCL_SRC})
list(APPEND FPGA_RUNTIME_SRCS ${VTA_HW_PATH}/src/vmem/virtual_memory.cc ${VTA_HW_PATH}/src/vmem/virtual_memory.h)
endif()
# Target lib: vta
add_library(vta SHARED ${FPGA_RUNTIME_SRCS})
Expand All @@ -123,6 +127,10 @@ elseif(PYTHON)
target_include_directories(vta SYSTEM PUBLIC 3rdparty)
target_include_directories(vta SYSTEM PUBLIC
"/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include")
elseif(${VTA_TARGET} STREQUAL "intelfocl") # Intel OpenCL for FPGA rules
target_include_directories(vta PUBLIC 3rdparty)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
target_link_libraries(vta -lOpenCL)
endif()
endif()

Expand Down
11 changes: 7 additions & 4 deletions python/tvm/autotvm/task/topi_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def _decorate(topi_schedule):
@_register_task_schedule(task_name)
def wrapper(outs, *args, **kwargs):
"""wrapper function for topi schedule"""
workload = get_workload(outs)
workload = get_workload(outs, task_name)
if workload is None:
raise RuntimeError("Cannot find workload in attribute of this schedule")
tgt = Target.current()
Expand All @@ -241,18 +241,21 @@ def wrapper(outs, *args, **kwargs):
return _decorate


def get_workload(outs):
def get_workload(outs, task_name=None):
"""Retrieve the workload from outputs"""

def traverse(tensors):
"""traverse all ops to find attached workload"""
for t in tensors:
op = t.op
if "workload" in op.attrs:
return args_to_workload(op.attrs["workload"])
wkl = traverse(op.input_tensors)
if wkl:
return wkl

if "workload" in op.attrs:
ret = args_to_workload(op.attrs["workload"])
if task_name is None or ret[0] == task_name:
return ret
return None

outs = [outs] if isinstance(outs, tensor.Tensor) else outs
Expand Down
9 changes: 9 additions & 0 deletions python/tvm/relay/op/strategy/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ def wrapper(attrs, outs, target):
return wrapper


def wrap_topi_compute(topi_compute):
    """Adapt a TOPI compute that takes only tensors to the (attrs, inputs, out_type) form.

    Parameters
    ----------
    topi_compute : callable
        Compute function invoked as ``topi_compute(*inputs)``.

    Returns
    -------
    wrapper : callable
        Function accepting ``(attrs, inputs, out_type)`` that returns a
        single-element list holding the result of ``topi_compute``.
    """

    def wrapper(attrs, inputs, out_type):
        # attrs and out_type are accepted for signature compatibility but are not used.
        result = topi_compute(*inputs)
        return [result]

    return wrapper


def get_conv2d_in_channels(data_shape, data_layout):
"""Get conv2d input channels"""
data_shape = get_const_tuple(data_shape)
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/relay/testing/tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

try:
tf_compat_v1 = tf.compat.v1
except ImportError:
except (ImportError, AttributeError):
tf_compat_v1 = tf

######################################################################
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/topi/x86/bitserial_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def bitserial_dense(
return matmul


@autotvm.register_topi_schedule("biserial_dense.x86")
@autotvm.register_topi_schedule("bitserial_dense.x86")
def schedule_bitserial_dense(cfg, outs):
"""Schedule for bitserial_dense.
Expand Down
4 changes: 2 additions & 2 deletions src/relay/backend/compile_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator<Array<te::Tensor>>
<< "Cannot apply TOPI schedule to a primitive function with two complicated ops"
<< " anchor=" << anchor_op_ << " current=" << op;
}
if (op_pattern >= anchor_op_pattern_) {
if (op_pattern > anchor_op_pattern_) {
anchor_op_ = op;
anchor_attrs_ = call_node->attrs;
anchor_op_pattern_ = op_pattern;
Expand Down Expand Up @@ -309,7 +309,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator<Array<te::Tensor>>
tvm::Target target_;
Op anchor_op_;
Attrs anchor_attrs_;
int anchor_op_pattern_{0};
int anchor_op_pattern_{-1};
OpImplementation anchor_implementation_;
std::ostringstream readable_name_stream_;
Array<te::Operation> scalars_;
Expand Down
1 change: 0 additions & 1 deletion src/runtime/workspace_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ class WorkspacePool::Pool {
}
// Release all resources
void Release(Device dev, DeviceAPI* device) {
ICHECK_EQ(allocated_.size(), 1);
for (size_t i = 1; i < free_list_.size(); ++i) {
device->FreeDataSpace(dev, free_list_[i].data);
}
Expand Down
10 changes: 0 additions & 10 deletions src/tir/transforms/lower_tvm_builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,6 @@ class BuiltinLower : public StmtExprMutator {
op = stmt.as<AllocateNode>();
// Get constant allocation bound.
int64_t nbytes = GetVectorBytes(op->dtype);
if (device_type_.defined()) {
if (const auto* dev_type = device_type_.as<IntImmNode>()) {
if (dev_type->value == kDLCPU) {
int32_t constant_size = op->constant_allocation_size();
if (constant_size > 0 && constant_size * nbytes < runtime::kMaxStackAlloca) {
return stmt;
}
}
}
}
PrimExpr total_bytes = make_const(op->extents[0].dtype(), nbytes);
for (size_t i = 0; i < op->extents.size(); ++i) {
total_bytes = total_bytes * op->extents[i];
Expand Down
2 changes: 1 addition & 1 deletion vta/python/vta/autotvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def reprogram_fpga(remote, _build_result):
_build_result : tvm.autotvm.measure.measure_methods.BuildResult
Artifact from the build phase, unused here.
"""
rpc_client.program_bitstream(remote, bitstream)
rpc_client.program_fpga(remote, bitstream)
rpc_client.reconfig_runtime(remote)

return default_module_loader(reprogram_fpga)
4 changes: 3 additions & 1 deletion vta/python/vta/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ class DevContext(object):
MEM_ID_INP = 2
MEM_ID_ACC = 3
MEM_ID_OUT = 4
MEM_ID_ACC_8BIT = 5
# VTA ALU Opcodes
ALU_OPCODE_MIN = 0
ALU_OPCODE_MAX = 1
ALU_OPCODE_ADD = 2
ALU_OPCODE_SHR = 3
ALU_OPCODE_MUL = 4
# Task queue id (pipeline stage)
QID_LOAD_INP = 1
QID_LOAD_WGT = 1
Expand Down Expand Up @@ -232,7 +234,7 @@ def target_host(self):
return "llvm -mtriple=armv7-none-linux-gnueabihf"
if self.TARGET == "ultra96":
return "llvm -mtriple=aarch64-linux-gnu"
if self.TARGET in ["sim", "tsim"]:
if self.TARGET in ["sim", "tsim", "intelfocl"]:
return "llvm"
raise ValueError("Unknown target %s" % self.TARGET)

Expand Down
14 changes: 13 additions & 1 deletion vta/python/vta/program_bitstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,26 @@ def de10nano_bitstream_program(bitstream_path):
program(bitstream_path)


def bitstream_program(target, bitstream):
def intelfocl_bitstream_program(bitstream_path, mem_size=4 * 1024 * 1024 * 1024):
    """Program an Intel OpenCL-for-FPGA device through the registered TVM global function.

    Parameters
    ----------
    bitstream_path : str
        Path of the bitstream, forwarded unchanged to "vta.oclfpga.program".

    mem_size : int, optional
        Forwarded unchanged as the second argument.
        NOTE(review): presumably the device memory size in bytes (default would
        then be 4 GiB) -- confirm against the "vta.oclfpga.program" implementation.
    """
    # pylint: disable=import-outside-toplevel
    from tvm import get_global_func

    # Look up the packed function by name, then invoke it with both arguments.
    fprogram = get_global_func("vta.oclfpga.program")
    fprogram(bitstream_path, mem_size)


def bitstream_program(target, bitstream, *args):
"""program bitstream to devices"""

if target in ["pynq", "ultra96"]:
pynq_bitstream_program(bitstream)
elif target in ["de10nano"]:
de10nano_bitstream_program(bitstream)
elif target in ["sim", "tsim"]:
# In simulation, bit stream programming is a no-op
return
elif target in ["intelfocl"]:
intelfocl_bitstream_program(bitstream, *args)
else:
raise RuntimeError("Unknown target {}".format(target))

Expand Down
14 changes: 10 additions & 4 deletions vta/python/vta/rpc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"""VTA RPC client function"""
import os

from tvm import rpc
from vta import program_bitstream
from .environment import get_env
from .bitstream import download_bitstream, get_bitstream_path

Expand Down Expand Up @@ -45,16 +47,20 @@ def program_fpga(remote, bitstream=None):
bitstream : str, optional
Path to a local bitstream file. If unset, tries to download from cache server.
"""
env = get_env()

if bitstream:
assert os.path.isfile(bitstream)
else:
bitstream = get_bitstream_path()
if not os.path.isfile(bitstream):
env = get_env()
if env.TARGET == "de10nano":
return
download_bitstream()

fprogram = remote.get_function("tvm.contrib.vta.init")
remote.upload(bitstream)
fprogram(os.path.basename(bitstream))
if isinstance(remote, rpc.LocalSession):
program_bitstream.bitstream_program(env.TARGET, bitstream)
else:
fprogram = remote.get_function("tvm.contrib.vta.init")
remote.upload(bitstream)
fprogram(os.path.basename(bitstream))
8 changes: 7 additions & 1 deletion vta/python/vta/testing/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ def _load_sw():
"""Load hardware library for simulator."""

env = get_env()
lib_driver_name = "libvta_tsim" if env.TARGET == "tsim" else "libvta_fsim"
lib_driver_name = (
"libvta_tsim"
if env.TARGET == "tsim"
else "libvta"
if env.TARGET == "intelfocl"
else "libvta_fsim"
)
require_sim = env.TARGET in ("sim", "tsim")
libs = []

Expand Down
2 changes: 1 addition & 1 deletion vta/python/vta/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def run(run_func):
"""
env = get_env()

if env.TARGET in ["sim", "tsim"]:
if env.TARGET in ["sim", "tsim", "intelfocl"]:
# Talk to local RPC if necessary to debug RPC server.
# Compile vta on your host with make at the root.
# Make sure TARGET is set to "sim" in the config.json file.
Expand Down
2 changes: 1 addition & 1 deletion vta/python/vta/top/graphpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def visit_call(self, call):
self.start_pack and call.op == op.op.get("cast") and input_types[0].dtype == "int32"
):
cast = relay.Call(op.op.get("cast"), [args[0]], call.attrs)
return relay.Call(op.op.get("copy"), [cast])
return cast
elif call.op == self.pad:
pad_width = call.attrs.pad_width
if len(pad_width) == 6:
Expand Down
Loading

0 comments on commit d0a0194

Please sign in to comment.