This repository has been archived by the owner on Dec 26, 2024. It is now read-only.
forked from apache/mxnet
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtvm_bridge.cc
180 lines (165 loc) · 5.91 KB
/
tvm_bridge.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file tvm_bridge.cc
* \brief Bridge to run TVM's PackedFunc in MXNet's async engine.
*
 * This bridge is mainly used to expose MXNet's async engine push to
 * TVM. It only uses the TVM runtime in header-only mode, which means
 * there are no link dependencies.
*
* Support for TVM is optional even when this code
* is always compiled and built with the project.
 * We choose this strategy because we do not yet want
 * llvm as a dependency (which TVM uses). So instead we expose a hook
 * to TVM and let users use this feature when they have TVM installed.
*
* We do require TVM and MXNet to be built with same C++ ABI of std::function
*/
#define TVM_RUNTIME_HEADER_ONLY 1
#include <tvm/runtime/packed_func.h>

#include <mxnet/c_api.h>
#include <mxnet/engine.h>
#include <mxnet/ndarray.h>

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
namespace mxnet {
using tvm::runtime::PackedFunc;
using tvm::runtime::TVMArgs;
using tvm::runtime::TVMRetValue;
/*!
 * \brief Async functor object.
 *
 * Holds a TVM PackedFunc together with its marshalled call arguments so the
 * call can be scheduled on MXNet's engine now and executed later. NDArray
 * arguments are recorded by position and rebound to DLTensor handles at
 * execution time (see Run()), because the arrays may not be allocated yet
 * when the call is scheduled.
 */
class TVMFunctor {
 public:
  /*!
   * \brief Constructor.
   * \param func The packed function to execute.
   * \param fset_stream The packed function used to set TVM's device stream.
   */
  explicit TVMFunctor(PackedFunc func, PackedFunc fset_stream)
      // Move rather than copy: PackedFunc wraps a std::function whose copy
      // may allocate.
      : func_(std::move(func)), fset_stream_(std::move(fset_stream)) {}

  /*!
   * \brief Capture the call arguments and classify NDArray dependencies.
   * \param args Arguments as received from the TVM caller.
   * \param const_loc Sorted positions of read-only NDArray arguments.
   * \param const_vars Output: engine variables the call only reads.
   * \param mutate_vars Output: engine variables the call may write.
   */
  void Init(const TVMArgs& args,
            const std::vector<int>& const_loc,
            std::vector<Engine::VarHandle>* const_vars,
            std::vector<Engine::VarHandle>* mutate_vars) {
    values_.clear();
    type_codes_.clear();
    values_.insert(values_.end(), args.values, args.values + args.size());
    type_codes_.insert(
        type_codes_.end(), args.type_codes, args.type_codes + args.size());

    size_t const_loc_ptr = 0;
    for (int i = 0; i < args.size(); ++i) {
      if (args.type_codes[i] == kTVMNDArrayTypeCode) {
        const NDArray& nd =
            static_cast<NDArray*>(args.values[i].v_handle)[0];
        // We cannot set the DLTensor handle yet; it is patched in Run()
        // once the array is guaranteed to be allocated.
        type_codes_[i] = kArrayHandle;
        array_data_.push_back(nd);
        array_loc_.push_back(i);
        // Check if this position is read-only or mutated;
        // by default assume we mutate the array.
        if (const_loc_ptr < const_loc.size() &&
            i == const_loc[const_loc_ptr]) {
          const_vars->push_back(nd.var());
          ++const_loc_ptr;
        } else {
          mutate_vars->push_back(nd.var());
        }
      } else {
        CHECK_LT(args.type_codes[i], kTVMType)
            << "Only allow POD type in mxnet async call";
      }
    }
  }

  /*!
   * \brief Context the call runs on, taken from the first array argument.
   * \note Requires at least one NDArray argument to have been recorded.
   */
  Context ctx() const {
    CHECK(!array_data_.empty())
        << "TVMFunctor requires at least one NDArray argument";
    return array_data_[0].ctx();
  }

  /*!
   * \brief Execute the packed function.
   * \param rctx The engine run context supplying the device stream.
   */
  void Run(const RunContext& rctx) {
    // Rebind the DLTensor handles now that the arrays are allocated.
    for (size_t i = 0; i < array_loc_.size(); ++i) {
      values_[array_loc_[i]].v_handle =
          const_cast<DLTensor*>(&(array_data_[i].data().dltensor()));
    }
    // Run the packed function.
    TVMRetValue rv;
    TVMArgs args(values_.data(), type_codes_.data(),
                 static_cast<int>(values_.size()));
    if (ctx().dev_type == Context::kGPU) {
#if MXNET_USE_CUDA
      // Hand TVM the CUDA stream via the set-stream callback, and reset it
      // to nullptr afterwards so a stale stream cannot be reused.
      void* strm = static_cast<void*>(rctx.get_stream<gpu>()->stream_);
      int dev_type = kDLGPU;
      fset_stream_(dev_type, rctx.ctx.dev_id, strm);
      func_.CallPacked(args, &rv);
      fset_stream_(dev_type, rctx.ctx.dev_id, nullptr);
#else
      LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
#endif
    } else {
      func_.CallPacked(args, &rv);
    }
  }

 private:
  /*! \brief The function to execute */
  PackedFunc func_;
  /*! \brief Callback that sets TVM's device stream */
  PackedFunc fset_stream_;
  /*! \brief Marshalled TVM argument values */
  std::vector<TVMValue> values_;
  /*! \brief Type codes matching values_ */
  std::vector<int> type_codes_;
  /*! \brief NDArray arguments, kept alive until execution */
  std::vector<NDArray> array_data_;
  /*! \brief Positions of the arrays within values_ */
  std::vector<int> array_loc_;
};
// Wrap a TVM function into a function that invokes MXNet's engine.
// It does two things:
//  1. declare read/write variable dependencies and push the call to the
//     engine properly;
//  2. set up the NDArray-to-DLTensor binding during invocation.
//
// wrap_args layout:
//  [0] the function to wrap, [1] the stream-setting function,
//  [2] the number of read-only (const) NDArray arguments,
//  [3..] the positions of those const arguments.
void WrapAsyncCall(TVMArgs wrap_args, TVMRetValue* wrap_rv) {
  PackedFunc f = wrap_args[0];
  PackedFunc fset_stream = wrap_args[1];
  const int num_const = wrap_args[2];
  // Sorted positions of the constant (read-only) arguments.
  std::vector<int> const_loc;
  const_loc.reserve(num_const);
  for (int i = 0; i < num_const; ++i) {
    const_loc.push_back(wrap_args[i + 3].operator int());
  }
  std::sort(const_loc.begin(), const_loc.end());
  // The wrapped function handed back to the user: each invocation marshals
  // the arguments into a TVMFunctor and schedules it on the engine.
  auto wrapped = [f, fset_stream, const_loc](TVMArgs args, TVMRetValue* rv) {
    std::shared_ptr<TVMFunctor> func =
        std::make_shared<TVMFunctor>(f, fset_stream);
    std::vector<Engine::VarHandle> const_vars, mutate_vars;
    func->Init(args, const_loc, &const_vars, &mutate_vars);
    Engine* engine = Engine::Get();
    engine->DeduplicateVarHandle(&const_vars, &mutate_vars);
    // The functor is captured by shared_ptr so it stays alive until the
    // engine actually runs it.
    engine->PushSync([func](RunContext ctx) {
        func->Run(ctx);
      }, func->ctx(), const_vars, mutate_vars);
  };
  *wrap_rv = PackedFunc(wrapped);
}
} // namespace mxnet
// C entry point used by TVM to pull in the WrapAsyncCall hook.
// TVM hands us a pointer to its own registration PackedFunc; invoking it
// registers our async-call wrapper under the name "WrapAsyncCall".
extern "C" MXNET_DLL int MXTVMBridge(TVMFunctionHandle pregister) {
  using tvm::runtime::PackedFunc;
  PackedFunc* fregister = static_cast<PackedFunc*>(pregister);
  (*fregister)("WrapAsyncCall", PackedFunc(mxnet::WrapAsyncCall));
  return 0;
}