Skip to content

Commit a561718

Browse files
committed
WIP
1 parent a229041 commit a561718

File tree

6 files changed

+32
-31
lines changed

6 files changed

+32
-31
lines changed

python/tvm/contrib/nvcc.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None):
8888

8989
cmd += ["-o", file_target]
9090
cmd += [temp_code]
91-
9291
cxx_compiler_path = tvm.support.libinfo().get("TVM_CXX_COMPILER_PATH")
9392
if cxx_compiler_path != "":
9493
# This tells nvcc where to find the c++ compiler just in case it is not in the path.

src/relay/backend/graph_executor_codegen.cc

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
210210
auto storage_and_device = it.second;
211211
ICHECK_EQ(storage_and_device.size(), 2u);
212212
auto device_type = storage_and_device[1];
213+
std::cout << PrettyPrint(expr) << std::endl;
214+
std::cout << device_type << std::endl;
213215
tvm::Device dev;
214216
dev.device_id = 0;
215217
dev.device_type = static_cast<DLDeviceType>(device_type[0]->value);
@@ -226,6 +228,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
226228
});
227229

228230
auto main_module = lowered_module.main_module;
231+
std::cout << "MainModule: " << main_module << std::endl;
229232
main_module = relay::transform::InferType()(main_module);
230233
relay::Function main_func = Downcast<relay::Function>(main_module->Lookup("main"));
231234

@@ -388,33 +391,6 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
388391
if (auto global_node = call->op.as<GlobalVarNode>()) {
389392
auto prim_fn_name = global_node->name_hint;
390393

391-
Target target;
392-
393-
ICHECK_GE(storage_device_map_.count(call), 0)
394-
<< "Could not find a storage device for " << prim_fn_name
395-
<< "The memory planning was either not performed for this precise node, or there is a bug "
396-
"in the memory planner.";
397-
398-
auto& device_type = storage_device_map_[call][1];
399-
auto call_dev_type = device_type[0]->value;
400-
// Normal Relay Function
401-
if (targets_.size() == 1) {
402-
// homogeneous execution.
403-
const auto& it = targets_.begin();
404-
target = (*it).second;
405-
} else {
406-
// heterogeneous execution.
407-
std::string call_dev_name;
408-
if (call_dev_type == 0) {
409-
call_dev_name = "llvm";
410-
} else {
411-
call_dev_name = runtime::DeviceName(call_dev_type);
412-
}
413-
if (targets_.count(call_dev_type) == 0) {
414-
LOG(FATAL) << "No target is provided for device " << call_dev_name;
415-
}
416-
target = targets_[call_dev_type];
417-
}
418394

419395
return GraphAddCallNode(call_node, _GetUniqueName(prim_fn_name), prim_fn_name);
420396
} else {

src/relay/backend/te_compiler.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,11 @@ class LowerTensorExpr : public ExprMutator {
306306
return Call(ext_func->prim_fn_var, args, {});
307307
}
308308

309-
ICHECK_GE(device_context_map_.count(expr), 0);
309+
ICHECK_GE(device_context_map_.count(expr), 0)
310+
<< "Could not find an entry in the device context map for " << PrettyPrint(expr)
311+
<< "The memory planning was either not performed for this precise node, or there is a bug "
312+
"in the memory planner.";
313+
310314
auto& device_context = this->device_context_map_[expr];
311315
auto call_dev_type = device_context.device_type;
312316

@@ -317,20 +321,33 @@ class LowerTensorExpr : public ExprMutator {
317321
const auto& it = targets_.begin();
318322
target = (*it).second;
319323
} else {
324+
std::cout << "DeviceType: " << call_dev_type << std::endl;
320325
// The heterogeneous execution case we have multiple targets
321326
// in this case.
322327
//
323328
// We need to identify the target and translate.
324329
std::string call_dev_name;
325330
if (call_dev_type == 0) {
326331
call_dev_name = "llvm";
332+
call_dev_type = kDLCPU;
327333
} else {
328334
call_dev_name = ::tvm::runtime::DeviceName(call_dev_type);
329335
}
336+
330337
if (targets_.count(call_dev_type) == 0) {
331-
LOG(FATAL) << "No target is provided for device " << call_dev_name;
338+
std::stringstream msg;
339+
msg << "No target is specified for provided device name: `" << call_dev_name << "`\n\n";
340+
msg << call_dev_name << " mapped to device type (" << call_dev_type << ") which was not found in the target map.\n";
341+
msg << "Available targets: \n";
342+
for (auto target : targets_) {
343+
msg << " " << target.first << "-> " << target.second << "\n";
344+
}
345+
LOG(FATAL) << msg.str();
332346
}
347+
348+
std::cout << "DeviceName: " << call_dev_name << std::endl;
333349
target = targets_[call_dev_type];
350+
std::cout << "Target: " << target << std::endl;
334351
}
335352

336353
CCacheKey key = CCacheKey(func, target);

src/relay/backend/te_compiler_cache.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,13 @@ class ScheduleBuilder : public backend::MemoizedExprTranslator<Array<te::Tensor>
163163
candidate_name = truncated_name.str();
164164
}
165165

166-
auto prim_fn_name = renamer(candidate_name);
166+
// NB(@jroesch): unfortunately the graph runtime deals with copy in
167+
// a totally hacky way, we really need to rectify this but this will
168+
// have to work for now.
169+
std::string prim_fn_name = candidate_name;
170+
if (prim_fn_name != "__copy") {
171+
prim_fn_name = renamer(prim_fn_name);
172+
}
167173
auto prim_fn_var = GlobalVar(prim_fn_name);
168174
prim_fn_var->checked_type_ = prim_func->checked_type();
169175

src/runtime/graph_executor/graph_executor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ GraphExecutor::CreateTVMOp(const TVMOpParam& param, const std::vector<DLTensor>&
414414
}
415415
}
416416

417+
std::cout << "Executing: " << param.func_name << std::endl;
417418
if (param.func_name == "__nop") {
418419
return {[]() {}, arg_ptr};
419420
} else if (param.func_name == "__copy") {
@@ -422,6 +423,7 @@ GraphExecutor::CreateTVMOp(const TVMOpParam& param, const std::vector<DLTensor>&
422423
auto fexec = [arg_ptr]() {
423424
DLTensor* from = static_cast<DLTensor*>(arg_ptr->arg_values[0].v_handle);
424425
DLTensor* to = static_cast<DLTensor*>(arg_ptr->arg_values[1].v_handle);
426+
std::cout << "from: " << from->device.device_type << " to: " << to->device.device_type << std::endl;
425427
TVM_CCALL(TVMArrayCopyFromTo(from, to, nullptr));
426428
};
427429
return {fexec, arg_ptr};

tests/python/relay/test_pass_annotation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def check_graph_executor(
4242
device_index = graph_json["attrs"]["device_index"][1]
4343
assert device_index == expected_index
4444
mod = graph_executor.create(graph, lib, contexts)
45+
import pdb; pdb.set_trace()
4546
mod.set_input(**new_params)
4647
mod.run()
4748
res = mod.get_output(0).asnumpy()

0 commit comments

Comments
 (0)