cache context

zhiics · zhiics · commit 465fd1b3a3bb · 2020-08-31T00:17:21.000Z
diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py
@@ -309,11 +309,13 @@ def _setup_ctx(self, ctx, memory_cfg):
         """Init context and allocators."""
         ctxs = ctx
         if not isinstance(ctx, (list, tuple)):
-            assert isinstance(ctx, tvm.runtime.TVMContext)
+            if not isinstance(ctx, tvm.runtime.TVMContext):
+                raise TypeError("ctx is expected to be TVMContex")
             ctxs = [ctx]
-            # CPU is required for executing shape functions
-            if ctx.device_type != tvm.cpu(0).device_type:
-                ctxs.append(tvm.cpu())
+
+        # CPU is required for executing shape functions
+        if not any(c.device_type == tvm.cpu().device_type for c in ctxs):
+            ctxs.append(tvm.cpu())
 
         default_alloc_type = VirtualMachine.POOLED_ALLOCATOR
         if memory_cfg is None:
diff --git a/src/runtime/vm/executable.cc b/src/runtime/vm/executable.cc
@@ -644,7 +644,8 @@ Instruction DeserializeInstruction(const VMInstructionSerializer& instr) {
       return Instruction::AllocClosure(clo_index, num_freevar, free_vars, dst);
     }
     case Opcode::AllocStorage: {
-      DCHECK_GE(instr.fields.size(), 6U);
+      // Number of fields = 7
+      DCHECK_GE(instr.fields.size(), 7U);
       Index allocation_size = instr.fields[0];
       Index alignment = instr.fields[1];
 
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc
@@ -77,7 +77,7 @@ inline ObjectRef CopyTo(ObjectRef src, const DLContext& ctx) {
     for (size_t i = 0; i < adt.size(); i++) {
       ret.push_back(CopyTo(adt[i], ctx));
     }
-    return ADT(0, ret.begin(), ret.end());
+    return ADT(adt->tag, ret.begin(), ret.end());
   }
 }
 
@@ -161,11 +161,8 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name,
           << "The number of provided parameters doesn't match the number of assigned devices";
       std::vector<ObjectRef> func_args(param_names.size());
       for (int i = 1; i < args.size(); ++i) {
-        TVMContext ctx;
-        int device_type = vm_func.params_device_type[i - 1];
-        ctx.device_type = DLDeviceType(device_type);
-        // TODO(zhiics) Use virtual device id
-        ctx.device_id = 0;
+        Index device_type = vm_func.params_device_type[i - 1];
+        DLContext ctx = GetContext(device_type);
         ObjectRef obj = CopyTo(args[i], ctx);
         func_args[i - 1] = obj;
       }
@@ -178,15 +175,13 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name,
   }
 }
 
-TVMContext VirtualMachine::GetContext(Index device_type) const {
-  CHECK(!ctxs_.empty()) << "Context has not been initialized yet.";
-
-  const auto& cit = std::find_if(ctxs_.begin(), ctxs_.end(), [&device_type](const TVMContext& c) {
-    return device_type == static_cast<Index>(c.device_type);
-  });
+inline TVMContext VirtualMachine::GetContext(Index device_type) const {
+  CHECK_GT(ctxs_.size(), device_type) << "ctxs_ list doesn't contain device:" << device_type;
 
-  CHECK(cit != ctxs_.end()) << "device type " << device_type << " not found int the context list.";
-  return *cit;
+  auto ctx = ctxs_[device_type];
+  CHECK_EQ(static_cast<Index>(ctx.device_type), device_type)
+      << "device type " << device_type << " has not been initialized int the context list.";
+  return ctx;
 }
 
 void VirtualMachine::PushFrame(Index arg_count, Index ret_pc, const VMFunction& vm_func) {
@@ -294,7 +289,14 @@ void VirtualMachine::LoadExecutable(const Executable* exec) {
 void VirtualMachine::Init(const std::vector<TVMContext>& ctxs,
                           const std::vector<AllocatorType>& alloc_types) {
   CHECK_EQ(ctxs.size(), alloc_types.size());
-  ctxs_ = ctxs;
+  // Cache the context
+  for (const auto& it : ctxs) {
+    auto dev_type = static_cast<size_t>(it.device_type);
+    if (ctxs_.size() <= dev_type) {
+      ctxs_.resize(dev_type + 1);
+    }
+    ctxs_[dev_type] = it;
+  }
   for (size_t i = 0; i < ctxs.size(); ++i) {
     auto alloc = MemoryManager::GetOrCreateAllocator(ctxs[i], alloc_types[i]);
     allocators_.emplace(ctxs[i], alloc);
@@ -484,9 +486,7 @@ void VirtualMachine::RunLoop() {
         goto main_loop;
       }
       case Opcode::AllocTensorReg: {
-        DLContext cpu_ctx;
-        cpu_ctx.device_type = kDLCPU;
-        cpu_ctx.device_id = 0;
+        DLContext cpu_ctx = GetContext(static_cast<Index>(kDLCPU));
         auto shape_obj = ReadRegister(instr.alloc_tensor_reg.shape_register);
         NDArray shape_tensor = Downcast<NDArray>(CopyTo(shape_obj, cpu_ctx));
         auto shape = ToShape(shape_tensor);
@@ -566,9 +566,7 @@ void VirtualMachine::RunLoop() {
         }
       }
       case Opcode::ReshapeTensor: {
-        DLContext cpu_ctx;
-        cpu_ctx.device_type = kDLCPU;
-        cpu_ctx.device_id = 0;
+        DLContext cpu_ctx = GetContext(static_cast<Index>(kDLCPU));
         auto tensor_obj = ReadRegister(instr.reshape_tensor.tensor);
         NDArray tensor_arr = Downcast<NDArray>(tensor_obj);
         // Read the shape from shape tensor

Original file line number	Diff line number	Diff line change
`@@ -644,7 +644,8 @@ Instruction DeserializeInstruction(const VMInstructionSerializer& instr) {`
`644`	`644`	`return Instruction::AllocClosure(clo_index, num_freevar, free_vars, dst);`
`645`	`645`	`}`
`646`	`646`	`case Opcode::AllocStorage: {`
`647`		`- DCHECK_GE(instr.fields.size(), 6U);`
	`647`	`+ // Number of fields = 7`
	`648`	`+ DCHECK_GE(instr.fields.size(), 7U);`
`648`	`649`	`Index allocation_size = instr.fields[0];`
`649`	`650`	`Index alignment = instr.fields[1];`
`650`	`651`