Skip to content

Commit 33e854b

Browse files
committed
[OpenCL][Texture] Improved texture memory planning and runtime memory allocation
Motivated by the fact that textures can be allocated over a clBuffer object, and that the size of the backing clBuffer can be computed from the hardware image pitch alignment. This optimizes overall memory allocation on the device and greatly helps models with large memory requirements. Improved the graph memory planner so it no longer differentiates buffer and texture storage tokens and can reuse them interchangeably. The texture pool in the OpenCL runtime is rebranded as a memory pool that handles allocation for both buffer and image objects. The NDArray-to-DeviceAPI interface is extended with AllocDataSpaceView and FreeDataSpaceView; these new APIs accommodate accessing the same physical memory as either clBuffer or clImage objects.
1 parent 2d2b727 commit 33e854b

File tree

20 files changed

+614
-489
lines changed

20 files changed

+614
-489
lines changed

apps/android_camera/app/src/main/jni/tvm_runtime.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
#include "../src/runtime/opencl/opencl_device_api.cc"
6464
#include "../src/runtime/opencl/opencl_module.cc"
6565
#include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
66-
#include "../src/runtime/opencl/texture_pool.cc"
66+
#include "../src/runtime/opencl/memory_pool.cc"
6767
#include "../src/runtime/source_utils.cc"
6868
#endif
6969

apps/android_deploy/app/src/main/jni/tvm_runtime.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@
4848
#include "../src/runtime/opencl/opencl_device_api.cc"
4949
#include "../src/runtime/opencl/opencl_module.cc"
5050
#include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
51-
#include "../src/runtime/opencl/texture_pool.cc"
51+
#include "../src/runtime/opencl/memory_pool.cc"
5252
#include "../src/runtime/source_utils.cc"
5353
#endif

apps/android_rpc/app/src/main/jni/tvm_runtime.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
#include "../src/runtime/opencl/opencl_device_api.cc"
6666
#include "../src/runtime/opencl/opencl_module.cc"
6767
#include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
68-
#include "../src/runtime/opencl/texture_pool.cc"
68+
#include "../src/runtime/opencl/memory_pool.cc"
6969
#include "../src/runtime/source_utils.cc"
7070
#endif
7171

include/tvm/runtime/device_api.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ enum DeviceAttrKind : int {
4848
kMaxRegistersPerBlock = 9,
4949
kGcnArch = 10,
5050
kApiVersion = 11,
51-
kDriverVersion = 12
51+
kDriverVersion = 12,
52+
kImagePitchAlignment = 13
5253
};
5354

5455
#ifdef TVM_KALLOC_ALIGNMENT
@@ -124,12 +125,33 @@ class TVM_DLL DeviceAPI {
124125
*/
125126
virtual void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
126127
Optional<String> mem_scope = NullOpt);
128+
129+
/*!
130+
* \brief Create a new view with given spec over existing tensor.
131+
* \param dev The device device to perform operation.
132+
* \param data The source array.
133+
* \param ndim The number of dimension of allocated tensor.
134+
* \param shape The shape of allocated tensor.
135+
* \param dtype The type of elements.
136+
* \param mem_scope The memory scope of allocated tensor.
137+
* \return The allocated device pointer.
138+
*/
139+
virtual void* AllocDataSpaceView(Device dev, void* data, int ndim, const int64_t* shape,
140+
DLDataType dtype, Optional<String> mem_scope = NullOpt);
127141
/*!
128142
* \brief Free a data space on device.
129143
* \param dev The device device to perform operation.
130144
* \param ptr The data space.
131145
*/
132146
virtual void FreeDataSpace(Device dev, void* ptr) = 0;
147+
148+
/*!
149+
* \brief Free a view data space on device.
150+
* \param dev The device device to perform operation.
151+
* \param ptr The data space view.
152+
*/
153+
virtual void FreeDataSpaceView(Device dev, void* ptr);
154+
133155
/*!
134156
* \brief copy data from one place to another
135157
* \note This API is designed to support special memory with shape dependent layout.

include/tvm/runtime/ndarray.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,11 @@ class NDArray : public ObjectRef {
128128
* \brief Create a NDArray that shares the data memory with the current one.
129129
* \param shape The shape of the new array.
130130
* \param dtype The data type of the new array.
131+
* \param mem_scope The memory scope of the array.
131132
* \note The memory size of new array must be smaller than the current one.
132133
*/
133-
TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype);
134+
TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype,
135+
Optional<String> mem_scope = NullOpt);
134136
/*!
135137
* \brief Create a reference view of NDArray that
136138
* represents as DLManagedTensor.

src/relay/backend/graph_plan_memory.cc

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ class StorageAllocaBaseVisitor : public transform::DeviceAwareExprVisitor {
120120
protected:
121121
/*! \brief internal token map */
122122
std::unordered_map<const ExprNode*, std::vector<StorageToken*>> token_map_;
123+
/*! \brief the virtual device map */
124+
std::unordered_map<const ExprNode*, VirtualDevice> virtual_device_map_;
123125
/*! \brief empty token map */
124126
const std::vector<StorageToken*> no_tokens_;
125127

@@ -246,13 +248,12 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
246248
sid_sizes_byte.reserve(kv.second.size());
247249

248250
for (StorageToken* tok : kv.second) {
249-
VLOG(1) << "token: " << tok->ToString();
250251
if (tok->is_valid()) {
251252
num_annotated_nodes++;
252253
}
253254
num_nodes++;
254255
storage_ids.push_back(tok->storage_id);
255-
virtual_devices.push_back(tok->virtual_device);
256+
virtual_devices.push_back(virtual_device_map_[kv.first]);
256257
sid_sizes_byte.push_back(allocator_.GetMemorySize(tok));
257258
}
258259
auto storage_info = backend::StorageInfo(std::move(storage_ids), std::move(virtual_devices),
@@ -293,6 +294,7 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
293294
}
294295
}
295296
token_map_[op] = tokens;
297+
virtual_device_map_[op] = virtual_device;
296298
}
297299

298300
// Mark op to reuse the input_token
@@ -356,34 +358,27 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
356358

357359
class TokenAllocator {
358360
public:
359-
StorageToken* Alloc(StorageToken* proto) {
360-
return Is2DStorage(proto) ? token_2d_.Alloc(proto, storage_ids_++)
361-
: token_1d_.Alloc(proto, storage_ids_++);
362-
}
361+
StorageToken* Alloc(StorageToken* proto) { return token_mixed_.Alloc(proto, storage_ids_++); }
363362
StorageToken* Request(StorageToken* proto) {
364-
StorageToken* token =
365-
Is2DStorage(proto) ? token_2d_.Request(proto) : token_1d_.Request(proto);
363+
StorageToken* token = token_mixed_.Request(proto);
366364
return token ? token : this->Alloc(proto);
367365
}
368-
void CheckForRelease(StorageToken* tok) {
369-
return Is2DStorage(tok) ? token_2d_.CheckForRelease(tok) : token_1d_.CheckForRelease(tok);
370-
}
366+
void CheckForRelease(StorageToken* tok) { return token_mixed_.CheckForRelease(tok); }
371367

372368
size_t GetMemorySize(StorageToken* tok) {
373369
// TODO(amalyshe): figure out who requries sizes and for what
374370
// size in case of texture is not enough - we can return any value if it
375371
// assumed to be used for memory allocatoion or we can return real size
376372
// if it is just for information
377-
return Is2DStorage(tok) ? 0 : token_1d_.GetMemorySize(tok);
373+
return token_mixed_.GetMemorySize(tok);
378374
}
379375
static bool Is2DStorage(StorageToken* tok) {
380376
return relay::Is2DStorage(tok->virtual_device->memory_scope);
381377
}
382378

383379
private:
384380
int64_t storage_ids_{0};
385-
TokenAllocator1D token_1d_;
386-
TokenAllocator2D token_2d_;
381+
TokenAllocatorMixed token_mixed_;
387382
};
388383

389384
private:

src/relay/backend/token_allocator.cc

Lines changed: 55 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,30 @@
3131

3232
namespace tvm {
3333
namespace relay {
34+
constexpr auto Is2DStorage = runtime::IsTextureStorage;
3435

35-
size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
36+
/*
37+
* Mixed mode memory allocator
38+
*/
39+
size_t TokenAllocatorMixed::GetMemorySize(StorageToken* prototype) {
3640
TensorType ttype = prototype->ttype;
3741
ICHECK(ttype.defined());
3842
size_t size = 1;
39-
for (IndexExpr dim : ttype->shape) {
40-
const int64_t* pval = tir::as_const_int(dim);
41-
ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
42-
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
43-
size *= static_cast<size_t>(pval[0]);
43+
if (relay::Is2DStorage(prototype->virtual_device->memory_scope)) {
44+
size = GetSize2D(prototype);
45+
} else {
46+
for (IndexExpr dim : ttype->shape) {
47+
const int64_t* pval = tir::as_const_int(dim);
48+
ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
49+
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
50+
size *= static_cast<size_t>(pval[0]);
51+
}
52+
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
4453
}
45-
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
4654
return size;
4755
}
4856

49-
StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
57+
StorageToken* TokenAllocatorMixed::Request(StorageToken* prototype) {
5058
// calculate the size;
5159
size_t size = GetMemorySize(prototype);
5260
// search memory block in [size / match_range_, size * match_range_)
@@ -56,145 +64,77 @@ StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
5664
auto begin = free_.lower_bound(size / match_range_);
5765
auto mid = free_.lower_bound(size);
5866
auto end = free_.upper_bound(size * match_range_);
67+
auto prototype_keys = prototype->virtual_device->target->GetKeys();
68+
bool is_prototype_adreno =
69+
std::find(prototype_keys.begin(), prototype_keys.end(), "adreno") != prototype_keys.end();
5970
// search for memory blocks larger than requested
6071
for (auto it = mid; it != end; ++it) {
6172
StorageToken* tok = it->second;
62-
if (!tok->is_compatible(*prototype)) continue;
63-
ICHECK_EQ(tok->ref_counter, 0);
64-
// Use exect matching strategy
65-
tok->max_bytes = std::max(size, tok->max_bytes);
66-
tok->ref_counter = prototype->ref_counter;
67-
// find a exact match, erase from map and return
68-
free_.erase(it);
69-
return tok;
73+
// TODO(Siva): We need a additional ways of comparing VirtualDevice
74+
auto tok_keys = tok->virtual_device->target->GetKeys();
75+
bool is_tok_adreno = std::find(tok_keys.begin(), tok_keys.end(), "adreno") != tok_keys.end();
76+
77+
if (tok->is_compatible(*prototype) || (is_prototype_adreno && is_tok_adreno)) {
78+
ICHECK_EQ(tok->ref_counter, 0);
79+
// Use exect matching strategy
80+
tok->max_bytes = std::max(size, tok->max_bytes);
81+
tok->ref_counter = prototype->ref_counter;
82+
// find a exact match, erase from map and return
83+
free_.erase(it);
84+
return tok;
85+
}
7086
}
7187
// then search for memory blocks smaller than requested space
7288
for (auto it = mid; it != begin;) {
7389
--it;
7490
StorageToken* tok = it->second;
75-
if (!tok->is_compatible(*prototype)) continue;
76-
ICHECK_EQ(tok->ref_counter, 0);
77-
// Use exect matching strategy
78-
tok->max_bytes = std::max(size, tok->max_bytes);
79-
tok->ref_counter = prototype->ref_counter;
80-
// erase from map and return
81-
free_.erase(it);
82-
return tok;
91+
auto tok_keys = tok->virtual_device->target->GetKeys();
92+
bool is_tok_adreno = std::find(tok_keys.begin(), tok_keys.end(), "adreno") != tok_keys.end();
93+
if (tok->is_compatible(*prototype) || (is_prototype_adreno && is_tok_adreno)) {
94+
ICHECK_EQ(tok->ref_counter, 0);
95+
// Use exect matching strategy
96+
tok->max_bytes = std::max(size, tok->max_bytes);
97+
tok->ref_counter = prototype->ref_counter;
98+
// erase from map and return
99+
free_.erase(it);
100+
return tok;
101+
}
83102
}
84103
return nullptr;
85104
}
86105

87-
StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
106+
StorageToken* TokenAllocatorMixed::Alloc(StorageToken* prototype, int64_t storage_id) {
88107
size_t size = GetMemorySize(prototype);
89108
prototype->max_bytes = size;
90109
prototype->storage_id = storage_id;
91110
data_.push_back(prototype);
92111
return prototype;
93112
}
94113

95-
void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
114+
void TokenAllocatorMixed::CheckForRelease(StorageToken* tok) {
96115
ICHECK_GE(tok->storage_id, 0);
97116
ICHECK_GE(tok->ref_counter, 0);
98117
if (tok->ref_counter == 0) {
99118
free_.insert({tok->max_bytes, tok});
100119
}
101120
}
102121

103-
StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
104-
auto shape = GetSize2D(prototype);
105-
const int64_t max_ratio = 5;
106-
int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
107-
int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
108-
int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
109-
int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
110-
int64_t best_storage_id = -1;
111-
MemBlock new_mem;
112-
for (int64_t free_id : free_list_) {
113-
MemBlock& cached = blocks_[free_id];
114-
// Can only reuse texture 2d blocks of the same type
115-
if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
116-
continue;
117-
}
118-
// Can only reuse texture 2d blocks of the same scope
119-
// Because reusing textures with different memory scope may lead to
120-
// accuracy issues, because the data will be packed in a different way for
121-
// different memory scopes.
122-
if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
123-
continue;
124-
}
125-
// avoid reusing too small and too big textures
126-
if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
127-
shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
128-
continue;
129-
}
130-
int64_t new_width = std::max(cached.x_, shape.width);
131-
int64_t new_height = std::max(cached.y_, shape.height);
132-
int64_t added_size_x = new_width - cached.x_;
133-
int64_t added_size_y = new_height - cached.y_;
134-
int64_t wasted_size_x = new_width - shape.width;
135-
int64_t wasted_size_y = new_height - shape.height;
136-
// Prioritize minimization of added size first, then minimize
137-
// wasted size among blocks which would not require expansion
138-
if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
139-
(min_added_size_y > 0 && added_size_y < min_added_size_y) ||
140-
(min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
141-
(min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
142-
min_added_size_x = added_size_x;
143-
min_added_size_y = added_size_y;
144-
min_wasted_size_x = wasted_size_x;
145-
min_wasted_size_y = wasted_size_y;
146-
best_storage_id = free_id;
147-
new_mem.x_ = new_width;
148-
new_mem.y_ = new_height;
149-
}
150-
}
151-
152-
if (min_added_size_x == 0 && min_added_size_y == 0) {
153-
// use existing block
154-
free_list_.erase(best_storage_id);
155-
blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
156-
return blocks_[best_storage_id].token_;
157-
} else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
158-
// Reset the reference counter of the now live token
159-
free_list_.erase(best_storage_id);
160-
new_mem.token_ = prototype;
161-
new_mem.token_->ref_counter += 1;
162-
new_mem.token_->storage_id = best_storage_id;
163-
blocks_[best_storage_id] = new_mem;
164-
return new_mem.token_;
165-
}
166-
return nullptr;
167-
}
168-
169-
StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
170-
auto shape = GetSize2D(prototype);
171-
MemBlock block;
172-
block.x_ = shape.width;
173-
block.y_ = shape.height;
174-
prototype->storage_id = storage_id;
175-
block.token_ = prototype;
176-
blocks_[prototype->storage_id] = block;
177-
return prototype;
178-
}
179-
180-
void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
181-
ICHECK_GE(tok->storage_id, 0);
182-
ICHECK_GE(tok->ref_counter, 0);
183-
if (tok->ref_counter == 0) {
184-
free_list_.insert(tok->storage_id);
185-
}
186-
}
187-
188-
runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
122+
size_t TokenAllocatorMixed::GetSize2D(StorageToken* prototype) {
189123
TensorType ttype = prototype->ttype;
190124
ICHECK(ttype.defined());
191-
size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
192-
prototype->virtual_device->memory_scope);
193125
struct Shape {
194126
const Array<PrimExpr>& shape;
195127
int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
128+
int size() { return this->shape.size(); }
196129
};
197-
return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
130+
auto shape = Shape{ttype->shape};
131+
int image_row_align =
132+
prototype->virtual_device->target->GetAttr<Integer>("image_base_address_alignment")
133+
.value_or(Integer(64))
134+
->value;
135+
return runtime::GetTextureMemorySize<Shape>(shape, ttype->dtype.bits(), ttype->dtype.lanes(),
136+
prototype->virtual_device->memory_scope,
137+
image_row_align);
198138
}
199139

200140
} // namespace relay

0 commit comments

Comments
 (0)