
Commit 537e87f

[OpenCL][Texture] Improved texture memory planning and runtime memory allocation

Motivated by the fact that textures can be allocated over a clBuffer object, and that the size of the backing clBuffer can be computed from the hardware image pitch alignment. This optimizes overall memory allocation on the device and greatly helps models with large memory requirements. The graph memory planner is revised so that it no longer differentiates buffer and texture storage tokens and can reuse them interchangeably. The texture pool in the OpenCL runtime is rebranded as a memory pool that handles allocation of both buffer and image objects. The NDArray-to-DeviceAPI interface is extended with AllocDataSpaceView and FreeDataSpaceView; these new APIs accommodate accessing the same physical memory as clBuffer / clImage objects.

* MemoryPool test cases and lint errors.
* Test cases and fallback support.
* Bug fix and cpp-runtime test cases for texture views.
* Various cl device info organized.
* Fix graph plan memory bug and correct the test case.
* Device attribute handling.
* Some fallback for the texture plan on devices w/o cl_khr_image2d_from_buffer.
* Memory Manager: move the VM memory manager to the runtime level and use it for the graph runtime.
* Resolve conflicts for VerifyDataType and Buffer.
* Review comments.
1 parent e754bc2 commit 537e87f

34 files changed: +941 / -818 lines
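The core idea, sizing the backing clBuffer from the device image pitch alignment and then viewing it as a clImage2D, can be sketched in plain OpenCL. The snippet below is a minimal illustration and not code from this commit; it assumes the device supports cl_khr_image2d_from_buffer, and the helper names are made up.

#include <CL/cl.h>
#include <cstddef>

// Round one image row up to the device pitch alignment (reported in pixels).
size_t RowPitchBytes(cl_device_id dev, size_t width_px, size_t elem_bytes) {
  cl_uint pitch_align_px = 1;
  clGetDeviceInfo(dev, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(pitch_align_px),
                  &pitch_align_px, nullptr);
  size_t align_bytes = pitch_align_px * elem_bytes;
  size_t row_bytes = width_px * elem_bytes;
  return (row_bytes + align_bytes - 1) / align_bytes * align_bytes;
}

// View an existing clBuffer as a 2D RGBA/float32 image.  The backing buffer only
// needs RowPitchBytes(...) * height bytes, which is what makes a shared buffer/image
// pool cheaper than allocating buffers and images separately.
cl_mem ImageViewOverBuffer(cl_context ctx, cl_device_id dev, cl_mem backing_buffer,
                           size_t width_px, size_t height, cl_int* err) {
  cl_image_format fmt{CL_RGBA, CL_FLOAT};      // 4 x float32 per texel
  size_t elem_bytes = 4 * sizeof(float);
  cl_image_desc desc{};
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width_px;
  desc.image_height = height;
  desc.image_row_pitch = RowPitchBytes(dev, width_px, elem_bytes);
  desc.buffer = backing_buffer;                // the clBuffer being viewed
  return clCreateImage(ctx, CL_MEM_READ_WRITE, &fmt, &desc, nullptr, err);
}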

apps/android_camera/app/src/main/jni/tvm_runtime.h

Lines changed: 1 addition & 1 deletion
@@ -61,10 +61,10 @@
 #include "../src/runtime/workspace_pool.cc"
 
 #ifdef TVM_OPENCL_RUNTIME
+#include "../src/runtime/opencl/memory_pool.cc"
 #include "../src/runtime/opencl/opencl_device_api.cc"
 #include "../src/runtime/opencl/opencl_module.cc"
 #include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
-#include "../src/runtime/opencl/texture_pool.cc"
 #include "../src/runtime/source_utils.cc"
 #endif
 

apps/android_deploy/app/src/main/jni/tvm_runtime.h

Lines changed: 0 additions & 1 deletion
@@ -49,6 +49,5 @@
 #include "../src/runtime/opencl/opencl_device_api.cc"
 #include "../src/runtime/opencl/opencl_module.cc"
 #include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
-#include "../src/runtime/opencl/texture_pool.cc"
 #include "../src/runtime/source_utils.cc"
 #endif

apps/android_rpc/app/src/main/jni/tvm_runtime.h

Lines changed: 0 additions & 1 deletion
@@ -66,7 +66,6 @@
 #include "../src/runtime/opencl/opencl_device_api.cc"
 #include "../src/runtime/opencl/opencl_module.cc"
 #include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
-#include "../src/runtime/opencl/texture_pool.cc"
 #include "../src/runtime/source_utils.cc"
 #endif
 

include/tvm/runtime/device_api.h

Lines changed: 21 additions & 0 deletions
@@ -50,6 +50,7 @@ enum DeviceAttrKind : int {
   kApiVersion = 11,
   kDriverVersion = 12,
   kL2CacheSizeBytes = 13,
+  kImagePitchAlignment = 14
 };
 
 #ifdef TVM_KALLOC_ALIGNMENT
@@ -133,12 +134,32 @@ class TVM_DLL DeviceAPI {
    */
   virtual void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
                                Optional<String> mem_scope = NullOpt);
+
+  /*!
+   * \brief Create a new view with given spec over existing tensor.
+   * \param dev The device device to perform operation.
+   * \param data The source array.
+   * \param shape The shape of allocated tensor.
+   * \param dtype The type of elements.
+   * \param mem_scope The memory scope of allocated tensor.
+   * \return The allocated device pointer.
+   */
+  virtual void* AllocDataSpaceView(Device dev, void* data, ShapeTuple shape, DLDataType dtype,
+                                   Optional<String> mem_scope = NullOpt);
   /*!
    * \brief Free a data space on device.
    * \param dev The device device to perform operation.
   * \param ptr The data space.
   */
  virtual void FreeDataSpace(Device dev, void* ptr) = 0;
+
+  /*!
+   * \brief Free a view data space on device.
+   * \param dev The device device to perform operation.
+   * \param ptr The data space view.
+   */
+  virtual void FreeDataSpaceView(Device dev, void* ptr);
+
   /*!
    * \brief copy data from one place to another
    * \note This API is designed to support special memory with shape dependent layout.
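The two new virtuals are not pure, so a backend that knows nothing about texture views can keep working if the base class falls back to treating a view as the original allocation. A minimal sketch assuming that default behaviour; the actual definitions in the commit may differ.

#include <tvm/runtime/device_api.h>

namespace tvm {
namespace runtime {

void* DeviceAPI::AllocDataSpaceView(Device dev, void* data, ShapeTuple shape, DLDataType dtype,
                                    Optional<String> mem_scope) {
  // Fallback: alias the existing allocation.  A specialised backend (e.g. OpenCL with
  // cl_khr_image2d_from_buffer) would instead return a clImage created over the clBuffer.
  return data;
}

void DeviceAPI::FreeDataSpaceView(Device dev, void* ptr) {
  // Fallback: nothing to release; the underlying data space owns the memory.
  // An OpenCL backend would release only the clImage view here.
}

}  // namespace runtime
}  // namespace tvm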

include/tvm/runtime/memory/memory_manager.h

Lines changed: 7 additions & 0 deletions
@@ -160,6 +160,13 @@ class Storage : public ObjectRef {
 };
 
 }  // namespace memory
+
+using memory::Allocator;
+using memory::AllocatorType;
+using memory::Buffer;
+using memory::MemoryManager;
+using memory::StorageObj;
+
 }  // namespace runtime
 }  // namespace tvm
 
include/tvm/runtime/ndarray.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,11 @@ class NDArray : public ObjectRef {
129129
* \brief Create a NDArray that shares the data memory with the current one.
130130
* \param shape The shape of the new array.
131131
* \param dtype The data type of the new array.
132+
* \param mem_scope The memory scope of the array.
132133
* \note The memory size of new array must be smaller than the current one.
133134
*/
134-
TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype);
135+
TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype,
136+
Optional<String> mem_scope = NullOpt);
135137
/*!
136138
* \brief Create a reference view of NDArray that
137139
* represents as DLManagedTensor.
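On the caller's side, the extra mem_scope argument lets an existing buffer-backed array be re-presented as a texture without a second physical allocation. A hedged usage sketch; the shape, dtype, and the "global.texture" scope string are assumptions for illustration.

#include <tvm/runtime/ndarray.h>

using tvm::runtime::NDArray;
using tvm::runtime::ShapeTuple;
using tvm::runtime::String;

NDArray MakeTextureView(NDArray buffer_backed) {
  DLDataType f32{kDLFloat, 32, 1};
  // Same physical memory as `buffer_backed`, but exposed to kernels as a 2D texture view.
  return buffer_backed.CreateView(ShapeTuple({1, 64, 64, 4}), f32, String("global.texture"));
}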

src/relay/backend/graph_plan_memory.cc

Lines changed: 20 additions & 13 deletions
@@ -229,6 +229,16 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
     VLOG_CONTEXT << "StorageAllocator";
     VLOG(1) << "planning:" << std::endl << PrettyPrint(func);
     prototype_ = StorageAllocaInit(&arena_).GetInitTokenMap(func);
+    // Backup the virtual devices as token reuse might lost the original memory scope
+    std::unordered_map<const ExprNode*, std::vector<VirtualDevice>> virtual_device_map_;
+    for (const auto& kv : prototype_) {
+      std::vector<VirtualDevice> virtual_devices;
+      virtual_devices.reserve(kv.second.size());
+      for (StorageToken* tok : kv.second) {
+        virtual_devices.push_back(tok->virtual_device);
+      }
+      virtual_device_map_.insert({kv.first, virtual_devices});
+    }
     this->Run(func);
 
     // The value of smap contains two integer arrays where the first array
@@ -252,9 +262,13 @@
       }
       num_nodes++;
       storage_ids.push_back(tok->storage_id);
-      virtual_devices.push_back(tok->virtual_device);
       sid_sizes_byte.push_back(allocator_.GetMemorySize(tok));
     }
+    ICHECK(kv.second.size() == virtual_device_map_[kv.first].size())
+        << "Mismatch of tokens and virtual devices";
+    for (auto vdev : virtual_device_map_[kv.first]) {
+      virtual_devices.push_back(vdev);
+    }
     auto storage_info = backend::StorageInfo(std::move(storage_ids), std::move(virtual_devices),
                                              std::move(sid_sizes_byte));
     smap.Set(GetRef<Expr>(kv.first), storage_info);
@@ -356,34 +370,27 @@
 
   class TokenAllocator {
    public:
-    StorageToken* Alloc(StorageToken* proto) {
-      return Is2DStorage(proto) ? token_2d_.Alloc(proto, storage_ids_++)
-                                : token_1d_.Alloc(proto, storage_ids_++);
-    }
+    StorageToken* Alloc(StorageToken* proto) { return token_mixed_.Alloc(proto, storage_ids_++); }
     StorageToken* Request(StorageToken* proto) {
-      StorageToken* token =
-          Is2DStorage(proto) ? token_2d_.Request(proto) : token_1d_.Request(proto);
+      StorageToken* token = token_mixed_.Request(proto);
       return token ? token : this->Alloc(proto);
     }
-    void CheckForRelease(StorageToken* tok) {
-      return Is2DStorage(tok) ? token_2d_.CheckForRelease(tok) : token_1d_.CheckForRelease(tok);
-    }
+    void CheckForRelease(StorageToken* tok) { return token_mixed_.CheckForRelease(tok); }
 
     size_t GetMemorySize(StorageToken* tok) {
      // TODO(amalyshe): figure out who requries sizes and for what
      // size in case of texture is not enough - we can return any value if it
      // assumed to be used for memory allocatoion or we can return real size
      // if it is just for information
-      return Is2DStorage(tok) ? 0 : token_1d_.GetMemorySize(tok);
+      return token_mixed_.GetMemorySize(tok);
     }
     static bool Is2DStorage(StorageToken* tok) {
      return relay::Is2DStorage(tok->virtual_device->memory_scope);
    }
 
   private:
    int64_t storage_ids_{0};
-    TokenAllocator1D token_1d_;
-    TokenAllocator2D token_2d_;
+    TokenAllocatorMixed token_mixed_;
  };
 
  private:
