Skip to content

Commit 2e571e2

Browse files
committed
* Add a fallback for the texture plan on devices without cl_khr_image2d_from_buffer
1 parent 7d66f71 commit 2e571e2

File tree

5 files changed

+103
-55
lines changed

5 files changed

+103
-55
lines changed

src/relay/backend/graph_plan_memory.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
256256
sid_sizes_byte.reserve(kv.second.size());
257257

258258
for (StorageToken* tok : kv.second) {
259+
VLOG(1) << "token: " << tok->ToString();
259260
if (tok->is_valid()) {
260261
num_annotated_nodes++;
261262
}

src/runtime/opencl/opencl_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,10 +417,12 @@ struct BufferDescriptor {
417417
static String ScopeFromMemoryLayout(MemoryLayout mem_scope);
418418

419419
/* clBuffer object */
420+
// buffer should be the first element here
420421
cl_mem buffer{nullptr};
421422
cl::BufferDescriptor* back_buffer{nullptr};
422423
cl_uchar* host_ptr{nullptr};
423424
MemoryLayout layout{MemoryLayout::kBuffer1D};
425+
bool is_compat_view{false};
424426
};
425427
} // namespace cl
426428

src/runtime/opencl/opencl_device_api.cc

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,7 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
199199
break;
200200
}
201201
case kImagePitchAlignment: {
202-
cl_uint row_pitch;
203-
OPENCL_CALL(clGetDeviceInfo(device_id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, sizeof(row_pitch),
204-
&row_pitch, nullptr));
205-
*rv = static_cast<int64_t>(row_pitch);
202+
*rv = static_cast<int64_t>(device_info[device_id].image_row_align);
206203
break;
207204
}
208205
}
@@ -280,12 +277,45 @@ void* OpenCLWorkspace::AllocCLImage(Device dev, void* back_buffer, size_t width,
280277
return desc;
281278
}
282279

280+
/*!
 * \brief Compute the size of a memory object described by device, shape and dtype.
 *
 * Fills a temporary DLTensor with the given description (no data pointer, no
 * strides, zero byte offset) and returns the value GetDataSize reports for it.
 *
 * \param dev   Device stored in the temporary tensor descriptor.
 * \param ndim  Number of entries in \p shape.
 * \param shape Extents of the tensor, one per dimension.
 * \param dtype Element data type.
 * \return The size returned by GetDataSize for the described tensor.
 */
static size_t GetMemObjectSize(Device dev, int ndim, const int64_t* shape, DLDataType dtype) {
281+
DLTensor temp;
282+
// Only a description is needed; no storage is attached to the temporary.
temp.data = nullptr;
283+
temp.device = dev;
284+
temp.ndim = ndim;
285+
temp.dtype = dtype;
286+
// The cast only removes const so the pointer can be stored in the descriptor;
// this function itself never writes through it.
// NOTE(review): presumably GetDataSize only reads shape - confirm.
temp.shape = const_cast<int64_t*>(shape);
287+
temp.strides = nullptr;
288+
temp.byte_offset = 0;
289+
size_t size = GetDataSize(temp);
290+
return size;
291+
}
292+
283293
void* OpenCLWorkspace::AllocDataSpaceView(Device dev, void* data, int ndim, const int64_t* shape,
284294
DLDataType dtype, Optional<String> mem_scope) {
295+
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(data);
296+
297+
// Fall back for devices w/o "cl_khr_image2d_from_buffer"
285298
if (!IsBufferToImageSupported(dev.device_id)) {
286-
return data;
299+
cl::BufferDescriptor* ret_desc = desc; // buffer -> buffer
300+
if (!mem_scope.defined() || mem_scope.value() == "global") {
301+
if (desc->layout != cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
302+
// image -> buffer
303+
size_t nbytes = GetMemObjectSize(dev, ndim, shape, dtype);
304+
ret_desc = static_cast<cl::BufferDescriptor*>(
305+
OpenCLWorkspace::AllocCLBuffer(dev, nbytes, kTempAllocaAlignment, dtype));
306+
ret_desc->is_compat_view = true;
307+
}
308+
} else {
309+
// Any -> Image
310+
size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
311+
auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
312+
size_t row_pitch = GetRowPitch(dev, texture.width, dtype);
313+
ret_desc = static_cast<cl::BufferDescriptor*>(OpenCLWorkspace::Global()->AllocCLImage(
314+
dev, nullptr, texture.width, texture.height, row_pitch, dtype, mem_scope));
315+
ret_desc->is_compat_view = true;
316+
}
317+
return ret_desc;
287318
}
288-
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(data);
289319

290320
if (!mem_scope.defined() || mem_scope.value() == "global") {
291321
if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
@@ -298,7 +328,6 @@ void* OpenCLWorkspace::AllocDataSpaceView(Device dev, void* data, int ndim, cons
298328
}
299329
size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
300330
auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
301-
302331
size_t row_pitch = GetRowPitch(dev, texture.width, dtype);
303332

304333
cl::BufferDescriptor* back_buffer;
@@ -314,6 +343,24 @@ void* OpenCLWorkspace::AllocDataSpaceView(Device dev, void* data, int ndim, cons
314343
row_pitch, dtype, mem_scope);
315344
}
316345

346+
/*!
 * \brief Release a view previously handed out for a data space.
 *
 * \param dev Device whose command queue is finished before anything is released.
 * \param ptr Pointer that is interpreted as a cl::BufferDescriptor.
 *
 * Descriptors that are neither flagged as compat views (fallback path) nor
 * non-1D layouts (regular path) are left untouched by this function.
 */
void OpenCLWorkspace::FreeDataSpaceView(Device dev, void* ptr) {
347+
// Wait for the device queue to finish before releasing any memory object.
// NOTE(review): presumably guards against commands still using the view - confirm.
OPENCL_CALL(clFinish(this->GetQueue(dev)));
348+
auto* desc = static_cast<const cl::BufferDescriptor*>(ptr);
349+
// Fallback for devices where IsBufferToImageSupported() reports false:
// only descriptors flagged with is_compat_view are released and deleted here.
350+
if (!IsBufferToImageSupported(dev.device_id)) {
351+
if (desc->is_compat_view) {
352+
OPENCL_CALL(clReleaseMemObject(desc->buffer));
353+
delete desc;
354+
}
355+
// Descriptors without the flag are not freed by this function.
return;
356+
}
357+
358+
// Regular path: any descriptor whose layout is not kBuffer1D has its
// cl_mem released and the descriptor deleted; kBuffer1D descriptors are kept.
if (desc->layout != cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
359+
OPENCL_CALL(clReleaseMemObject(desc->buffer));
360+
delete desc;
361+
}
362+
}
363+
317364
void* OpenCLWorkspace::GetNativePtr(const tvm::runtime::NDArray& narr) {
318365
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(narr.operator->()->data);
319366
return desc->host_ptr;
@@ -329,16 +376,22 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
329376
clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
330377
reinterpret_cast<void*>(desc->host_ptr), 0, nullptr, nullptr);
331378
}
332-
if (!IsBufferToImageSupported(dev.device_id)) {
333-
OPENCL_CALL(clReleaseMemObject(desc->buffer));
334-
return;
335-
}
379+
336380
if (desc->back_buffer) {
381+
// 2D Image w/ back buffer allocated from pool
337382
OPENCL_CALL(clReleaseMemObject(desc->buffer));
338383
GetThreadEntry()->mpool.FreeMemory(dev, desc->back_buffer);
339384
delete desc;
340385
} else {
341-
GetThreadEntry()->mpool.FreeMemory(dev, desc);
386+
if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
387+
// 1D buffer allocated from pool
388+
GetThreadEntry()->mpool.FreeMemory(dev, desc);
389+
} else if (!IsBufferToImageSupported(dev.device_id)) {
390+
// 2D Image allocated w/o pool
391+
OPENCL_CALL(clReleaseMemObject(desc->buffer));
392+
delete desc;
393+
return;
394+
}
342395
}
343396
}
344397

@@ -349,18 +402,6 @@ void OpenCLWorkspace::FreeCLBuffer(Device dev, void* ptr) {
349402
delete desc;
350403
}
351404

352-
void OpenCLWorkspace::FreeDataSpaceView(Device dev, void* ptr) {
353-
OPENCL_CALL(clFinish(this->GetQueue(dev)));
354-
if (!IsBufferToImageSupported(dev.device_id)) {
355-
return;
356-
}
357-
auto* desc = static_cast<const cl::BufferDescriptor*>(ptr);
358-
if (desc->layout != cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
359-
OPENCL_CALL(clReleaseMemObject(desc->buffer));
360-
delete desc;
361-
}
362-
}
363-
364405
void OpenCLWorkspace::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
365406
size_t nbytes = GetDataSize(*from);
366407
ICHECK_EQ(nbytes, GetDataSize(*to));
@@ -593,14 +634,15 @@ void OpenCLWorkspace::Init(const std::string& type_key, const std::string& devic
593634
cl_uint row_pitch;
594635
OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, sizeof(row_pitch),
595636
&row_pitch, nullptr));
637+
if (0 == row_pitch) {
638+
row_pitch = kAllocAlignment; // Fallback
639+
}
596640
dev_info.image_row_align = row_pitch;
597-
598641
size_t reqd_size = 0;
599642
OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_EXTENSIONS, 0, nullptr, &reqd_size));
600643
std::vector<char> extn_buf(reqd_size);
601644
OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_EXTENSIONS, reqd_size, extn_buf.data(), nullptr));
602645
std::string extensions(extn_buf.data());
603-
LOG(WARNING) << "OpenCL Extensions:" << extensions;
604646

605647
if (extensions.find("cl_khr_image2d_from_buffer") != std::string::npos) {
606648
dev_info.image_from_buffer_support = true;

tests/cpp-runtime/opencl/opencl_texture_pool_test.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ TEST(OpenCLTexturePool, reuse_buffers_as_texture_back_buffers) {
150150
DLDataType type{kDLFloat, 16, 1};
151151
auto did = t->device.device_id;
152152

153+
if (!workspace->IsBufferToImageSupported(did)) {
154+
return;
155+
}
156+
153157
// Allocate and free 2 buffer
154158
void* buf_data1 = pool.AllocMemory(t->device, 10240, type);
155159
EXPECT_EQ(pool.AllocatedListSize(did), 1);
@@ -191,6 +195,11 @@ TEST(OpenCLTexturePool, views_over_buffers) {
191195
MemoryPoolWrapper pool(t->device, workspace);
192196
DLDataType type{kDLFloat, 16, 1};
193197
auto did = t->device.device_id;
198+
199+
if (!workspace->IsBufferToImageSupported(did)) {
200+
return;
201+
}
202+
194203
Optional<String> mem_scope("global.texture");
195204
Optional<String> global_scope("global");
196205

@@ -245,6 +254,11 @@ TEST(OpenCLTexturePool, views_over_images) {
245254
MemoryPoolWrapper pool(t->device, workspace);
246255
DLDataType type{kDLFloat, 16, 1};
247256
auto did = t->device.device_id;
257+
258+
if (!workspace->IsBufferToImageSupported(did)) {
259+
return;
260+
}
261+
248262
Optional<String> mem_scope("global.texture");
249263
Optional<String> global_scope("global");
250264

tests/cpp/texture_copy_test.cc

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -98,39 +98,28 @@ TEST(TextureCopy, OverwritePoolSubview) {
9898
static_cast<float*>(cpu_pool0->data)[i] = random(mt);
9999
}
100100

101-
// Random initialize host array
102-
for (int64_t h = 0; h < shape[0]; h++) {
103-
for (int64_t w = 0; w < shape[1]; w++) {
104-
for (int64_t rgba = 0; rgba < shape[2]; rgba++) {
105-
static_cast<float*>(cpu_arr0->data)[shape[1] * shape[2] * h + shape[2] * w + rgba] = 1.1f;
106-
}
107-
}
101+
// Random initialize host array storage
102+
for (size_t i = 0; i < size; i++) {
103+
static_cast<float*>(cpu_arr0->data)[i] = random(mt);
108104
}
109105

110-
// Copy to texture pool for initialization
106+
// Loop through pool
111107
cpu_pool0.CopyTo(opencl_txpool);
112-
// Copy host data to subview into texture storage
113-
cpu_arr0.CopyTo(opencl_txarr0);
114-
// Copy modified pool back
115108
opencl_txpool.CopyTo(cpu_pool1);
116109

117-
// Check that modifications to pool follow two dimensional
118-
// strides according to the written texture shape.
119-
for (int64_t h = 0; h < shape_pool[0]; h++) {
120-
for (int64_t w = 0; w < shape_pool[1]; w++) {
121-
for (int64_t rgba = 0; rgba < shape_pool[2]; rgba++) {
122-
size_t i = shape_pool[1] * shape_pool[2] * h + shape_pool[2] * w + rgba;
123-
if (h < shape[0] && w < shape[1] && rgba < shape[2]) {
124-
size_t j = shape[1] * shape[2] * h + shape[2] * w + rgba;
125-
ICHECK_LT(std::fabs(static_cast<float*>(cpu_pool1->data)[i] -
126-
static_cast<float*>(cpu_arr0->data)[j]),
127-
1e-5);
128-
} else {
129-
ICHECK_LT(std::fabs(static_cast<float*>(cpu_pool1->data)[i] -
130-
static_cast<float*>(cpu_pool0->data)[i]),
131-
1e-5);
132-
}
133-
}
134-
}
110+
for (size_t i = 0; i < size_pool; i++) {
111+
ICHECK_LT(std::fabs(static_cast<float*>(cpu_pool0->data)[i] -
112+
static_cast<float*>(cpu_pool1->data)[i]),
113+
1e-5);
114+
}
115+
116+
// Loop through view
117+
cpu_arr0.CopyTo(opencl_txarr0);
118+
opencl_txarr0.CopyTo(cpu_arr1);
119+
120+
for (size_t i = 0; i < size; i++) {
121+
ICHECK_LT(
122+
std::fabs(static_cast<float*>(cpu_arr0->data)[i] - static_cast<float*>(cpu_arr1->data)[i]),
123+
1e-5);
135124
}
136125
}

0 commit comments

Comments
 (0)