Skip to content

Commit 2f5a3aa

Browse files
Lunderbergtrevor-m
authored andcommitted
[Runtime] Driver version + consistent clock speed units (apache#7867)
* Added kDriverVersion to DeviceAttrKind, implemented for VulkanDeviceAPI. The vulkan backend has had inconsistencies that look correlated to drivers used. This will help in collecting information for troubleshooting. * Changed units for OpenCL's clock rate from MHz to kHz, to match Cuda/ROCm. * [Docs][Runtime] Additional documentation for tvm.runtime.Device, DeviceAPI feature matching Primarily documentation, with some changes to the OpenCL DeviceAPI to match available features in cuda/vulkan. * Added CL_TARGET_OPENCL_VERSION definition, for use with unified OpenCL headers. Co-authored-by: Eric Lunderberg <elunderberg@octoml.ai>
1 parent 8cb612f commit 2f5a3aa

File tree

8 files changed

+213
-29
lines changed

8 files changed

+213
-29
lines changed

include/tvm/runtime/device_api.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ enum DeviceAttrKind : int {
4747
kMaxThreadDimensions = 8,
4848
kMaxRegistersPerBlock = 9,
4949
kGcnArch = 10,
50-
kApiVersion = 11
50+
kApiVersion = 11,
51+
kDriverVersion = 12
5152
};
5253

5354
/*! \brief Number of bytes each allocation must align to */

python/tvm/_ffi/runtime_ctypes.py

Lines changed: 148 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,16 @@ def __ne__(self, other):
150150

151151

152152
class Device(ctypes.Structure):
153-
"""TVM device strucure."""
153+
"""TVM device strucure.
154+
155+
Typically constructed using convenience function
156+
:meth:`tvm.runtime.device`.
157+
158+
Exposes uniform interface to device-specific APIs such as CUDA or
159+
OpenCL. Some properties may return None depending on whether an
160+
API exposes that particular property.
161+
162+
"""
154163

155164
_fields_ = [("device_type", ctypes.c_int), ("device_id", ctypes.c_int)]
156165
MASK2STR = {
@@ -205,63 +214,190 @@ def _GetDeviceAttr(self, device_type, device_id, attr_id):
205214

206215
@property
207216
def exist(self):
208-
"""Whether this device exist."""
217+
"""Whether this device exists.
218+
219+
Returns True if TVM has support for the device, if the
220+
physical device is present, and the device is accessible
221+
through appropriate drivers (e.g. cuda/vulkan).
222+
223+
Returns
224+
-------
225+
exist : bool
226+
True if the device exists
227+
228+
"""
209229
return self._GetDeviceAttr(self.device_type, self.device_id, 0) != 0
210230

211231
@property
212232
def max_threads_per_block(self):
213-
"""Maximum number of threads on each block."""
233+
"""Maximum number of threads on each block.
234+
235+
Returns device value for cuda, metal, rocm, opencl, and vulkan
236+
devices. Returns remote device value for RPC devices.
237+
Returns None for all other devices.
238+
239+
Returns
240+
-------
241+
max_threads_per_block : int or None
242+
The number of threads on each block
243+
244+
"""
214245
return self._GetDeviceAttr(self.device_type, self.device_id, 1)
215246

216247
@property
217248
def warp_size(self):
218-
"""Number of threads that executes in concurrent."""
249+
"""Number of threads that execute concurrently.
250+
251+
Returns device value for for cuda, rocm, and vulkan. Returns
252+
1 for metal and opencl devices, regardless of the physical
253+
device. Returns remote device value for RPC devices. Returns
254+
None for all other devices.
255+
256+
Returns
257+
-------
258+
warp_size : int or None
259+
Number of threads that execute concurrently
260+
261+
"""
219262
return self._GetDeviceAttr(self.device_type, self.device_id, 2)
220263

221264
@property
222265
def max_shared_memory_per_block(self):
223-
"""Total amount of shared memory per block in bytes."""
266+
"""Total amount of shared memory per block in bytes.
267+
268+
Returns device value for cuda, rocm, opencl, and vulkan.
269+
Returns remote device value for RPC devices. Returns None for
270+
all other devices.
271+
272+
Returns
273+
-------
274+
max_shared_memory_per_block : int or None
275+
Total amount of shared memory per block in bytes
276+
277+
"""
224278
return self._GetDeviceAttr(self.device_type, self.device_id, 3)
225279

226280
@property
227281
def compute_version(self):
228-
"""Get compute verison number in string.
282+
"""Get compute version number as string.
229283
230-
Currently used to get compute capability of CUDA device.
284+
Returns maximum API version (e.g. CUDA/OpenCL/Vulkan)
285+
supported by the device.
286+
287+
Returns device value for cuda, rocm, opencl, and
288+
vulkan. Returns remote device value for RPC devices. Returns
289+
None for all other devices.
231290
232291
Returns
233292
-------
234-
version : str
293+
version : str or None
235294
The version string in `major.minor` format.
295+
236296
"""
237297
return self._GetDeviceAttr(self.device_type, self.device_id, 4)
238298

239299
@property
240300
def device_name(self):
241-
"""Return the string name of device."""
301+
"""Return the vendor-specific name of device.
302+
303+
Returns device value for cuda, rocm, opencl, and vulkan.
304+
Returns remote device value for RPC devices. Returns None for
305+
all other devices.
306+
307+
Returns
308+
-------
309+
device_name : str or None
310+
The name of the device.
311+
312+
"""
242313
return self._GetDeviceAttr(self.device_type, self.device_id, 5)
243314

244315
@property
245316
def max_clock_rate(self):
246-
"""Return the max clock frequency of device."""
317+
"""Return the max clock frequency of device (kHz).
318+
319+
Returns device value for cuda, rocm, and opencl. Returns
320+
remote device value for RPC devices. Returns None for all
321+
other devices.
322+
323+
Returns
324+
-------
325+
max_clock_rate : int or None
326+
The maximum clock frequency of the device (kHz)
327+
328+
"""
247329
return self._GetDeviceAttr(self.device_type, self.device_id, 6)
248330

249331
@property
250332
def multi_processor_count(self):
251-
"""Return the number of compute units of device."""
333+
"""Return the number of compute units in the device.
334+
335+
Returns device value for cuda, rocm, and opencl. Returns
336+
remote device value for RPC devices. Returns None for all
337+
other devices.
338+
339+
Returns
340+
-------
341+
multi_processor_count : int or None
342+
Thee number of compute units in the device
343+
344+
"""
252345
return self._GetDeviceAttr(self.device_type, self.device_id, 7)
253346

254347
@property
255348
def max_thread_dimensions(self):
256349
"""Return the maximum size of each thread axis
257350
351+
Returns device value for cuda, rocm, opencl, and vulkan.
352+
Returns remote device value for RPC devices. Returns None for
353+
all other devices.
354+
258355
Returns
259356
-------
260-
dims: List of int
357+
dims: List of int, or None
261358
The maximum length of threadIdx.x, threadIdx.y, threadIdx.z
359+
262360
"""
263361
return json.loads(self._GetDeviceAttr(self.device_type, self.device_id, 8))
264362

363+
@property
364+
def api_version(self):
365+
"""Returns version number of the SDK used to compile TVM.
366+
367+
For example, CUDA_VERSION for cuda or VK_HEADER_VERSION for
368+
Vulkan.
369+
370+
Returns device value for cuda, rocm, opencl, and vulkan.
371+
Returns remote device value for RPC devices. Returns None for
372+
all other devices.
373+
374+
Returns
375+
-------
376+
version : int or None
377+
The version of the SDK
378+
379+
"""
380+
return self._GetDeviceAttr(self.device_type, self.device_id, 12)
381+
382+
@property
383+
def driver_version(self):
384+
"""Returns version number of the driver
385+
386+
Returns driver vendor's internal version number.
387+
(e.g. "450.408.256" for nvidia-driver-450)
388+
389+
Returns device value for opencl and vulkan. Returns remote
390+
device value for RPC devices. Returns None for all other
391+
devices.
392+
393+
Returns
394+
-------
395+
version : str or None
396+
The version string in `major.minor.patch` format.
397+
398+
"""
399+
return self._GetDeviceAttr(self.device_type, self.device_id, 12)
400+
265401
def create_raw_stream(self):
266402
"""Create a new runtime stream at the context.
267403

src/runtime/cuda/cuda_device_api.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ class CUDADeviceAPI final : public DeviceAPI {
103103
*rv = CUDA_VERSION;
104104
return;
105105
}
106+
case kDriverVersion:
107+
return;
106108
}
107109
*rv = value;
108110
}

src/runtime/metal/metal_device_api.mm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@
7777
return;
7878
case kApiVersion:
7979
return;
80+
case kDriverVersion:
81+
return;
8082
}
8183
}
8284
}

src/runtime/opencl/opencl_common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@
4040
*/
4141
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
4242

43+
/* Newer releases of OpenCL header files (after May 2018) work with
44+
* any OpenCL version, with an application's target version
45+
* specified. Setting the target version disables APIs from after that
46+
* version, and sets appropriate USE_DEPRECATED macros. The above
47+
* macro for CL_USE_DEPRECATED_OPENCL_1_2_APIS is still needed in case
48+
* we are compiling against the earlier version-specific OpenCL header
49+
* files. This also allows us to expose the OpenCL version through
50+
* tvm.runtime.Device.
51+
*/
52+
#define CL_TARGET_OPENCL_VERSION 120
53+
4354
#ifdef __APPLE__
4455
#include <OpenCL/opencl.h>
4556
#else

src/runtime/opencl/opencl_device_api.cc

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ namespace tvm {
2929
namespace runtime {
3030
namespace cl {
3131

32+
std::string GetPlatformInfo(cl_platform_id pid, cl_platform_info param_name);
33+
std::string GetDeviceInfo(cl_device_id pid, cl_device_info param_name);
34+
3235
OpenCLThreadEntry* OpenCLWorkspace::GetThreadEntry() { return OpenCLThreadEntry::ThreadLocal(); }
3336

3437
OpenCLWorkspace* OpenCLWorkspace::Global() {
@@ -72,20 +75,27 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
7275
*rv = static_cast<int64_t>(value);
7376
break;
7477
}
75-
case kComputeVersion:
76-
return;
77-
case kDeviceName: {
78-
char value[128] = {0};
79-
OPENCL_CALL(
80-
clGetDeviceInfo(devices[index], CL_DEVICE_NAME, sizeof(value) - 1, value, nullptr));
81-
*rv = std::string(value);
78+
case kComputeVersion: {
79+
// String returned is "OpenCL $MAJOR.$MINOR $VENDOR_INFO". To
80+
// match other implementations, we want to return "$MAJOR.$MINOR"
81+
std::string ret = GetDeviceInfo(devices[index], CL_DEVICE_VERSION);
82+
83+
const size_t version_start = 7; // Length of initial "OpenCL " prefix to skip
84+
const size_t version_end = ret.find(' ', version_start);
85+
*rv = ret.substr(version_start, version_end - version_start);
8286
break;
8387
}
88+
return;
89+
case kDeviceName:
90+
*rv = GetDeviceInfo(devices[index], CL_DEVICE_NAME);
91+
break;
8492
case kMaxClockRate: {
8593
cl_uint value;
8694
OPENCL_CALL(clGetDeviceInfo(devices[index], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint),
8795
&value, nullptr));
88-
*rv = static_cast<int32_t>(value);
96+
// OpenCL returns the clock rate in MHz, while CUDA/ROCm return the
97+
// clock rate in kHz. Converting to the same units for each.
98+
*rv = static_cast<int32_t>(value * 1000);
8999
break;
90100
}
91101
case kMultiProcessorCount: {
@@ -109,8 +119,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
109119
return;
110120
case kGcnArch:
111121
return;
112-
case kApiVersion:
113-
return;
122+
case kApiVersion: {
123+
*rv = CL_TARGET_OPENCL_VERSION;
124+
break;
125+
}
126+
case kDriverVersion: {
127+
char value[128] = {0};
128+
OPENCL_CALL(
129+
clGetDeviceInfo(devices[index], CL_DRIVER_VERSION, sizeof(value) - 1, value, nullptr));
130+
*rv = std::string(value);
131+
break;
132+
}
114133
}
115134
}
116135

src/runtime/rocm/rocm_device_api.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ class ROCMDeviceAPI final : public DeviceAPI {
120120
*rv = HIP_VERSION;
121121
return;
122122
}
123+
case kDriverVersion:
124+
return;
123125
}
124126
*rv = value;
125127
}

src/runtime/vulkan/vulkan.cc

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <tvm/runtime/device_api.h>
2323
#include <tvm/runtime/registry.h>
2424
#include <vulkan/vulkan.h>
25+
#include <vulkan/vulkan_core.h>
2526

2627
#include <array>
2728
#include <cstring>
@@ -469,11 +470,12 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
469470
break;
470471
}
471472
case kDeviceName:
472-
return;
473+
*rv = std::string(phy_prop.deviceName);
474+
break;
473475
case kMaxClockRate:
474-
return;
476+
break;
475477
case kMultiProcessorCount:
476-
return;
478+
break;
477479
case kExist:
478480
break;
479481
case kMaxThreadDimensions: {
@@ -487,11 +489,20 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
487489
break;
488490
}
489491
case kMaxRegistersPerBlock:
490-
return;
492+
break;
491493
case kGcnArch:
492-
return;
494+
break;
493495
case kApiVersion:
494-
return;
496+
*rv = VK_HEADER_VERSION;
497+
break;
498+
case kDriverVersion: {
499+
int64_t value = phy_prop.driverVersion;
500+
std::ostringstream os;
501+
os << VK_VERSION_MAJOR(value) << "." << VK_VERSION_MINOR(value) << "."
502+
<< VK_VERSION_PATCH(value);
503+
*rv = os.str();
504+
break;
505+
}
495506
}
496507
}
497508

0 commit comments

Comments
 (0)