@@ -33,22 +33,22 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
33
33
// / with a given device and control access to said device from the user side.
34
34
// / UR API context are objects that are passed to functions, and not bound
35
35
// / to threads.
36
- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
37
- // / holds the HIP context data. The RAII object \ref ScopedContext implements
38
- // / the active context behavior.
39
36
// /
40
- // / <b> Primary vs UserDefined context </b>
37
+ // / Since the ur_context_handle_t can contain multiple devices, and a `hipCtx_t`
38
+ // / refers to only a single device, the `hipCtx_t` is more tightly coupled to a
39
+ // / ur_device_handle_t than a ur_context_handle_t. In order to remove some
40
+ // / ambiguities about the different semantics of `ur_context_handle_t`s and
41
+ // / native `hipCtx_t`, we access the native `hipCtx_t` solely through the
42
+ // / ur_device_handle_t class, by using the RAII object \ref ScopedDevice, which
43
+ // / sets the active device (by setting the active native `hipCtx_t`).
41
44
// /
42
- // / HIP has two different types of context, the Primary context,
43
- // / which is usable by all threads on a given process for a given device, and
44
- // / the aforementioned custom contexts.
45
- // / The HIP documentation, and performance analysis, suggest using the Primary
46
- // / context whenever possible. The Primary context is also used by the HIP
47
- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
48
- // / to use the primary context - and make that active in the thread. The
49
- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
50
- // / that allows to construct a Primary or `UserDefined` context, so that
51
- // / the UR object interface is always the same.
45
+ // / <b> Primary vs User-defined `hipCtx_t` </b>
46
+ // /
47
+ // / HIP has two different types of `hipCtx_t`, the Primary context, which is
48
+ // / usable by all threads on a given process for a given device, and the
49
+ // / aforementioned custom `hipCtx_t`s.
50
+ // / The HIP documentation, confirmed with performance analysis, suggests using
51
+ // / the Primary context whenever possible.
52
52
// /
53
53
// / <b> Destructor callback </b>
54
54
// /
@@ -58,6 +58,15 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
58
58
// / See proposal for details.
59
59
// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
60
60
// /
61
+ // / <b> Memory Management for Devices in a Context </b>
62
+ // /
63
+ // / A ur_buffer_ is associated with a ur_context_handle_t_, which may refer to
64
+ // / multiple devices. Therefore the ur_buffer_ must handle a native allocation
65
+ // / for each device in the context. UR is responsible for automatically
66
+ // / handling event dependencies for kernels writing to or reading from the
67
+ // / same ur_buffer_ and migrating memory between native allocations for
68
+ // / devices in the same ur_context_handle_t_ if necessary.
69
+ // /
61
70
struct ur_context_handle_t_ {
62
71
63
72
struct deleter_data {
@@ -69,15 +78,23 @@ struct ur_context_handle_t_ {
69
78
70
79
using native_type = hipCtx_t;
71
80
72
- ur_device_handle_t DeviceId;
81
+ std::vector<ur_device_handle_t > Devices;
82
+ uint32_t NumDevices;
83
+
73
84
std::atomic_uint32_t RefCount;
74
85
75
- ur_context_handle_t_ (ur_device_handle_t DevId)
76
- : DeviceId{DevId}, RefCount{1 } {
77
- urDeviceRetain (DeviceId);
86
+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
87
+ : Devices{Devs, Devs + NumDevices}, NumDevices{NumDevices}, RefCount{1 } {
88
+ for (auto &Dev : Devices) {
89
+ urDeviceRetain (Dev);
90
+ }
78
91
};
79
92
80
- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
93
+ ~ur_context_handle_t_ () {
94
+ for (auto &Dev : Devices) {
95
+ urDeviceRelease (Dev);
96
+ }
97
+ }
81
98
82
99
void invokeExtendedDeleters () {
83
100
std::lock_guard<std::mutex> Guard (Mutex);
@@ -92,7 +109,9 @@ struct ur_context_handle_t_ {
92
109
ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
93
110
}
94
111
95
- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
112
+ const std::vector<ur_device_handle_t > &getDevices () const noexcept {
113
+ return Devices;
114
+ }
96
115
97
116
uint32_t incrementReferenceCount () noexcept { return ++RefCount; }
98
117
0 commit comments