Skip to content

Commit ce915ef

Browse files
authored
[SYCL][USM] Improve USM Allocator. (#2026)
Add ability to use std::allocate_shared. Add equality operators for allocators. Add tests. Disallow device allocations in usm_allocator as there are too many incompatibilities with how C++ allocators are used. Signed-off-by: James Brodman <james.brodman@intel.com>
1 parent a43dcc2 commit ce915ef

File tree

6 files changed

+144
-278
lines changed

6 files changed

+144
-278
lines changed

sycl/include/CL/sycl/usm/usm_allocator.hpp

Lines changed: 40 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -26,112 +26,45 @@ __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size,
2626
usm::alloc kind);
2727
__SYCL_EXPORT void free(void *ptr, const context &ctxt);
2828

29-
template <typename T, usm::alloc AllocKind, size_t Alignment = 0>
29+
template <typename T, usm::alloc AllocKind, size_t Alignment = alignof(T)>
3030
class usm_allocator {
3131
public:
3232
using value_type = T;
33-
using pointer = T *;
34-
using const_pointer = const T *;
35-
using reference = T &;
36-
using const_reference = const T &;
33+
using propagate_on_container_copy_assignment = std::true_type;
34+
using propagate_on_container_move_assignment = std::true_type;
35+
using propagate_on_container_swap = std::true_type;
3736

3837
public:
3938
template <typename U> struct rebind {
4039
typedef usm_allocator<U, AllocKind, Alignment> other;
4140
};
4241

43-
usm_allocator() = delete;
44-
usm_allocator(const context &Ctxt, const device &Dev)
42+
static_assert(
43+
AllocKind != usm::alloc::device,
44+
"usm_allocator does not support AllocKind == usm::alloc::device");
45+
46+
usm_allocator() noexcept = delete;
47+
usm_allocator(const context &Ctxt, const device &Dev) noexcept
4548
: MContext(Ctxt), MDevice(Dev) {}
46-
usm_allocator(const queue &Q)
49+
usm_allocator(const queue &Q) noexcept
4750
: MContext(Q.get_context()), MDevice(Q.get_device()) {}
48-
usm_allocator(const usm_allocator &Other)
49-
: MContext(Other.MContext), MDevice(Other.MDevice) {}
50-
51-
/// Constructs an object on memory pointed by Ptr.
52-
///
53-
/// Note: AllocKind == alloc::device is not allowed.
54-
///
55-
/// \param Ptr is a pointer to memory that will be used to construct the
56-
/// object.
57-
/// \param Val is a value to initialize the newly constructed object.
58-
template <
59-
usm::alloc AllocT = AllocKind,
60-
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
61-
void construct(pointer Ptr, const_reference Val) {
62-
new (Ptr) value_type(Val);
63-
}
64-
65-
template <
66-
usm::alloc AllocT = AllocKind,
67-
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
68-
void construct(pointer, const_reference) {
69-
throw feature_not_supported(
70-
"Device pointers do not support construct on host",
71-
PI_INVALID_OPERATION);
72-
}
51+
usm_allocator(const usm_allocator &) noexcept = default;
52+
usm_allocator(usm_allocator &&) noexcept = default;
53+
usm_allocator &operator=(const usm_allocator &) = delete;
54+
usm_allocator &operator=(usm_allocator &&) = default;
7355

74-
/// Destroys an object.
75-
///
76-
/// Note:: AllocKind == alloc::device is not allowed
77-
///
78-
/// \param Ptr is a pointer to memory where the object resides.
79-
template <
80-
usm::alloc AllocT = AllocKind,
81-
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
82-
void destroy(pointer Ptr) {
83-
Ptr->~value_type();
84-
}
85-
86-
template <
87-
usm::alloc AllocT = AllocKind,
88-
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
89-
void destroy(pointer) {
90-
// This method must be a NOP for device pointers.
91-
}
92-
93-
/// Note:: AllocKind == alloc::device is not allowed.
94-
///
95-
/// \param Val is a reference to object.
96-
/// \return an address of the object referenced by Val.
97-
template <
98-
usm::alloc AllocT = AllocKind,
99-
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
100-
pointer address(reference Val) const {
101-
return &Val;
102-
}
103-
104-
template <
105-
usm::alloc AllocT = AllocKind,
106-
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
107-
pointer address(reference) const {
108-
throw feature_not_supported(
109-
"Device pointers do not support address on host", PI_INVALID_OPERATION);
110-
}
111-
112-
template <
113-
usm::alloc AllocT = AllocKind,
114-
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
115-
const_pointer address(const_reference Val) const {
116-
return &Val;
117-
}
118-
119-
template <
120-
usm::alloc AllocT = AllocKind,
121-
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
122-
const_pointer address(const_reference) const {
123-
throw feature_not_supported(
124-
"Device pointers do not support address on host", PI_INVALID_OPERATION);
125-
}
56+
template <class U>
57+
usm_allocator(const usm_allocator<U, AllocKind, Alignment> &Other) noexcept
58+
: MContext(Other.MContext), MDevice(Other.MDevice) {}
12659

12760
/// Allocates memory.
12861
///
12962
/// \param NumberOfElements is a count of elements to allocate memory for.
130-
pointer allocate(size_t NumberOfElements) {
63+
T *allocate(size_t NumberOfElements) {
13164

132-
auto Result = reinterpret_cast<pointer>(
65+
auto Result = reinterpret_cast<T *>(
13366
aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type),
134-
MDevice, MContext, AllocKind));
67+
MDevice, MContext, AllocKind));
13568
if (!Result) {
13669
throw memory_allocation_error();
13770
}
@@ -142,24 +75,32 @@ class usm_allocator {
14275
///
14376
/// \param Ptr is a pointer to memory being deallocated.
14477
/// \param Size is the number of elements previously passed to allocate.
145-
void deallocate(pointer Ptr, size_t) {
78+
void deallocate(T *Ptr, size_t) {
14679
if (Ptr) {
14780
free(Ptr, MContext);
14881
}
14982
}
15083

151-
private:
152-
constexpr size_t getAlignment() const {
153-
/*
154-
// This form might be preferable if the underlying implementation
155-
// doesn't do the right thing when given 0 for alignment
156-
return ((Alignment == 0)
157-
? alignof(value_type)
158-
: Alignment);
159-
*/
160-
return Alignment;
84+
template <class U, usm::alloc AllocKindU, size_t AlignmentU>
85+
friend bool operator==(const usm_allocator<T, AllocKind, Alignment> &One,
86+
const usm_allocator<U, AllocKindU, AlignmentU> &Two) {
87+
return ((AllocKind == AllocKindU) && (One.MContext == Two.MContext) &&
88+
(One.MDevice == Two.MDevice));
89+
}
90+
91+
template <class U, usm::alloc AllocKindU, size_t AlignmentU>
92+
friend bool operator!=(const usm_allocator<T, AllocKind, Alignment> &One,
93+
const usm_allocator<U, AllocKindU, AlignmentU> &Two) {
94+
return !((AllocKind == AllocKindU) && (One.MContext == Two.MContext) &&
95+
(One.MDevice == Two.MDevice));
16196
}
16297

98+
private:
99+
constexpr size_t getAlignment() const { return Alignment; }
100+
101+
template <class U, usm::alloc AllocKindU, size_t AlignmentU>
102+
friend class usm_allocator;
103+
163104
const context MContext;
164105
const device MDevice;
165106
};

sycl/test/usm/allocator_equal.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// piextUSM*Alloc functions for CUDA are not behaving as described in
2+
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
3+
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
4+
//
5+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
6+
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
7+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
8+
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
9+
10+
//==---------- allocator_equal.cpp - Allocator Equality test ---------------==//
11+
//
12+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
13+
// See https://llvm.org/LICENSE.txt for license information.
14+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
15+
//
16+
//===----------------------------------------------------------------------===//
17+
18+
#include <CL/sycl.hpp>
19+
20+
#include <cassert>
21+
22+
using namespace cl::sycl;
23+
24+
int main() {
25+
queue q;
26+
auto dev = q.get_device();
27+
auto ctxt = q.get_context();
28+
29+
queue q2;
30+
auto dev2 = q2.get_device();
31+
auto ctxt2 = q2.get_context();
32+
33+
// Test allocator equality
34+
if (dev.get_info<info::device::usm_host_allocations>()) {
35+
usm_allocator<int, usm::alloc::host> alloc1(ctxt, dev);
36+
usm_allocator<int, usm::alloc::host> alloc2(q);
37+
38+
assert((alloc1 == alloc2) && "Allocators should be equal.");
39+
40+
usm_allocator<int, usm::alloc::host, 8> alloc3(ctxt, dev);
41+
usm_allocator<int, usm::alloc::host, 16> alloc4(q);
42+
43+
assert((alloc1 == alloc2) && "Allocators should be equal.");
44+
}
45+
46+
if (dev.get_info<info::device::usm_shared_allocations>() &&
47+
dev.get_info<info::device::usm_host_allocations>()) {
48+
usm_allocator<int, usm::alloc::shared> alloc1(ctxt, dev);
49+
usm_allocator<int, usm::alloc::host> alloc2(ctxt, dev);
50+
51+
assert((alloc1 != alloc2) && "Allocators should NOT be equal.");
52+
}
53+
54+
return 0;
55+
}

sycl/test/usm/allocator_shared.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
2+
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
5+
6+
//==-------- allocator_shared.cpp - Allocate Shared test -------------------==//
7+
//
8+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9+
// See https://llvm.org/LICENSE.txt for license information.
10+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include <CL/sycl.hpp>
15+
16+
#include <cassert>
17+
#include <memory>
18+
19+
using namespace cl::sycl;
20+
21+
int main() {
22+
queue q;
23+
auto dev = q.get_device();
24+
auto ctxt = q.get_context();
25+
26+
// Test ability to create a shared pointer.
27+
if (dev.get_info<info::device::usm_host_allocations>()) {
28+
usm_allocator<int, usm::alloc::host> alloc(ctxt, dev);
29+
auto ptr1 = std::allocate_shared<int>(alloc);
30+
31+
// Test construction
32+
auto ptr2 = std::allocate_shared<int>(alloc, 42);
33+
assert((*ptr2 == 42) && "Host construct passed.");
34+
}
35+
36+
if (dev.get_info<info::device::usm_shared_allocations>()) {
37+
usm_allocator<int, usm::alloc::shared> alloc(ctxt, dev);
38+
auto ptr1 = std::allocate_shared<int>(alloc);
39+
40+
// Test construction
41+
auto ptr2 = std::allocate_shared<int>(alloc, 42);
42+
assert((*ptr2 == 42) && "Shared construct passed.");
43+
}
44+
45+
// Device allocations are not supported due to how allocate_shared is
46+
// written.
47+
48+
return 0;
49+
}

sycl/test/usm/allocator_vector.cpp

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
// XFAIL: cuda || level0
2-
// piextUSM*Alloc functions for CUDA are not behaving as described in
3-
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
4-
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
5-
//
61
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
72
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
83
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
@@ -88,43 +83,5 @@ int main() {
8883
return -1;
8984
}
9085

91-
if (dev.get_info<info::device::usm_device_allocations>()) {
92-
usm_allocator<int, usm::alloc::device> alloc(ctxt, dev);
93-
94-
std::vector<int, decltype(alloc)> vec(alloc);
95-
vec.resize(N);
96-
97-
int *res = &vec[0];
98-
int *vals = &vec[0];
99-
100-
auto e0 = q.submit([=](handler &h) {
101-
h.single_task<class baz_init>([=]() {
102-
res[0] = 0;
103-
for (int i = 0; i < N; i++) {
104-
vals[i] = i;
105-
}
106-
});
107-
});
108-
109-
auto e1 = q.submit([=](handler &h) {
110-
h.depends_on(e0);
111-
h.single_task<class baz>([=]() {
112-
for (int i = 1; i < N; i++) {
113-
res[0] += vals[i];
114-
}
115-
});
116-
});
117-
118-
e1.wait();
119-
120-
int answer = (N * (N - 1)) / 2;
121-
int result;
122-
q.memcpy(&result, res, sizeof(int));
123-
q.wait();
124-
125-
if (result != answer)
126-
return -1;
127-
}
128-
12986
return 0;
13087
}

sycl/test/usm/allocator_vector_fail.cpp

Lines changed: 0 additions & 48 deletions
This file was deleted.

0 commit comments

Comments
 (0)