Skip to content

Commit 83bb79e

Browse files
sergey-semenovromanovvlad
authored andcommitted
[SYCL] Reuse user ptr in buffer & fix default memory allocation (#243)
[SYCL] Reuse user ptr if buffer & fix default memory allocation Reuse the pointer provided by the user in the buffer constructor (even if use_host_ptr wasn't specified) if its alignment is sufficient. Use the type-specific aligned_allocator<T> for buffer host memory allocation instead of the non-template buffer_allocator (which is aligned_allocator<char>). Signed-off-by: Sergey Semenov <sergey.semenov@intel.com>
1 parent a480528 commit 83bb79e

File tree

7 files changed

+51
-27
lines changed

7 files changed

+51
-27
lines changed

sycl/include/CL/sycl/accessor.hpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -336,11 +336,11 @@ class accessor :
336336
using reference = DataT &;
337337
using const_reference = const DataT &;
338338

339-
template <int Dims = Dimensions>
339+
template <typename AllocatorT, int Dims = Dimensions>
340340
accessor(
341341
enable_if_t<Dims == 0 && ((!IsPlaceH && IsHostBuf) ||
342342
(IsPlaceH && (IsGlobalBuf || IsConstantBuf))),
343-
buffer<DataT, 1>> &BufferRef)
343+
buffer<DataT, 1, AllocatorT>> &BufferRef)
344344
#ifdef __SYCL_DEVICE_ONLY__
345345
: impl(id<AdjustedDim>(), BufferRef.get_range(), BufferRef.MemRange) {
346346
#else
@@ -357,9 +357,9 @@ class accessor :
357357
#endif
358358
}
359359

360-
template <int Dims = Dimensions>
360+
template <typename AllocatorT, int Dims = Dimensions>
361361
accessor(
362-
buffer<DataT, 1> &BufferRef,
362+
buffer<DataT, 1, AllocatorT> &BufferRef,
363363
enable_if_t<Dims == 0 && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf)),
364364
handler> &CommandGroupHandler)
365365
#ifdef __SYCL_DEVICE_ONLY__
@@ -376,11 +376,11 @@ class accessor :
376376
}
377377
#endif
378378

379-
template <int Dims = Dimensions,
379+
template <typename AllocatorT, int Dims = Dimensions,
380380
typename = enable_if_t<
381381
(Dims > 0) && ((!IsPlaceH && IsHostBuf) ||
382382
(IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>>
383-
accessor(buffer<DataT, Dimensions> &BufferRef)
383+
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef)
384384
#ifdef __SYCL_DEVICE_ONLY__
385385
: impl(id<Dimensions>(), BufferRef.get_range(), BufferRef.MemRange) {
386386
}
@@ -398,10 +398,11 @@ class accessor :
398398
}
399399
#endif
400400

401-
template <int Dims = Dimensions,
401+
template <typename AllocatorT, int Dims = Dimensions,
402402
typename = enable_if_t<
403403
(Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>>
404-
accessor(buffer<DataT, Dimensions> &BufferRef, handler &CommandGroupHandler)
404+
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
405+
handler &CommandGroupHandler)
405406
#ifdef __SYCL_DEVICE_ONLY__
406407
: impl(id<AdjustedDim>(), BufferRef.get_range(), BufferRef.MemRange) {
407408
}
@@ -416,12 +417,12 @@ class accessor :
416417
}
417418
#endif
418419

419-
template <int Dims = Dimensions,
420+
template <typename AllocatorT, int Dims = Dimensions,
420421
typename = enable_if_t<
421422
(Dims > 0) && ((!IsPlaceH && IsHostBuf) ||
422423
(IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>>
423-
accessor(buffer<DataT, Dimensions> &BufferRef, range<Dimensions> AccessRange,
424-
id<Dimensions> AccessOffset = {})
424+
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
425+
range<Dimensions> AccessRange, id<Dimensions> AccessOffset = {})
425426
#ifdef __SYCL_DEVICE_ONLY__
426427
: impl(AccessOffset, AccessRange, BufferRef.MemRange) {
427428
}
@@ -438,11 +439,12 @@ class accessor :
438439
}
439440
#endif
440441

441-
template <int Dims = Dimensions,
442+
template <typename AllocatorT, int Dims = Dimensions,
442443
typename = enable_if_t<
443444
(Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>>
444-
accessor(buffer<DataT, Dimensions> &BufferRef, handler &CommandGroupHandler,
445-
range<Dimensions> AccessRange, id<Dimensions> AccessOffset = {})
445+
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
446+
handler &CommandGroupHandler, range<Dimensions> AccessRange,
447+
id<Dimensions> AccessOffset = {})
446448
#ifdef __SYCL_DEVICE_ONLY__
447449
: impl(AccessOffset, AccessRange, BufferRef.MemRange) {
448450
}

sycl/include/CL/sycl/buffer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class queue;
2121
template <int dimensions> class range;
2222

2323
template <typename T, int dimensions = 1,
24-
typename AllocatorT = cl::sycl::buffer_allocator>
24+
typename AllocatorT = cl::sycl::detail::aligned_allocator<T>>
2525
class buffer {
2626
public:
2727
using value_type = T;

sycl/include/CL/sycl/detail/aligned_allocator.hpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,20 @@
1010

1111
#include <CL/cl.h>
1212
#include <CL/sycl/detail/cnri.h>
13+
#include <CL/sycl/detail/common.hpp>
1314
#include <CL/sycl/detail/os_util.hpp>
1415
#include <CL/sycl/range.hpp>
1516

17+
#include <algorithm>
1618
#include <cstring>
1719
#include <cstdlib>
1820
#include <memory>
1921
#include <vector>
2022

2123
namespace cl {
2224
namespace sycl {
23-
template <typename T, size_t Alignment>
24-
class aligned_allocator {
25+
namespace detail {
26+
template <typename T> class aligned_allocator {
2527
public:
2628
using value_type = T;
2729
using pointer = T*;
@@ -30,10 +32,7 @@ class aligned_allocator {
3032
using const_reference = const T&;
3133

3234
public:
33-
template<typename U>
34-
struct rebind {
35-
typedef aligned_allocator<U, Alignment> other;
36-
};
35+
template <typename U> struct rebind { typedef aligned_allocator<U> other; };
3736

3837
// Construct an object
3938
void construct(pointer Ptr, const_reference Val) {
@@ -46,11 +45,15 @@ class aligned_allocator {
4645
pointer address(reference Val) const { return &Val; }
4746
const_pointer address(const_reference Val) { return &Val; }
4847

49-
// Allocate aligned (to Alignment) memory
48+
// Allocate sufficiently aligned memory
5049
pointer allocate(size_t Size) {
51-
Size += Alignment - Size % Alignment;
50+
size_t NumBytes = Size * sizeof(value_type);
51+
const size_t Alignment =
52+
std::max<size_t>(getNextPowerOfTwo(sizeof(value_type)), 64);
53+
NumBytes = ((NumBytes - 1) | (Alignment - 1)) + 1;
54+
5255
pointer Result = reinterpret_cast<pointer>(
53-
detail::OSUtil::alignedAlloc(Alignment, Size * sizeof(value_type)));
56+
detail::OSUtil::alignedAlloc(Alignment, NumBytes));
5457
if (!Result)
5558
throw std::bad_alloc();
5659
return Result;
@@ -65,5 +68,6 @@ class aligned_allocator {
6568
bool operator==(const aligned_allocator&) { return true; }
6669
bool operator!=(const aligned_allocator& rhs) { return false; }
6770
};
71+
} // namespace detail
6872
} // namespace sycl
6973
} // namespace cl

sycl/include/CL/sycl/detail/buffer_impl.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <CL/sycl/stl.hpp>
2323
#include <CL/sycl/types.hpp>
2424

25+
#include <cstdint>
2526
#include <functional>
2627
#include <memory>
2728
#include <type_traits>
@@ -35,7 +36,7 @@ class accessor;
3536
template <typename T, int Dimensions, typename AllocatorT> class buffer;
3637
class handler;
3738

38-
using buffer_allocator = aligned_allocator<char, /*Alignment*/64>;
39+
using buffer_allocator = detail::aligned_allocator<char>;
3940

4041
namespace detail {
4142
using EventImplPtr = std::shared_ptr<detail::event_impl>;
@@ -59,7 +60,10 @@ template <typename AllocatorT> class buffer_impl : public SYCLMemObjT {
5960
return;
6061

6162
set_final_data(reinterpret_cast<char *>(HostData));
62-
if (MProps.has_property<property::buffer::use_host_ptr>()) {
63+
size_t RequiredAlignment =
64+
getNextPowerOfTwo(sizeof(typename AllocatorT::value_type));
65+
if (reinterpret_cast<std::uintptr_t>(HostData) % RequiredAlignment == 0 ||
66+
MProps.has_property<property::buffer::use_host_ptr>()) {
6367
MUserPtr = HostData;
6468
return;
6569
}

sycl/include/CL/sycl/detail/common.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ template <class T> T createSyclObjFromImpl(decltype(T::impl) ImplObj) {
103103
return T(ImplObj);
104104
}
105105

106+
// Returns the smallest power of two not less than Var
107+
size_t getNextPowerOfTwo(size_t Var);
108+
106109
} // namespace detail
107110
} // namespace sycl
108111
} // namespace cl

sycl/include/CL/sycl/detail/image_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ enum class image_channel_type : unsigned int;
2626
namespace detail {
2727

2828
// utility functions and typedefs for image_impl
29-
using image_allocator = aligned_allocator<byte, /*alignment*/ 64>;
29+
using image_allocator = aligned_allocator<byte>;
3030

3131
// utility function: Returns the Number of Channels for a given Order.
3232
uint8_t getImageNumberChannels(image_channel_order Order);

sycl/source/detail/common.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,17 @@ vector_class<string_class> split_string(const string_class &str,
237237
return result;
238238
}
239239

240+
size_t getNextPowerOfTwo(size_t Var) {
241+
--Var;
242+
Var |= Var >> 1;
243+
Var |= Var >> 2;
244+
Var |= Var >> 4;
245+
Var |= Var >> 8;
246+
Var |= Var >> 16;
247+
Var |= Var >> 32;
248+
return ++Var;
249+
}
250+
240251
} // namespace detail
241252
} // namespace sycl
242253
} // namespace cl

0 commit comments

Comments
 (0)