Skip to content

[AsyncAlloc][SYCL][CUDA][Exp] Initial device side implementation for the sycl_ext_oneapi_async_memory_alloc extension #16900

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 45 commits into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
8ac949b
[AsyncAlloc][SYCL][CUDA][Exp] Initial device side implementation for the
Seanst98 Feb 6, 2025
13cea8d
Address feedback. Replace UR USMPoolSetThresholdExp with USMPoolSetIn…
Seanst98 Mar 5, 2025
553247f
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 5, 2025
b261147
Address feedback
Seanst98 Mar 6, 2025
5646474
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 6, 2025
c308b22
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 6, 2025
3d83097
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 10, 2025
e332e79
Address feedback
Seanst98 Mar 10, 2025
51db796
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 10, 2025
9f32b79
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 13, 2025
c9411b9
Address feedback:
Seanst98 Mar 13, 2025
9018de4
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 14, 2025
aa8599d
Add symbols dump
Seanst98 Mar 17, 2025
aefd4a2
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 17, 2025
7d3155d
Fix self-contained-check
Seanst98 Mar 17, 2025
73772c4
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 17, 2025
6b7152c
Add EOF lines
Seanst98 Mar 17, 2025
8e9ea97
Minor style changes
Seanst98 Mar 17, 2025
8fcaa39
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 17, 2025
8d90123
Warning fixup
Seanst98 Mar 17, 2025
3e3cda5
Address feedback
Seanst98 Mar 17, 2025
d7f1c82
Remove some redundant includes
Seanst98 Mar 17, 2025
0ddd2d1
Fix missing constructor default
Seanst98 Mar 17, 2025
c452e5a
Add missing include
Seanst98 Mar 17, 2025
5757d13
Test broken build
Seanst98 Mar 17, 2025
4d48509
Test broken build
Seanst98 Mar 17, 2025
00de9a4
Add includes to see if that fixes build
Seanst98 Mar 17, 2025
ac392af
Move memory_pool.cpp to source top level
Seanst98 Mar 17, 2025
8a0d4c3
Undo broken changes
Seanst98 Mar 17, 2025
3780680
Bring back changes. Failure likely CI related
Seanst98 Mar 17, 2025
d40690c
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 17, 2025
b4da32b
Modify createSyclObjFromImpl to follow new style
Seanst98 Mar 17, 2025
a78d3a4
Address changes to spec. Add tests
Seanst98 Mar 18, 2025
cbfc355
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 19, 2025
f511bd5
Remove erroneous message about a non-error
Seanst98 Mar 19, 2025
1af1e7e
Update API to spec changes
Seanst98 Mar 20, 2025
363bbc4
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 20, 2025
83adbe8
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 24, 2025
da17dd1
Add const to memory_pool and remove watermark queries/resetters
Seanst98 Mar 26, 2025
2ba3fd9
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 26, 2025
9bc5dc9
Update symbols
Seanst98 Mar 26, 2025
e48d88a
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 26, 2025
2b81311
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 26, 2025
22e6311
Merge branch 'sycl' into sean/async-alloc
Seanst98 Mar 26, 2025
285f593
Merge branch 'sycl' into sean/async-alloc
npmiller Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def AspectExt_oneapi_bindless_images_gather : Aspect<"ext_oneapi_bindless_images
def AspectExt_intel_current_clock_throttle_reasons : Aspect<"ext_intel_current_clock_throttle_reasons">;
def AspectExt_intel_fan_speed : Aspect<"ext_intel_fan_speed">;
def AspectExt_intel_power_limits : Aspect<"ext_intel_power_limits">;
def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc">;

// Deprecated aspects
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
Expand Down Expand Up @@ -161,7 +162,8 @@ def : TargetInfo<"__TestAspectList",
AspectExt_intel_spill_memory_size,
AspectExt_intel_current_clock_throttle_reasons,
AspectExt_intel_fan_speed,
AspectExt_intel_power_limits],
AspectExt_intel_power_limits,
AspectExt_oneapi_async_memory_alloc],
[]>;
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
// match.
Expand Down
18 changes: 18 additions & 0 deletions sycl/include/sycl/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
#include <sycl/platform.hpp> // for platform
#include <sycl/property_list.hpp> // for property_list
#include <sycl/usm/usm_enums.hpp> // for usm::alloc
#include <ur_api.h> // for ur_native_handle_t

#ifdef __SYCL_INTERNAL_API
Expand All @@ -36,6 +37,10 @@ inline namespace _V1 {
class device;
class platform;

namespace ext::oneapi::experimental {
class memory_pool;
} // namespace ext::oneapi::experimental

namespace detail {
class context_impl;
}
Expand Down Expand Up @@ -245,6 +250,19 @@ class __SYCL_EXPORT context : public detail::OwnerLessBase<context> {
/// \return a vector of valid SYCL device instances.
std::vector<device> get_devices() const;

/// Gets default memory pool associated with a device and context.
///
/// \return a memory pool for a particular device and context.
sycl::ext::oneapi::experimental::memory_pool
ext_oneapi_get_default_memory_pool(const device &dev,
sycl::usm::alloc kind) const;

/// Gets default memory pool associated with the context and allocation kind.
///
/// \return a memory pool associated with this context.
sycl::ext::oneapi::experimental::memory_pool
ext_oneapi_get_default_memory_pool(sycl::usm::alloc kind) const;

private:
/// Constructs a SYCL context object from a valid context_impl instance.
context(std::shared_ptr<detail::context_impl> Impl);
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ enum class CGType : unsigned int {
SemaphoreSignal = 25,
ProfilingTag = 26,
EnqueueNativeCommand = 27,
AsyncAlloc = 28,
AsyncFree = 29,
};

template <typename, typename T> struct check_fn_signature {
Expand Down
8 changes: 6 additions & 2 deletions sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ enum DataLessPropKind {
GraphDependOnAllLeaves = 24,
GraphUpdatable = 25,
GraphEnableProfiling = 26,
MemPoolReadOnly = 27,
MemPoolZeroInit = 28,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 26,
LastKnownDataLessPropKind = 28,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand All @@ -67,7 +69,9 @@ enum PropWithDataKind {
AccPropBufferLocation = 5,
QueueComputeIndex = 6,
GraphNodeDependencies = 7,
PropWithDataKindSize = 8
MemPoolInitialThreshold = 8,
MemPoolMaximumSize = 9,
PropWithDataKindSize = 10
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//==----------- async_alloc.hpp --- SYCL asynchronous allocation -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once
#include <sycl/detail/common.hpp> // for code_location
#include <sycl/handler.hpp> // for handler
#include <sycl/queue.hpp> // for queue
#include <sycl/usm/usm_enums.hpp> // for usm::alloc

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

// Forward declare memory_pool.
class memory_pool;

/**
* @brief Asynchronousy allocate memory from a default pool.
*
* @param q The queue with which to enqueue the asynchronous allocation.
* @param kind The kind of memory pool allocation - device, host, shared, etc.
* @param size The size in bytes to allocate.
*
* @return Generic pointer to allocated USM memory.
*/
__SYCL_EXPORT void *async_malloc(const sycl::queue &q, sycl::usm::alloc kind,
size_t size,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

/**
* @brief Asynchronously allocate memory from a default pool.
*
* @param h The handler with which to enqueue the asynchronous allocation.
* @param kind The kind of memory pool allocation - device, host, shared, etc.
* @param size The size in bytes to allocate.
*
* @return Generic pointer to allocated USM memory.
*/
__SYCL_EXPORT void *async_malloc(sycl::handler &h, sycl::usm::alloc kind,
size_t size);

/**
* @brief Asynchronously allocate memory from a specified pool.
*
* @param q The queue with which to enqueue the asynchronous allocation.
* @param size The size in bytes to allocate.
* @param pool The pool with which to allocate from.
*
* @return Generic pointer to allocated USM memory.
*/
__SYCL_EXPORT void *
async_malloc_from_pool(const sycl::queue &q, size_t size,
const memory_pool &pool,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

/**
* @brief Asynchronously allocate memory from a specified pool.
*
* @param h The handler with which to enqueue the asynchronous allocation.
* @param size The size in bytes to allocate.
* @param pool The pool with which to allocate from.
*
* @return Generic pointer to allocated USM memory.
*/
__SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size,
const memory_pool &pool);

/**
* @brief Asynchronously free memory.
*
* @param q The queue with which to enqueue the asynchronous free.
* @param ptr The generic pointer to be freed.
*/
__SYCL_EXPORT void async_free(const sycl::queue &q, void *ptr,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current());

/**
* @brief Asynchronously free memory.
*
* @param h The handler with which to enqueue the asynchronous free.
* @param ptr The generic pointer to be freed.
*/
__SYCL_EXPORT void async_free(sycl::handler &h, void *ptr);

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//==----------- memory_pool.hpp --- SYCL asynchronous allocation -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once
#include <sycl/context.hpp> // for context
#include <sycl/device.hpp> // for device
#include <sycl/ext/oneapi/experimental/async_alloc/memory_pool_properties.hpp>
#include <sycl/queue.hpp> // for queue
#include <sycl/usm/usm_enums.hpp> // for usm::alloc

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

// Forward declare memory_pool_impl.
namespace detail {
class memory_pool_impl;
} // namespace detail

/// Memory pool
class __SYCL_EXPORT memory_pool {

public:
// NOT SUPPORTED: Host side pools unsupported.
memory_pool(const sycl::context &, sycl::usm::alloc kind,
const property_list & = {}) {
if (kind == sycl::usm::alloc::device || kind == sycl::usm::alloc::shared)
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
"Device and shared allocation kinds are disallowed "
"without specifying a device!");
if (kind == sycl::usm::alloc::unknown)
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
"Unknown allocation kinds are disallowed!");

throw sycl::exception(
sycl::make_error_code(sycl::errc::feature_not_supported),
"Host allocated pools are unsupported!");
}

memory_pool(const sycl::context &ctx, const sycl::device &dev,
sycl::usm::alloc kind, const property_list &props = {});

memory_pool(const sycl::queue &q, sycl::usm::alloc kind,
const property_list &props = {})
: memory_pool(q.get_context(), q.get_device(), kind, props) {}

// NOT SUPPORTED: Creating a pool from an existing allocation is unsupported.
memory_pool(const sycl::context &, void *, size_t,
const property_list & = {}) {
throw sycl::exception(
sycl::make_error_code(sycl::errc::feature_not_supported),
"Creating a pool from an existing allocation is unsupported!");
}

~memory_pool() = default;

// Copy constructible/assignable, move constructible/assignable.
memory_pool(const memory_pool &) = default;
memory_pool(memory_pool &&) = default;
memory_pool &operator=(const memory_pool &) = default;
memory_pool &operator=(memory_pool &&) = default;

// Equality comparison.
bool operator==(const memory_pool &rhs) const { return impl == rhs.impl; }
bool operator!=(const memory_pool &rhs) const { return !(*this == rhs); }

// Impl handles getters and setters.
sycl::context get_context() const;
sycl::device get_device() const;
sycl::usm::alloc get_alloc_kind() const;
size_t get_threshold() const;
size_t get_reserved_size_current() const;
size_t get_used_size_current() const;

void increase_threshold_to(size_t newThreshold);

// Property getters.
template <typename PropertyT> bool has_property() const noexcept {
return getPropList().template has_property<PropertyT>();
}
template <typename PropertyT> PropertyT get_property() const {
return getPropList().template get_property<PropertyT>();
}

protected:
std::shared_ptr<detail::memory_pool_impl> impl;

memory_pool(std::shared_ptr<detail::memory_pool_impl> Impl) : impl(Impl) {}

template <class Obj>
friend const decltype(Obj::impl) &
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

template <class T>
friend T sycl::detail::createSyclObjFromImpl(
std::add_rvalue_reference_t<decltype(T::impl)> ImplObj);
template <class T>
friend T sycl::detail::createSyclObjFromImpl(
std::add_lvalue_reference_t<const decltype(T::impl)> ImplObj);

const property_list &getPropList() const;
};

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl

namespace std {
template <> struct hash<sycl::ext::oneapi::experimental::memory_pool> {
size_t operator()(
const sycl::ext::oneapi::experimental::memory_pool &mem_pool) const {
return hash<std::shared_ptr<
sycl::ext::oneapi::experimental::detail::memory_pool_impl>>()(
sycl::detail::getSyclObjImpl(mem_pool));
}
};
} // namespace std
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//==------ memory_pool_properties.hpp --- SYCL asynchronous allocation -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once
#include <cstddef>
#include <sycl/properties/property_traits.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

// Forward declare memory_pool.
class memory_pool;

namespace property::memory_pool {

// Property that determines the initial threshold of a memory pool.
struct initial_threshold : public sycl::detail::PropertyWithData<
sycl::detail::MemPoolInitialThreshold> {
initial_threshold(size_t initialThreshold)
: initialThreshold(initialThreshold) {};
size_t get_initial_threshold() { return initialThreshold; }

private:
size_t initialThreshold;
};

// Property that determines the maximum size of a memory pool.
struct maximum_size
: public sycl::detail::PropertyWithData<sycl::detail::MemPoolMaximumSize> {
maximum_size(size_t maxSize) : maxSize(maxSize) {};
size_t get_maximum_size() { return maxSize; }

private:
size_t maxSize;
};

// Property that provides a performance hint that all allocations from this pool
// will only be read from within SYCL kernel functions.
struct read_only
: public sycl::detail::DataLessProperty<sycl::detail::MemPoolReadOnly> {
read_only() = default;
};

// Property that initial allocations to a pool (not subsequent allocations
// from prior frees) are iniitialised to zero.
struct zero_init
: public sycl::detail::DataLessProperty<sycl::detail::MemPoolZeroInit> {
zero_init() = default;
};
} // namespace property::memory_pool
} // namespace ext::oneapi::experimental

template <>
struct is_property<
sycl::ext::oneapi::experimental::property::memory_pool::initial_threshold>
: std::true_type {};

template <>
struct is_property<
sycl::ext::oneapi::experimental::property::memory_pool::maximum_size>
: std::true_type {};

template <>
struct is_property<
sycl::ext::oneapi::experimental::property::memory_pool::read_only>
: std::true_type {};

template <>
struct is_property<
sycl::ext::oneapi::experimental::property::memory_pool::zero_init>
: std::true_type {};

} // namespace _V1
} // namespace sycl
Loading
Loading